diff options
Diffstat (limited to 'megapixels/commands')
| -rw-r--r-- | megapixels/commands/faiss/build_db.py | 21 |
1 files changed, 14 insertions, 7 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
index c90d178b..52c4980f 100644
--- a/megapixels/commands/faiss/build_db.py
+++ b/megapixels/commands/faiss/build_db.py
@@ -17,11 +17,15 @@ from app.settings import app_cfg as cfg
 def cli(ctx):
   """import the various CSVs into MySQL
   """
-  datasets = []
+  load_sql_datasets(clobber=True)
+
+def load_sql_datasets(path, clobber=False):
+  datasets = {}
   for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
-    build_dataset(path)
+    dataset = load_sql_dataset(path, clobber)
+    datasets[dataset.name] = dataset
 
-def build_dataset(path):
+def load_sql_dataset(path, clobber=False):
   name = os.path.basename(path)
   dataset = SqlDataset(name)
 
@@ -30,9 +34,12 @@ def build_dataset(path):
     table = dataset.get_table(key)
     if table is None:
       continue
-    df = pd.read_csv(fn)
+    if clobber:
+      df = pd.read_csv(fn)
+
+      # fix columns that are named "index", a sql reserved word
+      df.columns = table.__table__.columns.keys()
 
-    # fix columns that are named "index", a sql reserved word
-    df.columns = table.__table__.columns.keys()
+      df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
 
-    df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
+  return dataset
\ No newline at end of file |
