summary | refs | log | tree | commit | diff
path: root/megapixels/commands
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands')
-rw-r--r-- megapixels/commands/faiss/build_db.py 21
1 files changed, 14 insertions, 7 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
index c90d178b..52c4980f 100644
--- a/megapixels/commands/faiss/build_db.py
+++ b/megapixels/commands/faiss/build_db.py
@@ -17,11 +17,15 @@ from app.settings import app_cfg as cfg
def cli(ctx):
"""import the various CSVs into MySQL
"""
- datasets = []
+ load_sql_datasets(clobber=True)
+
+def load_sql_datasets(path, clobber=False):
+ datasets = {}
for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
- build_dataset(path)
+ dataset = load_sql_dataset(path, clobber)
+ datasets[dataset.name] = dataset
-def build_dataset(path):
+def load_sql_dataset(path, clobber=False):
name = os.path.basename(path)
dataset = SqlDataset(name)
@@ -30,9 +34,12 @@ def build_dataset(path):
table = dataset.get_table(key)
if table is None:
continue
- df = pd.read_csv(fn)
+ if clobber:
+ df = pd.read_csv(fn)
+
+ # fix columns that are named "index", a sql reserved word
+ df.columns = table.__table__.columns.keys()
- # fix columns that are named "index", a sql reserved word
- df.columns = table.__table__.columns.keys()
+ df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
- df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
+ return dataset
\ No newline at end of file