summary | refs | log | tree | commit | diff
path: root/megapixels/commands/faiss/build_db.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/faiss/build_db.py')
-rw-r--r-- megapixels/commands/faiss/build_db.py 38
1 files changed, 38 insertions, 0 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
new file mode 100644
index 00000000..c90d178b
--- /dev/null
+++ b/megapixels/commands/faiss/build_db.py
@@ -0,0 +1,38 @@
+"""
+Load all the CSV files into MySQL
+"""
+
+import os
+import glob
+import click
+import time
+import pandas as pd
+
+from app.models.sql_factory import engine, SqlDataset
+from app.utils.file_utils import load_recipe, load_csv_safe
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    """Import the various CSVs into MySQL.
+
+    Every entry matched by ``*`` directly under ``cfg.DIR_FAISS_METADATA``
+    is handed to ``build_dataset`` as one dataset path.  ``ctx`` is the
+    click context injected by ``@click.pass_context``; it is not used here.
+    """
+    for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
+        build_dataset(path)
+
+def build_dataset(path):
+    """Load every CSV file of one dataset directory into its SQL table.
+
+    The base name of ``path`` is used as the dataset name.  For each
+    ``*.csv`` file directly inside ``path``, the file name without its
+    extension is the key passed to ``SqlDataset.get_table``; files for which
+    that call returns ``None`` are skipped.
+
+    :param path: path of the dataset directory containing the CSV files.
+    """
+    name = os.path.basename(path)
+    dataset = SqlDataset(name)
+
+    for fn in glob.iglob(os.path.join(path, "*.csv")):
+        # splitext strips only the trailing extension; str.replace would
+        # also remove a ".csv" that occurs in the middle of the file name.
+        key, _ = os.path.splitext(os.path.basename(fn))
+        table = dataset.get_table(key)
+        if table is None:
+            continue
+        df = pd.read_csv(fn)
+
+        # fix columns that are named "index", a sql reserved word.
+        # The model's column names are assigned positionally, so this
+        # assumes the CSV column order matches the model - TODO confirm.
+        df.columns = table.__table__.columns.keys()
+
+        # if_exists='replace' makes pandas drop an existing table of the
+        # same name before inserting the rows.
+        df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)