summary | refs | log | tree | commit | diff
path: root/megapixels/commands/faiss
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-12-14 18:10:27 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-12-14 18:10:27 +0100
commit: 38746f284b17400d4e2555509ea60df5912b824a (patch)
tree: 6dea93f4ba348d12a58a761424ec5547697fcf1f /megapixels/commands/faiss
parent: 36b6082dfa768cbf35d40dc2c82706dfae0b687b (diff)
all the sql stuff communicating nicely
Diffstat (limited to 'megapixels/commands/faiss')
-rw-r--r-- megapixels/commands/faiss/build_db.py | 36
1 files changed, 3 insertions, 33 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
index 52c4980f..0f979e41 100644
--- a/megapixels/commands/faiss/build_db.py
+++ b/megapixels/commands/faiss/build_db.py
@@ -2,44 +2,14 @@
Load all the CSV files into MySQL
"""
-import os
-import glob
import click
-import time
-import pandas as pd
-from app.models.sql_factory import engine, SqlDataset
-from app.utils.file_utils import load_recipe, load_csv_safe
-from app.settings import app_cfg as cfg
+from app.models.sql_factory import load_sql_datasets
@click.command()
@click.pass_context
def cli(ctx):
"""import the various CSVs into MySQL
"""
- load_sql_datasets(clobber=True)
-
-def load_sql_datasets(path, clobber=False):
- datasets = {}
- for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
- dataset = load_sql_dataset(path, clobber)
- datasets[dataset.name] = dataset
-
-def load_sql_dataset(path, clobber=False):
- name = os.path.basename(path)
- dataset = SqlDataset(name)
-
- for fn in glob.iglob(os.path.join(path, "*.csv")):
- key = os.path.basename(fn).replace(".csv", "")
- table = dataset.get_table(key)
- if table is None:
- continue
- if clobber:
- df = pd.read_csv(fn)
-
- # fix columns that are named "index", a sql reserved word
- df.columns = table.__table__.columns.keys()
-
- df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
-
- return dataset
\ No newline at end of file
+ print('Loading CSV datasets into SQL...')
+ load_sql_datasets(replace=True)