diff options
Diffstat (limited to 'megapixels/commands/faiss/build_db.py')
| -rw-r--r-- | megapixels/commands/faiss/build_db.py | 38 |
1 files changed, 38 insertions, 0 deletions
"""
Load all the CSV files into MySQL
"""

import os
import glob
import click
import time
import pandas as pd

from app.models.sql_factory import engine, SqlDataset
from app.utils.file_utils import load_recipe, load_csv_safe
from app.settings import app_cfg as cfg


@click.command()
@click.pass_context
def cli(ctx):
    """import the various CSVs into MySQL
    """
    # Each sub-directory of cfg.DIR_FAISS_METADATA is handled as one dataset;
    # the directory name is used as the dataset name.
    for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
        # Skip stray plain files: build_dataset() only looks for CSVs
        # *inside* the given path, so a non-directory can never yield a table.
        if not os.path.isdir(path):
            continue
        build_dataset(path)


def build_dataset(path):
    """Import every ``*.csv`` directly inside ``path`` into its SQL table.

    The base name of ``path`` is used as the dataset name, and the base name
    of each CSV (without its extension) is the key passed to
    ``SqlDataset.get_table``. CSV files for which ``get_table`` returns
    ``None`` are skipped.

    Any existing table with the same name is replaced
    (``if_exists='replace'``).

    :param path: directory holding the CSV files of a single dataset
    :raises ValueError: raised by pandas when a CSV does not have the same
        number of columns as its table
    """
    name = os.path.basename(path)
    dataset = SqlDataset(name)

    for fn in glob.iglob(os.path.join(path, "*.csv")):
        # Strip only the extension; str.replace(".csv", "") would also
        # remove a ".csv" appearing in the middle of the file name.
        key, _ = os.path.splitext(os.path.basename(fn))
        table = dataset.get_table(key)
        if table is None:
            continue
        df = pd.read_csv(fn)

        # fix columns that are named "index", a sql reserved word
        # NOTE(review): columns are renamed purely by position, so the CSV
        # column order is assumed to match the table definition - confirm.
        df.columns = table.__table__.columns.keys()

        df.to_sql(
            name=table.__tablename__,
            con=engine,
            if_exists='replace',
            index=False,
        )
