diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 18:10:27 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 18:10:27 +0100 |
| commit | 38746f284b17400d4e2555509ea60df5912b824a (patch) | |
| tree | 6dea93f4ba348d12a58a761424ec5547697fcf1f /megapixels/commands/faiss/build_db.py | |
| parent | 36b6082dfa768cbf35d40dc2c82706dfae0b687b (diff) | |
all the sql stuff communicating nicely
Diffstat (limited to 'megapixels/commands/faiss/build_db.py')
| -rw-r--r-- | megapixels/commands/faiss/build_db.py | 36 |
1 files changed, 3 insertions, 33 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
index 52c4980f..0f979e41 100644
--- a/megapixels/commands/faiss/build_db.py
+++ b/megapixels/commands/faiss/build_db.py
@@ -2,44 +2,14 @@
 Load all the CSV files into MySQL
 """
 
-import os
-import glob
 import click
-import time
-import pandas as pd
 
-from app.models.sql_factory import engine, SqlDataset
-from app.utils.file_utils import load_recipe, load_csv_safe
-from app.settings import app_cfg as cfg
+from app.models.sql_factory import load_sql_datasets
 
 @click.command()
 @click.pass_context
 def cli(ctx):
   """import the various CSVs into MySQL
   """
-  load_sql_datasets(clobber=True)
-
-def load_sql_datasets(path, clobber=False):
-  datasets = {}
-  for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
-    dataset = load_sql_dataset(path, clobber)
-    datasets[dataset.name] = dataset
-
-def load_sql_dataset(path, clobber=False):
-  name = os.path.basename(path)
-  dataset = SqlDataset(name)
-
-  for fn in glob.iglob(os.path.join(path, "*.csv")):
-    key = os.path.basename(fn).replace(".csv", "")
-    table = dataset.get_table(key)
-    if table is None:
-      continue
-    if clobber:
-      df = pd.read_csv(fn)
-
-      # fix columns that are named "index", a sql reserved word
-      df.columns = table.__table__.columns.keys()
-
-      df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
-
-  return dataset
\ No newline at end of file
+  print('Loading CSV datasets into SQL...')
+  load_sql_datasets(replace=True)
