blob: c90d178b5ed28e50da81faa148fb2ed636b9771c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
"""
Load all the CSV files into MySQL
"""
import os
import glob
import click
import time
import pandas as pd
from app.models.sql_factory import engine, SqlDataset
from app.utils.file_utils import load_recipe, load_csv_safe
from app.settings import app_cfg as cfg
@click.command()
@click.pass_context
def cli(ctx):
    """import the various CSVs into MySQL
    """
    # Each entry directly under DIR_FAISS_METADATA is treated as one dataset
    # whose CSV files are loaded by build_dataset().
    for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
        # Stray plain files in the metadata directory are not datasets;
        # build_dataset() would find no CSVs beneath them anyway.
        if not os.path.isdir(path):
            continue
        build_dataset(path)
def build_dataset(path):
    """Load the CSV files of one dataset directory into their SQL tables.

    The base name of ``path`` is used as the dataset name handed to
    ``SqlDataset``. Every ``*.csv`` file directly inside ``path`` is matched
    to a table via ``dataset.get_table`` using the file name without its
    extension; files for which no table is returned are skipped.

    Args:
        path: Directory holding the dataset's CSV files.
    """
    name = os.path.basename(path)
    dataset = SqlDataset(name)
    for fn in glob.iglob(os.path.join(path, "*.csv")):
        # Strip only the trailing extension. str.replace(".csv", "") would
        # also remove any ".csv" occurring earlier in the file name.
        key, _ext = os.path.splitext(os.path.basename(fn))
        table = dataset.get_table(key)
        if table is None:
            continue
        df = pd.read_csv(fn)
        # fix columns that are named "index", a sql reserved word:
        # the CSV headers are replaced positionally by the model's column
        # names, so the CSV column order must match the model's.
        df.columns = table.__table__.columns.keys()
        # NOTE(review): if_exists='replace' drops the existing table before
        # inserting; presumably the recreated table takes its schema from the
        # DataFrame rather than from the model -- confirm this is intended.
        df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
|