diff options
Diffstat (limited to 'check/app/models/sql_factory.py')
| -rw-r--r-- | check/app/models/sql_factory.py | 38 |
1 files changed, 12 insertions, 26 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index 5cfb36b..499113d 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -3,11 +3,10 @@ import glob import time import pandas as pd -from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float, func +from sqlalchemy import create_engine, Table, Column, String, BigInteger, Integer, DateTime, func from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base -from app.utils.file_utils import load_recipe, load_csv_safe from app.settings import app_cfg as cfg connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( @@ -20,24 +19,23 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( loaded = False Session = None -class FileRecord(self.base_model): - __tablename__ = self.name + "_file_record" +Base = declarative_base() + +class ImageHashTable(Base): + __tablename__ = 'imagehashes' id = Column(Integer, primary_key=True) - ext = Column(String(3, convert_unicode=True), nullable=False) - fn = Column(String(36, convert_unicode=True), nullable=False) - identity_key = Column(String(36, convert_unicode=True), nullable=False) sha256 = Column(String(36, convert_unicode=True), nullable=False) + phash = Column(BigInteger(blank=True), nullable=False) + ext = Column(String(3, convert_unicode=True), nullable=False) def toJSON(self): return { 'id': self.id, - 'uuid': self.uuid, - 'identity_id': self.identity_id, + 'sha256': self.sha256, + 'phash': self.phash, + 'ext': self.ext, } def load_sql_datasets(replace=False, base_model=None): - global datasets, loaded, Session - if loaded: - return datasets engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) # db.set_character_set('utf8') # dbc = db.cursor() @@ -45,29 +43,17 @@ def load_sql_datasets(replace=False, base_model=None): # dbc.execute('SET CHARACTER SET utf8;') # dbc.execute('SET character_set_connection=utf8;') Session = sessionmaker(bind=engine) - for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): - dataset = load_sql_dataset(path, replace, engine, base_model) - datasets[dataset.name] = dataset - loaded = True - return datasets - -def load_sql_dataset(path, replace=False, engine=None, base_model=None): - name = os.path.basename(path) - dataset = SqlDataset(name, base_model=base_model) for fn in glob.iglob(os.path.join(path, "*.csv")): key = os.path.basename(fn).replace(".csv", "") - table = dataset.get_table(key) - if table is None: - continue if replace: print('loading dataset {}'.format(fn)) df = pd.read_csv(fn) # fix columns that are named "index", a sql reserved word df.reindex_axis(sorted(df.columns), axis=1) - columns = [column.name for column in table.__table__.columns] + columns = [column.name for column in ImageHashTable.__table__.columns] df.columns = columns - df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False) + df.to_sql(name=ImageHashTable.__tablename__, con=engine, if_exists='replace', index=False) return dataset class SqlDataset: |
