Diffstat (limited to 'check/app/models/sql_factory.py')
 check/app/models/sql_factory.py | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index 5cfb36b..499113d 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -3,11 +3,10 @@ import glob
import time
import pandas as pd
-from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float, func
+from sqlalchemy import create_engine, Table, Column, String, BigInteger, Integer, DateTime, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
-from app.utils.file_utils import load_recipe, load_csv_safe
from app.settings import app_cfg as cfg
connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
@@ -20,24 +19,23 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
loaded = False
Session = None
-class FileRecord(self.base_model):
-    __tablename__ = self.name + "_file_record"
+Base = declarative_base()
+
+class ImageHashTable(Base):
+    __tablename__ = 'imagehashes'
    id = Column(Integer, primary_key=True)
-    ext = Column(String(3, convert_unicode=True), nullable=False)
-    fn = Column(String(36, convert_unicode=True), nullable=False)
-    identity_key = Column(String(36, convert_unicode=True), nullable=False)
    sha256 = Column(String(36, convert_unicode=True), nullable=False)
+    phash = Column(BigInteger, nullable=False)
+    ext = Column(String(3, convert_unicode=True), nullable=False)
    def toJSON(self):
        return {
            'id': self.id,
-            'uuid': self.uuid,
-            'identity_id': self.identity_id,
+            'sha256': self.sha256,
+            'phash': self.phash,
+            'ext': self.ext,
        }
def load_sql_datasets(replace=False, base_model=None):
-    global datasets, loaded, Session
-    if loaded:
-        return datasets
    engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
    # db.set_character_set('utf8')
    # dbc = db.cursor()
@@ -45,29 +43,17 @@ def load_sql_datasets(replace=False, base_model=None):
# dbc.execute('SET CHARACTER SET utf8;')
# dbc.execute('SET character_set_connection=utf8;')
Session = sessionmaker(bind=engine)
- for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
- dataset = load_sql_dataset(path, replace, engine, base_model)
- datasets[dataset.name] = dataset
- loaded = True
- return datasets
-
-def load_sql_dataset(path, replace=False, engine=None, base_model=None):
- name = os.path.basename(path)
- dataset = SqlDataset(name, base_model=base_model)
for fn in glob.iglob(os.path.join(path, "*.csv")):
key = os.path.basename(fn).replace(".csv", "")
- table = dataset.get_table(key)
- if table is None:
- continue
if replace:
print('loading dataset {}'.format(fn))
df = pd.read_csv(fn)
# fix columns that are named "index", a sql reserved word
df.reindex_axis(sorted(df.columns), axis=1)
- columns = [column.name for column in table.__table__.columns]
+ columns = [column.name for column in ImageHashTable.__table__.columns]
df.columns = columns
- df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
+ df.to_sql(name=ImageHashTable.__tablename__, con=engine, if_exists='replace', index=False)
return dataset
class SqlDataset:
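
As merged, the patched load_sql_datasets is left in a broken state: the surviving loop still reads path, which was a parameter of the deleted load_sql_dataset and is now undefined; the trailing return dataset refers to a variable that no longer exists; and df.reindex_axis(...) (long deprecated in pandas) discards its return value, so the column sort never takes effect. The following is a minimal sketch of what the merged loader appears to intend, not the committed code: csv_dir stands in for cfg.DIR_FAISS_METADATA (the directory the deleted loop iterated), the connection URL is a placeholder, and String(64) is an assumption based on a hex SHA-256 digest being 64 characters (the patch keeps String(36), which looks like a UUID length inherited from the old FileRecord).

import os
import glob

import pandas as pd
from sqlalchemy import create_engine, Column, String, BigInteger, Integer
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

# Placeholder; the real module builds this from app.settings.app_cfg.
connection_url = "mysql+mysqlconnector://user:pass@localhost/check?charset=utf8mb4"

Base = declarative_base()
Session = None

class ImageHashTable(Base):
    __tablename__ = 'imagehashes'

    id = Column(Integer, primary_key=True)
    sha256 = Column(String(64), nullable=False)  # hex SHA-256 digest: 64 chars (assumed)
    phash = Column(BigInteger, nullable=False)   # 64-bit perceptual hash
    ext = Column(String(3), nullable=False)

    def toJSON(self):
        return {
            'id': self.id,
            'sha256': self.sha256,
            'phash': self.phash,
            'ext': self.ext,
        }

def load_sql_datasets(csv_dir, replace=False):
    """Bulk-load every CSV under csv_dir into the imagehashes table."""
    global Session
    engine = create_engine(connection_url, pool_recycle=3600)
    Session = sessionmaker(bind=engine)
    for fn in glob.iglob(os.path.join(csv_dir, "*.csv")):
        if replace:
            print('loading dataset {}'.format(fn))
            df = pd.read_csv(fn)
            # Sort columns by name, then rename them to the model's columns.
            # The patch's reindex_axis call dropped its return value, so the
            # sort silently never happened; reindex keeps and uses the result.
            # Assumes the CSV has exactly one column per model column.
            df = df.reindex(columns=sorted(df.columns))
            df.columns = [c.name for c in ImageHashTable.__table__.columns]
            df.to_sql(name=ImageHashTable.__tablename__, con=engine,
                      if_exists='replace', index=False)

Note that renaming the sorted CSV columns to the model's declaration-order names only works if the two orders happen to coincide; selecting columns by name would be the more robust fix for the "index" reserved-word problem the original comment mentions.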