author    Jules Laplace <julescarbon@gmail.com>    2019-04-14 19:52:16 +0200
committer Jules Laplace <julescarbon@gmail.com>    2019-04-14 19:52:16 +0200
commit    61f169d109c9ba1c01ada06d830973c873d4e634 (patch)
tree      8ec603eaa0e10332367fc9c4bef085db2b18c06a /check
parent    0a4b3ea4611fcbe66b148041f82e77a0f138e688 (diff)
record format
Diffstat (limited to 'check')
-rw-r--r--  check/app/models/sql_factory.py    | 38
-rw-r--r--  check/app/utils/im_utils.py        | 10
-rw-r--r--  check/commands/cli_imagehash.py    |  1
-rw-r--r--  check/commands/imagehash/add.py    | 21
-rw-r--r--  check/commands/imagehash/load.py   | 25
-rw-r--r--  check/commands/imagehash/query.py  | 19
-rw-r--r--  check/commands/imagehash/test.py   | 19
7 files changed, 100 insertions, 33 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index 5cfb36b..499113d 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -3,11 +3,10 @@ import glob
import time
import pandas as pd
-from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float, func
+from sqlalchemy import create_engine, Table, Column, String, BigInteger, Integer, DateTime, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
-from app.utils.file_utils import load_recipe, load_csv_safe
from app.settings import app_cfg as cfg
connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
@@ -20,24 +19,23 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
loaded = False
Session = None
-class FileRecord(self.base_model):
- __tablename__ = self.name + "_file_record"
+Base = declarative_base()
+
+class ImageHashTable(Base):
+ __tablename__ = 'imagehashes'
id = Column(Integer, primary_key=True)
- ext = Column(String(3, convert_unicode=True), nullable=False)
- fn = Column(String(36, convert_unicode=True), nullable=False)
- identity_key = Column(String(36, convert_unicode=True), nullable=False)
sha256 = Column(String(36, convert_unicode=True), nullable=False)
+ phash = Column(BigInteger, nullable=False)
+ ext = Column(String(3, convert_unicode=True), nullable=False)
def toJSON(self):
return {
'id': self.id,
- 'uuid': self.uuid,
- 'identity_id': self.identity_id,
+ 'sha256': self.sha256,
+ 'phash': self.phash,
+ 'ext': self.ext,
}
def load_sql_datasets(replace=False, base_model=None):
- global datasets, loaded, Session
- if loaded:
- return datasets
engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
# db.set_character_set('utf8')
# dbc = db.cursor()
@@ -45,29 +43,17 @@ def load_sql_datasets(replace=False, base_model=None):
# dbc.execute('SET CHARACTER SET utf8;')
# dbc.execute('SET character_set_connection=utf8;')
Session = sessionmaker(bind=engine)
- for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
- dataset = load_sql_dataset(path, replace, engine, base_model)
- datasets[dataset.name] = dataset
- loaded = True
- return datasets
-
-def load_sql_dataset(path, replace=False, engine=None, base_model=None):
- name = os.path.basename(path)
- dataset = SqlDataset(name, base_model=base_model)
for fn in glob.iglob(os.path.join(path, "*.csv")):
key = os.path.basename(fn).replace(".csv", "")
- table = dataset.get_table(key)
- if table is None:
- continue
if replace:
print('loading dataset {}'.format(fn))
df = pd.read_csv(fn)
# fix columns that are named "index", a sql reserved word
df.reindex_axis(sorted(df.columns), axis=1)
- columns = [column.name for column in table.__table__.columns]
+ columns = [column.name for column in ImageHashTable.__table__.columns]
df.columns = columns
- df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
+ df.to_sql(name=ImageHashTable.__tablename__, con=engine, if_exists='replace', index=False)
return dataset
class SqlDataset:
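
The new ImageHashTable model stores one row per image: a sha256 digest, a packed 64-bit perceptual hash, and the file extension. Two things worth noting: a hex-encoded sha256 digest is 64 characters, so the String(36) column carried over from the old model would truncate it, and after the removals load_sql_datasets still refers to path and returns dataset, which nothing left in the function defines. Below is a minimal, self-contained sketch of the model in use; the in-memory SQLite URL, the String(64) width, and the sample values are assumptions for illustration, not part of the commit.

# Minimal sketch of the ImageHashTable model in use (illustrative only).
# The SQLite URL, String(64) width, and sample values are assumptions.
from sqlalchemy import create_engine, Column, String, BigInteger, Integer
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class ImageHashTable(Base):
    __tablename__ = 'imagehashes'
    id = Column(Integer, primary_key=True)
    sha256 = Column(String(64), nullable=False)  # hex sha256 is 64 chars
    phash = Column(BigInteger, nullable=False)   # packed 64-bit perceptual hash
    ext = Column(String(3), nullable=False)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

session.add(ImageHashTable(sha256='ab' * 32, phash=123456789, ext='jpg'))
session.commit()
row = session.query(ImageHashTable).first()
print(row.id, row.sha256[:8], row.phash, row.ext)
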
diff --git a/check/app/utils/im_utils.py b/check/app/utils/im_utils.py
index 3002632..eae7bc1 100644
--- a/check/app/utils/im_utils.py
+++ b/check/app/utils/im_utils.py
@@ -208,6 +208,16 @@ def compute_phash(im):
"""
return imagehash.phash(ensure_pil(im))
+def compute_phash_int(im):
+ """Compute perceptual hash using ImageHash library and convert to binary
+ :param im: Numpy.ndarray
+ :returns: ImageHash as binary
+ """
+ phash = imagehash.phash(ensure_pil(im))
+ phash.hash[-1] = False
+ phash_as_bigint = struct.unpack('Q', numpy.packbits(phash.hash))[0]
+ return phash_as_bigint
+
def compute_dhash(im):
"""Compute difference hash using ImageHash library
:param im: Numpy.ndarray
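
compute_phash_int packs the 8x8 boolean hash returned by imagehash.phash into a single 64-bit integer: numpy.packbits turns each row into one byte and struct.unpack('Q', ...) reads the eight bytes as one unsigned integer. Zeroing the last row first presumably keeps the packed value small enough to fit a signed BIGINT column on little-endian machines; the function also relies on struct and numpy being imported in im_utils.py. A standalone version of the conversion, with a placeholder image path:

# Standalone sketch of the phash-to-int conversion (placeholder image path;
# assumes Pillow, numpy and imagehash are installed).
import struct
import numpy as np
import imagehash
from PIL import Image

phash = imagehash.phash(Image.open('example.jpg'))  # 8x8 boolean array
phash.hash[-1] = False            # drop the last row; on little-endian this
                                  # keeps the packed value below 2**56
packed = np.packbits(phash.hash)  # eight bytes, one per row
phash_as_int = struct.unpack('Q', packed)[0]
print(phash_as_int)
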
diff --git a/check/commands/cli_imagehash.py b/check/commands/cli_imagehash.py
index c51c84c..32f8250 100644
--- a/check/commands/cli_imagehash.py
+++ b/check/commands/cli_imagehash.py
@@ -32,4 +32,3 @@ def cli(ctx, **kwargs):
# --------------------------------------------------------
if __name__ == '__main__':
cli()
-
diff --git a/check/commands/imagehash/add.py b/check/commands/imagehash/add.py
index 7c5dc31..37aad0d 100644
--- a/check/commands/imagehash/add.py
+++ b/check/commands/imagehash/add.py
@@ -1 +1,20 @@
-# add a file to the database
\ No newline at end of file
+"""
+Add a file to the database
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+ required=True,
+ help="File to add (gif/jpg/png)")
+@click.option('-u', '--upload', 'opt_upload', is_flag=True,
+ help='Whether to upload this file to S3')
+@click.pass_context
+def cli(ctx, opt_fn, opt_upload):
+ """
+ Add a single file
+ """
+ print('Adding a file...')
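
The add command is still a stub. A hypothetical sketch of what it might grow into, wiring together the compute_phash_int helper from im_utils.py and the ImageHashTable model from sql_factory.py; the add_file name, the session handling, and the omission of the S3 upload are assumptions.

# Hypothetical sketch only: hash a single file and insert a row.
import hashlib
import os

import numpy as np
from PIL import Image

from app.models.sql_factory import ImageHashTable
from app.utils.im_utils import compute_phash_int

def add_file(fn, session):
    with open(fn, 'rb') as f:
        sha256 = hashlib.sha256(f.read()).hexdigest()
    phash = compute_phash_int(np.asarray(Image.open(fn)))  # helper added in im_utils.py
    ext = os.path.splitext(fn)[1].lstrip('.').lower()
    session.add(ImageHashTable(sha256=sha256, phash=phash, ext=ext))
    session.commit()
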
diff --git a/check/commands/imagehash/load.py b/check/commands/imagehash/load.py
index bdcb19c..dbb2567 100644
--- a/check/commands/imagehash/load.py
+++ b/check/commands/imagehash/load.py
@@ -1,3 +1,22 @@
-# loop over a directory of images
-# - compute their phashes
-# - optionally upload them to s3?
+"""
+Loop over a directory of images
+ - Compute their phashes
+ - Optionally upload them to s3?
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_dir_fn',
+ required=True,
+ help="File to add (gif/jpg/png)")
+@click.option('-u', '--upload', 'opt_upload', is_flag=True,
+ help='Whether to upload this file to S3')
+@click.pass_context
+def cli(ctx, opt_dir_fn, opt_upload):
+ """
+ Add a directory of images
+ """
+ print('Adding a directory...')
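
load.py is likewise a stub. A hypothetical directory walk that feeds each supported image to the add_file sketch above; the extension list and the sorting are assumptions.

# Hypothetical sketch only: walk a directory and add each image.
import glob
import os

IMAGE_EXTS = ('.gif', '.jpg', '.jpeg', '.png')

def load_directory(dir_fn, session):
    for fn in sorted(glob.glob(os.path.join(dir_fn, '*'))):
        if os.path.splitext(fn)[1].lower() in IMAGE_EXTS:
            add_file(fn, session)  # see the add.py sketch above
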
diff --git a/check/commands/imagehash/query.py b/check/commands/imagehash/query.py
index eb15960..cf5c2d4 100644
--- a/check/commands/imagehash/query.py
+++ b/check/commands/imagehash/query.py
@@ -1 +1,18 @@
-# search the database for a file
\ No newline at end of file
+"""
+Search the database for an image
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+ required=True,
+ help="File to search")
+@click.pass_context
+def cli(ctx, opt_fn):
+ """
+ Search the database for an image
+ """
+ print('Searching for a file...')
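
query.py does not implement the lookup yet. One common way to search a BIGINT phash column for near matches is a Hamming-distance scan using MySQL's BIT_COUNT over an XOR; the commit does not confirm this approach, and the full-table scan, threshold, and limit below are assumptions.

# Hypothetical sketch only: nearest-phash lookup via BIT_COUNT(phash ^ :q)
# (MySQL-specific function, matching the mysql+mysqlconnector URL above).
from sqlalchemy import text

def query_phash(session, phash_int, max_distance=10):
    sql = text(
        "SELECT id, sha256, ext, BIT_COUNT(phash ^ :q) AS dist "
        "FROM imagehashes "
        "WHERE BIT_COUNT(phash ^ :q) <= :d "
        "ORDER BY dist LIMIT 10"
    )
    return session.execute(sql, {'q': phash_int, 'd': max_distance}).fetchall()
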
diff --git a/check/commands/imagehash/test.py b/check/commands/imagehash/test.py
index 05d2ee4..499a424 100644
--- a/check/commands/imagehash/test.py
+++ b/check/commands/imagehash/test.py
@@ -1 +1,18 @@
-# query the database with a test set
\ No newline at end of file
+"""
+Query the database with a test set
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+ required=True,
+ help="Directory to search")
+@click.pass_context
+def cli(ctx, opt_fn):
+ """
+ Search the database for an image
+ """
+ print('Searching for a file...')
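
test.py is a stub as well. Evaluating a test set mostly comes down to the Hamming distance between two packed 64-bit phash ints, which is a one-liner; purely illustrative.

# Purely illustrative: Hamming distance between two packed 64-bit phashes.
def hamming_distance(a, b):
    return bin(a ^ b).count('1')

assert hamming_distance(0b1011, 0b0010) == 2
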