| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-14 19:52:16 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-14 19:52:16 +0200 |
| commit | 61f169d109c9ba1c01ada06d830973c873d4e634 (patch) | |
| tree | 8ec603eaa0e10332367fc9c4bef085db2b18c06a | |
| parent | 0a4b3ea4611fcbe66b148041f82e77a0f138e688 (diff) | |
record format
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | check/app/models/sql_factory.py | 38 |
| -rw-r--r-- | check/app/utils/im_utils.py | 10 |
| -rw-r--r-- | check/commands/cli_imagehash.py | 1 |
| -rw-r--r-- | check/commands/imagehash/add.py | 21 |
| -rw-r--r-- | check/commands/imagehash/load.py | 25 |
| -rw-r--r-- | check/commands/imagehash/query.py | 19 |
| -rw-r--r-- | check/commands/imagehash/test.py | 19 |
7 files changed, 100 insertions, 33 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index 5cfb36b..499113d 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -3,11 +3,10 @@
 import glob
 import time
 
 import pandas as pd
-from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float, func
+from sqlalchemy import create_engine, Table, Column, String, BigInteger, Integer, DateTime, func
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.ext.declarative import declarative_base
-from app.utils.file_utils import load_recipe, load_csv_safe
 from app.settings import app_cfg as cfg
 
 connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
@@ -20,24 +19,23 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
 loaded = False
 Session = None
-class FileRecord(self.base_model):
-  __tablename__ = self.name + "_file_record"
+Base = declarative_base()
+
+class ImageHashTable(Base):
+  __tablename__ = 'imagehashes'
   id = Column(Integer, primary_key=True)
-  ext = Column(String(3, convert_unicode=True), nullable=False)
-  fn = Column(String(36, convert_unicode=True), nullable=False)
-  identity_key = Column(String(36, convert_unicode=True), nullable=False)
   sha256 = Column(String(36, convert_unicode=True), nullable=False)
+  phash = Column(BigInteger(blank=True), nullable=False)
+  ext = Column(String(3, convert_unicode=True), nullable=False)
 
   def toJSON(self):
     return {
       'id': self.id,
-      'uuid': self.uuid,
-      'identity_id': self.identity_id,
+      'sha256': self.sha256,
+      'phash': self.phash,
+      'ext': self.ext,
     }
 
 def load_sql_datasets(replace=False, base_model=None):
-  global datasets, loaded, Session
-  if loaded:
-    return datasets
   engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
   # db.set_character_set('utf8')
   # dbc = db.cursor()
@@ -45,29 +43,17 @@ def load_sql_datasets(replace=False, base_model=None):
   # dbc.execute('SET CHARACTER SET utf8;')
   # dbc.execute('SET character_set_connection=utf8;')
   Session = sessionmaker(bind=engine)
 
-  for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
-    dataset = load_sql_dataset(path, replace, engine, base_model)
-    datasets[dataset.name] = dataset
-  loaded = True
-  return datasets
-
-def load_sql_dataset(path, replace=False, engine=None, base_model=None):
-  name = os.path.basename(path)
-  dataset = SqlDataset(name, base_model=base_model)
   for fn in glob.iglob(os.path.join(path, "*.csv")):
     key = os.path.basename(fn).replace(".csv", "")
-    table = dataset.get_table(key)
-    if table is None:
-      continue
     if replace:
       print('loading dataset {}'.format(fn))
       df = pd.read_csv(fn)
       # fix columns that are named "index", a sql reserved word
      df.reindex_axis(sorted(df.columns), axis=1)
-      columns = [column.name for column in table.__table__.columns]
+      columns = [column.name for column in ImageHashTable.__table__.columns]
       df.columns = columns
-      df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False)
+      df.to_sql(name=ImageHashTable.__tablename__, con=engine, if_exists='replace', index=False)
   return dataset
 
 class SqlDataset:
diff --git a/check/app/utils/im_utils.py b/check/app/utils/im_utils.py
index 3002632..eae7bc1 100644
--- a/check/app/utils/im_utils.py
+++ b/check/app/utils/im_utils.py
@@ -208,6 +208,16 @@ def compute_phash(im):
   """
   return imagehash.phash(ensure_pil(im))
 
+def compute_phash_int(im):
+  """Compute perceptual hash using ImageHash library and convert to binary
+  :param im: Numpy.ndarray
+  :returns: ImageHash as binary
+  """
+  phash = imagehash.phash(ensure_pil(im))
+  phash.hash[-1] = False
+  phash_as_bigint = struct.unpack('Q', numpy.packbits(phash.hash))[0]
+  return phash_as_bigint
+
 def compute_dhash(im):
   """Compute difference hash using ImageHash library
   :param im: Numpy.ndarray
diff --git a/check/commands/cli_imagehash.py b/check/commands/cli_imagehash.py
index c51c84c..32f8250 100644
--- a/check/commands/cli_imagehash.py
+++ b/check/commands/cli_imagehash.py
@@ -32,4 +32,3 @@ def cli(ctx, **kwargs):
 # --------------------------------------------------------
 if __name__ == '__main__':
   cli()
-
diff --git a/check/commands/imagehash/add.py b/check/commands/imagehash/add.py
index 7c5dc31..37aad0d 100644
--- a/check/commands/imagehash/add.py
+++ b/check/commands/imagehash/add.py
@@ -1 +1,20 @@
-# add a file to the database
\ No newline at end of file
+"""
+Add a file to the database
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+  required=True,
+  help="File to add (gif/jpg/png)")
+@click.option('-u', '--upload', 'opt_upload', is_flag=True,
+  help='Whether to upload this file to S3')
+@click.pass_context
+def cli(ctx, opt_fn, opt_upload):
+  """
+  Add a single file
+  """
+  print('Adding a file...')
diff --git a/check/commands/imagehash/load.py b/check/commands/imagehash/load.py
index bdcb19c..dbb2567 100644
--- a/check/commands/imagehash/load.py
+++ b/check/commands/imagehash/load.py
@@ -1,3 +1,22 @@
-# loop over a directory of images
- - compute their phashes
- - optionally upload them to s3?
+"""
+Loop over a directory of images
+ - Compute their phashes
+ - Optionally upload them to s3?
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_dir_fn',
+  required=True,
+  help="File to add (gif/jpg/png)")
+@click.option('-u', '--upload', 'opt_upload', is_flag=True,
+  help='Whether to upload this file to S3')
+@click.pass_context
+def cli(ctx, opt_dir_fn, opt_store):
+  """
+  Add a directory of images
+  """
+  print('Adding a directory...')
diff --git a/check/commands/imagehash/query.py b/check/commands/imagehash/query.py
index eb15960..cf5c2d4 100644
--- a/check/commands/imagehash/query.py
+++ b/check/commands/imagehash/query.py
@@ -1 +1,18 @@
-# search the database for a file
\ No newline at end of file
+"""
+Search the database for an image
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+  required=True,
+  help="File to search")
+@click.pass_context
+def cli(ctx, opt_fn):
+  """
+  Search the database for an image
+  """
+  print('Searching for a file...')
diff --git a/check/commands/imagehash/test.py b/check/commands/imagehash/test.py
index 05d2ee4..499a424 100644
--- a/check/commands/imagehash/test.py
+++ b/check/commands/imagehash/test.py
@@ -1 +1,18 @@
-# query the database with a test set
\ No newline at end of file
+"""
+Query the database with a test set
+"""
+
+import click
+
+# from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+  required=True,
+  help="Directory to search")
+@click.pass_context
+def cli(ctx, opt_fn):
+  """
+  Search the database for an image
+  """
+  print('Searching for a file...')
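The `compute_phash_int()` helper added in `im_utils.py` packs the 8x8 boolean pHash into a single 64-bit integer so it can sit in the new `phash` BIGINT column. Below is a minimal standalone sketch of that packing plus the XOR/popcount Hamming comparison a lookup would need; the helper names, comments, and sample filenames are illustrative assumptions, not code from this commit.

```python
import struct
import numpy as np
import imagehash
from PIL import Image

def phash_to_bigint(pil_im):
  # imagehash.phash returns an ImageHash wrapping an 8x8 boolean array
  ph = imagehash.phash(pil_im)
  # Zero the last row of bits, as compute_phash_int does; on little-endian
  # platforms that row becomes the most significant byte of the packed value,
  # so clearing it keeps the result inside a signed BIGINT's positive range
  ph.hash[-1] = False
  # Pack 64 booleans into 8 bytes, then read them back as one integer
  return struct.unpack('Q', np.packbits(ph.hash))[0]

def hamming_distance(a, b):
  # Number of differing bits between two packed hashes
  return bin(a ^ b).count('1')

if __name__ == '__main__':
  h1 = phash_to_bigint(Image.open('a.jpg'))
  h2 = phash_to_bigint(Image.open('b.jpg'))
  print(h1, h2, hamming_distance(h1, h2))
```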
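The CLI stubs (`add.py`, `load.py`, `query.py`, `test.py`) only print placeholders in this commit. Assuming the `ImageHashTable` model from `sql_factory.py` and `compute_phash_int()` from `im_utils.py`, a query might eventually look something like the brute-force scan below; the connection URL, distance threshold, and `find_similar()` helper are assumptions, not repository code.

```python
import cv2
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.models.sql_factory import ImageHashTable
from app.utils.im_utils import compute_phash_int

def find_similar(fn, max_distance=8):
  # Placeholder credentials; the repo builds this URL from app_cfg settings
  engine = create_engine('mysql+mysqlconnector://user:pass@localhost/check?charset=utf8mb4')
  session = sessionmaker(bind=engine)()
  target = compute_phash_int(cv2.imread(fn))
  matches = []
  # Brute-force scan; fine for a small table, a BK-tree or prefix index
  # would be needed at scale
  for row in session.query(ImageHashTable):
    dist = bin(row.phash ^ target).count('1')  # Hamming distance of packed hashes
    if dist <= max_distance:
      matches.append((dist, row.sha256, row.ext))
  session.close()
  return sorted(matches)
```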
