summary refs log tree commit diff
path: root/check
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-04-15 16:26:03 +0200
committer Jules Laplace <julescarbon@gmail.com> 2019-04-15 16:26:03 +0200
commit 79f0e696f3f6067a0841a37404fb546dedaa07cb (patch)
tree a064f2841dc532f81fcf04eb84300e679fda2b27 /check
parent e257e83f313a2976347b0a30f58e66b7bcbc1235 (diff)
cli suite working
Diffstat (limited to 'check')
-rw-r--r-- check/__init__.py 0
-rw-r--r-- check/app/__init__.py 0
-rw-r--r-- check/app/models/click_factory.py 145
-rw-r--r-- check/app/models/sql_factory.py 50
-rw-r--r-- check/app/settings/app_cfg.py 45
-rw-r--r-- check/app/settings/types.py 10
-rw-r--r-- check/app/utils/im_utils.py 3
-rw-r--r-- check/app/utils/logger_utils.py 68
-rw-r--r-- check/cli_phash.py (renamed from check/cli_imagehash.py) 2
-rw-r--r-- check/commands/imagehash/add.py 34
-rw-r--r-- check/commands/imagehash/load.py 22
-rw-r--r-- check/commands/imagehash/query.py 34
-rw-r--r-- check/commands/imagehash/test.py 18
-rw-r--r-- check/commands/phash/add.py 21
-rw-r--r-- check/commands/phash/drop.py 22
-rw-r--r-- check/commands/phash/load.py 23
-rw-r--r-- check/commands/phash/query.py 45
-rw-r--r-- check/commands/phash/test.py 41
18 files changed, 465 insertions, 118 deletions
diff --git a/check/__init__.py b/check/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/check/__init__.py
diff --git a/check/app/__init__.py b/check/app/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/check/app/__init__.py
diff --git a/check/app/models/click_factory.py b/check/app/models/click_factory.py
new file mode 100644
index 0000000..61a3b5e
--- /dev/null
+++ b/check/app/models/click_factory.py
@@ -0,0 +1,145 @@
+"""
+Click processor factory
+- Inspired by and used code from @wiretapped's HTSLAM codebase
+- In particular the very useful
+"""
+
+import os
+import sys
+from os.path import join
+from pathlib import Path
+import os
+from os.path import join
+import sys
+from functools import update_wrapper, wraps
+import itertools
+from pathlib import Path
+from glob import glob
+import importlib
+import logging
+
+import click
+from app.settings import app_cfg as cfg
+
+
+# --------------------------------------------------------
+# Click Group Class
+# --------------------------------------------------------
+
+# set global variable during parent class create
+dir_plugins = None # set in create
+
+class ClickComplex:
+ """Wrapper generator for custom Click CLI's based on LR's coroutine"""
+
+ def __init__(self):
+ pass
+
+
+ class CustomGroup(click.Group):
+ #global dir_plugins # from CliGenerator init
+
+ # lists commands in plugin directory
+ def list_commands(self, ctx):
+ global dir_plugins # from CliGenerator init
+ rv = list(self.commands.keys())
+ fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \
+ if str(x).endswith('.py') \
+ and '__init__' not in str(x)]
+ for fp_cmd in fp_cmds:
+ try:
+ assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name)
+ except Exception as ex:
+ logging.getLogger('app').error('{}'.format(ex))
+ rv.append(fp_cmd.stem)
+ rv.sort()
+ return rv
+
+ # Complex version: gets commands in directory and in this file
+ # Based on code from @wiretapped + HTSLAM
+ def get_command(self, ctx, cmd_name):
+ global dir_plugins
+ if cmd_name in self.commands:
+ return self.commands[cmd_name]
+ ns = {}
+ fpp_cmd = Path(dir_plugins, cmd_name + '.py')
+ fp_cmd = fpp_cmd.as_posix()
+ if not fpp_cmd.exists():
+ sys.exit('[-] {} file does not exist'.format(fpp_cmd))
+ code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec')
+ try:
+ eval(code, ns, ns)
+ except Exception as ex:
+ logging.getLogger('vframe').error('exception: {}'.format(ex))
+ @click.command()
+ def _fail():
+ raise Exception('while loading {}'.format(fpp_cmd.name))
+ _fail.short_help = repr(ex)
+ _fail.help = repr(ex)
+ return _fail
+ if 'cli' not in ns:
+ sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd))
+ return ns['cli']
+
+ @classmethod
+ def create(self, dir_plugins_local):
+ global dir_plugins
+ dir_plugins = dir_plugins_local
+ return self.CustomGroup
+
+
+
+class ClickSimple:
+ """Wrapper generator for custom Click CLI's"""
+
+ def __init__(self):
+ pass
+
+
+ class CustomGroup(click.Group):
+ #global dir_plugins # from CliGenerator init
+
+ # lists commands in plugin directory
+ def list_commands(self, ctx):
+ global dir_plugins # from CliGenerator init
+ rv = list(self.commands.keys())
+ fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \
+ if str(x).endswith('.py') \
+ and '__init__' not in str(x)]
+ for fp_cmd in fp_cmds:
+ assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name)
+ rv.append(fp_cmd.stem)
+ rv.sort()
+ return rv
+
+ # Complex version: gets commands in directory and in this file
+ # from HTSLAM
+ def get_command(self, ctx, cmd_name):
+ global dir_plugins # from CliGenerator init
+ if cmd_name in self.commands:
+ return self.commands[cmd_name]
+ ns = {}
+ fpp_cmd = Path(dir_plugins, cmd_name + '.py')
+ fp_cmd = fpp_cmd.as_posix()
+ if not fpp_cmd.exists():
+ sys.exit('[-] {} file does not exist'.format(fpp_cmd))
+ code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec')
+ try:
+ eval(code, ns, ns)
+ except Exception as ex:
+ logging.getLogger('vframe').error('exception: {}'.format(ex))
+ @click.command()
+ def _fail():
+ raise Exception('while loading {}'.format(fpp_cmd.name))
+ _fail.short_help = repr(ex)
+ _fail.help = repr(ex)
+ return _fail
+ if 'cli' not in ns:
+ sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd))
+ return ns['cli']
+
+ @classmethod
+ def create(self, dir_plugins_local):
+ global dir_plugins
+ dir_plugins = dir_plugins_local
+ return self.CustomGroup
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index d4a371e..1d32a68 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -3,11 +3,17 @@ import glob
import time
import pandas as pd
-from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger
+from PIL import Image
+
+from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger, text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from app.settings import app_cfg as cfg
+from app.settings.types import VALID_IMAGE_EXTENSIONS
+
+from app.utils.im_utils import compute_phash_int
+from app.utils.file_utils import sha256
connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
os.getenv("DB_USER"),
@@ -19,16 +25,15 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
loaded = False
engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
Session = sessionmaker(bind=engine)
-
Base = declarative_base()
class FileTable(Base):
"""Table for storing various hashes of images"""
__tablename__ = 'files'
id = Column(Integer, primary_key=True)
- sha256 = Column(String(36, convert_unicode=True), nullable=False)
+ sha256 = Column(String(64, convert_unicode=True), nullable=False)
phash = Column(BigInteger, nullable=False)
- ext = Column(String(3, convert_unicode=True), nullable=False)
+ ext = Column(String(4, convert_unicode=True), nullable=False)
def toJSON(self):
return {
'id': self.id,
@@ -37,12 +42,21 @@ class FileTable(Base):
'ext': self.ext,
}
+Base.metadata.create_all(engine)
+
def search_by_phash(phash, threshold=6):
"""Search files for a particular phash"""
connection = engine.connect()
- cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
- matches = connection.execute(text(cmd), phash=phash, threshold=threshold)
- return matches
+ cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
+ matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall()
+ keys = ('id', 'sha256', 'phash', 'ext', 'score')
+ results = [ dict(zip(keys, values)) for values in matches ]
+ return results
+
+def search_by_hash(hash):
+ session = Session()
+ match = session.query(FileTable).filter(FileTable.sha256 == hash)
+ return match.first()
def add_phash(sha256, phash, ext):
"""Add a file to the table"""
@@ -51,4 +65,26 @@ def add_phash(sha256, phash, ext):
)
session = Session()
session.add(rec)
+ session.commit()
session.flush()
+
+def add_phash_by_filename(path):
+ """Add a file by filename, getting all the necessary attributes"""
+ print(path)
+ if not os.path.exists(path):
+ print("File does not exist")
+ return
+
+ dir, fn = os.path.split(path)
+ root, ext = os.path.splitext(fn)
+ ext = ext.strip('.')
+ if ext not in VALID_IMAGE_EXTENSIONS:
+ print("Not an image file")
+ return
+
+ im = Image.open(path).convert('RGB')
+ phash = compute_phash_int(im)
+
+ hash = sha256(path)
+
+ add_phash(sha256=hash, phash=phash, ext=ext)
diff --git a/check/app/settings/app_cfg.py b/check/app/settings/app_cfg.py
index 904e0d4..ce5b5d8 100644
--- a/check/app/settings/app_cfg.py
+++ b/check/app/settings/app_cfg.py
@@ -14,7 +14,7 @@ codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else Non
# -----------------------------------------------------------------------------
# click settings
# -----------------------------------------------------------------------------
-DIR_COMMANDS_CHECK = 'commands/check'
+DIR_COMMANDS_PHASH = 'commands/phash'
# -----------------------------------------------------------------------------
# S3 storage
@@ -29,3 +29,46 @@ S3_HTTP_METADATA_URL = join(S3_HTTP_URL, 'metadata')
# -----------------------------------------------------------------------------
CELERY_BROKER_URL = 'redis://localhost:6379/0'
CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
+
+# -----------------------------------------------------------------------------
+# Logging options exposed for custom click Params
+# -----------------------------------------------------------------------------
+LOGGER_NAME = 'app'
+LOGLEVELS = {
+ types.LogLevel.DEBUG: logging.DEBUG,
+ types.LogLevel.INFO: logging.INFO,
+ types.LogLevel.WARN: logging.WARN,
+ types.LogLevel.ERROR: logging.ERROR,
+ types.LogLevel.CRITICAL: logging.CRITICAL
+}
+LOGLEVEL_OPT_DEFAULT = types.LogLevel.DEBUG.name
+#LOGFILE_FORMAT = "%(asctime)s: %(levelname)s: %(message)s"
+#LOGFILE_FORMAT = "%(levelname)s:%(name)s: %(message)s"
+#LOGFILE_FORMAT = "%(levelname)s: %(message)s"
+#LOGFILE_FORMAT = "%(filename)s:%(lineno)s %(funcName)s() %(message)s"
+# colored logs
+"""
+black, red, green, yellow, blue, purple, cyan and white.
+{color}, fg_{color}, bg_{color}: Foreground and background colors.
+bold, bold_{color}, fg_bold_{color}, bg_bold_{color}: Bold/bright colors.
+reset: Clear all formatting (both foreground and background colors).
+"""
+LOGFILE_FORMAT = "%(log_color)s%(levelname)-8s%(reset)s %(cyan)s%(filename)s:%(lineno)s:%(bold_cyan)s%(funcName)s() %(reset)s%(message)s"
+
+LogLevelVar = click_utils.ParamVar(types.LogLevel)
+
+# -----------------------------------------------------------------------------
+# Filesystem settings
+# hash trees enforce a maximum number of directories per directory
+# -----------------------------------------------------------------------------
+ZERO_PADDING = 6 # padding for enumerated image filenames
+#FRAME_NAME_ZERO_PADDING = 6 # is this active??
+CKPT_ZERO_PADDING = 9
+HASH_TREE_DEPTH = 3
+HASH_BRANCH_SIZE = 3
+
+# -----------------------------------------------------------------------------
+# .env config for keys
+# -----------------------------------------------------------------------------
+# DIR_DOTENV = join(DIR_APP, '.env')
+load_dotenv() # dotenv_path=DIR_DOTENV)
diff --git a/check/app/settings/types.py b/check/app/settings/types.py
index e3e9b4e..044579b 100644
--- a/check/app/settings/types.py
+++ b/check/app/settings/types.py
@@ -5,3 +5,13 @@ def find_type(name, enum_type):
if name == enum_opt.name.lower():
return enum_opt
return None
+
+# ---------------------------------------------------------------------
+# Logger, monitoring
+# --------------------------------------------------------------------
+
+class LogLevel(Enum):
+ """Logger verbosity"""
+ DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)
+
+VALID_IMAGE_EXTENSIONS = ['gif', 'jpg', 'jpeg', 'png']
diff --git a/check/app/utils/im_utils.py b/check/app/utils/im_utils.py
index dfd5739..747e900 100644
--- a/check/app/utils/im_utils.py
+++ b/check/app/utils/im_utils.py
@@ -15,6 +15,7 @@ import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
+import struct
from torch.autograd import Variable
from sklearn.metrics.pairwise import cosine_similarity
import datetime
@@ -214,7 +215,7 @@ def phash2int(phash):
:returns: binary-encoded bigint
"""
phash.hash[-1] = False
- phash_as_bigint = struct.unpack('Q', numpy.packbits(phash.hash))[0]
+ phash_as_bigint = struct.unpack('Q', np.packbits(phash.hash))[0]
return phash_as_bigint
def compute_phash_int(im):
diff --git a/check/app/utils/logger_utils.py b/check/app/utils/logger_utils.py
new file mode 100644
index 0000000..d4f962e
--- /dev/null
+++ b/check/app/utils/logger_utils.py
@@ -0,0 +1,68 @@
+"""
+Logger instantiator for use with Click utility scripts
+"""
+import sys
+import os
+import logging
+
+import colorlog
+
+from app.settings import app_cfg as cfg
+
+
+class Logger:
+
+ logger_name = 'app'
+
+ def __init__(self):
+ pass
+
+ @staticmethod
+ def create(verbosity=4, logfile=None):
+ """Configures a logger from click params
+ :param verbosity: (int) between 0 and 5
+ :param logfile: (str) path to logfile
+ :returns: logging root object
+ """
+
+ loglevel = (5 - (max(0, min(verbosity, 5)))) * 10 # where logging.DEBUG = 10
+ date_format = '%Y-%m-%d %H:%M:%S'
+ if 'colorlog' in sys.modules and os.isatty(2):
+ cformat = '%(log_color)s' + cfg.LOGFILE_FORMAT
+ f = colorlog.ColoredFormatter(cformat, date_format,
+ log_colors = { 'DEBUG' : 'yellow', 'INFO' : 'white',
+ 'WARNING' : 'bold_yellow', 'ERROR': 'bold_red',
+ 'CRITICAL': 'bold_red' })
+ else:
+ f = logging.Formatter(cfg.LOGFILE_FORMAT, date_format)
+
+ # logger = logging.getLogger(Logger.logger_name)
+ logger = logging.getLogger(cfg.LOGGER_NAME)
+ logger.setLevel(loglevel)
+
+ if logfile:
+ # create file handler which logs even debug messages
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(loglevel)
+ logger.addHandler(fh)
+
+ # add colored handler
+ ch = logging.StreamHandler()
+ ch.setFormatter(f)
+ logger.addHandler(ch)
+
+ if verbosity == 0:
+ logger.disabled = True
+
+ # test
+ # logger.debug('Hello Debug')
+ # logger.info('Hello Info')
+ # logger.warn('Hello Warn')
+ # logger.error('Hello Error')
+ # logger.critical('Hello Critical')
+
+ return logger
+
+ @staticmethod
+ def getLogger():
+ return logging.getLogger(cfg.LOGGER_NAME)
\ No newline at end of file
diff --git a/check/cli_imagehash.py b/check/cli_phash.py
index 32f8250..c5df139 100644
--- a/check/cli_imagehash.py
+++ b/check/cli_phash.py
@@ -9,7 +9,7 @@ from app.utils import logger_utils
from app.models.click_factory import ClickSimple
# click cli factory
-cc = ClickSimple.create(cfg.DIR_COMMANDS_IMAGEHASH)
+cc = ClickSimple.create(cfg.DIR_COMMANDS_PHASH)
# --------------------------------------------------------
# CLI
diff --git a/check/commands/imagehash/add.py b/check/commands/imagehash/add.py
deleted file mode 100644
index 73f8b69..0000000
--- a/check/commands/imagehash/add.py
+++ /dev/null
@@ -1,34 +0,0 @@
-"""
-Add a file to the database
-"""
-
-import click
-import os
-
-from app.models.sql_factory import search_by_phash, add_phash
-from app.utils.im_utils import compute_phash_int
-from app.utils.file_utils import sha256
-
-@click.command()
-@click.option('-i', '--input', 'opt_fn',
- required=True,
- help="File to add (gif/jpg/png)")
-@click.option('-u', '--upload', 'opt_upload', is_flag=True,
- help='Whether to upload this file to S3')
-@click.pass_context
-def cli(ctx, opt_fn, opt_upload):
- """
- Add a single file
- """
- print('Adding a file...')
- if not os.path.exists(opt_fn):
- print("File does not exist")
- return
-
- hash = sha256(opt_fn)
- phash = compute_phash_int(opt_fn)
-
- dir, fn = os.path.split(opt_fn)
- root, ext = os.path.splitext(fn)
-
- add_phash(sha256=hash, phash=phash, ext=ext)
diff --git a/check/commands/imagehash/load.py b/check/commands/imagehash/load.py
deleted file mode 100644
index e61b751..0000000
--- a/check/commands/imagehash/load.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""
-Loop over a directory of images
- - Compute their phashes
- - Optionally upload them to s3?
-"""
-
-import click
-
-from app.models.sql_factory import search_by_phash, add_phash
-
-@click.command()
-@click.option('-i', '--input', 'opt_dir_fn',
- required=True,
- help="File to add (gif/jpg/png)")
-@click.option('-u', '--upload', 'opt_upload', is_flag=True,
- help='Whether to upload this file to S3')
-@click.pass_context
-def cli(ctx, opt_dir_fn, opt_store):
- """
- Add a directory of images
- """
- print('Adding a directory...')
diff --git a/check/commands/imagehash/query.py b/check/commands/imagehash/query.py
deleted file mode 100644
index f5d3a54..0000000
--- a/check/commands/imagehash/query.py
+++ /dev/null
@@ -1,34 +0,0 @@
-"""
-Search the database for an image
-"""
-
-import click
-import os
-
-from app.models.sql_factory import search_by_phash
-from app.utils.im_utils import compute_phash_int
-from app.utils.file_utils import sha256
-
-@click.command()
-@click.option('-i', '--input', 'opt_fn',
- required=True,
- help="File to search")
-@click.pass_context
-def cli(ctx, opt_fn):
- """
- Search the database for an image
- """
- print('Searching for a file...')
-
- if not os.path.exists(opt_fn):
- print("File does not exist")
- return
-
- hash = sha256(opt_fn)
- phash = compute_phash_int(opt_fn)
-
- res = search_by_hash(hash)
- print("search by hash:", res)
-
- res =search_by_phash(phash)
- print("search by phash:", res)
diff --git a/check/commands/imagehash/test.py b/check/commands/imagehash/test.py
deleted file mode 100644
index b3ddbe5..0000000
--- a/check/commands/imagehash/test.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""
-Query the database with a test set
-"""
-
-import click
-
-from app.models.sql_factory import search_by_phash, add_phash
-
-@click.command()
-@click.option('-i', '--input', 'opt_fn',
- required=True,
- help="Directory to search")
-@click.pass_context
-def cli(ctx, opt_fn):
- """
- Search the database for an image
- """
- print('Query the database with a test set')
diff --git a/check/commands/phash/add.py b/check/commands/phash/add.py
new file mode 100644
index 0000000..1565202
--- /dev/null
+++ b/check/commands/phash/add.py
@@ -0,0 +1,21 @@
+"""
+Add a file to the database
+"""
+
+import click
+
+from app.models.sql_factory import add_phash_by_filename
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+ required=True,
+ help="File to add (gif/jpg/png)")
+@click.option('-u', '--upload', 'opt_upload', is_flag=True,
+ help='Whether to upload this file to S3')
+@click.pass_context
+def cli(ctx, opt_fn, opt_upload):
+ """
+ Add a single file
+ """
+ print('Adding a file...')
+ add_phash_by_filename(opt_fn)
diff --git a/check/commands/phash/drop.py b/check/commands/phash/drop.py
new file mode 100644
index 0000000..40a8261
--- /dev/null
+++ b/check/commands/phash/drop.py
@@ -0,0 +1,22 @@
+"""
+Drop the database (useful when testing)
+"""
+
+import click
+import glob
+
+from app.models.sql_factory import Base, engine
+
+@click.command()
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+ help='Actually drop the database')
+@click.pass_context
+def cli(ctx, opt_force):
+ """
+ Drop the database
+ """
+ if opt_force:
+ print('Dropping the database...!')
+ Base.metadata.drop_all(engine)
+ else:
+ print('Will foolishly drop the database if the --force flag is passed')
\ No newline at end of file
diff --git a/check/commands/phash/load.py b/check/commands/phash/load.py
new file mode 100644
index 0000000..ce9ceef
--- /dev/null
+++ b/check/commands/phash/load.py
@@ -0,0 +1,23 @@
+"""
+Loop over a directory of images
+ - Compute their phashes
+ - Optionally upload them to s3?
+"""
+
+import click
+import glob
+
+from app.models.sql_factory import add_phash_by_filename
+
+@click.command()
+@click.option('-i', '--input', 'opt_input_glob',
+ required=True,
+ help="File glob to add -- e.g. '../docs/images/*.jpg'")
+@click.pass_context
+def cli(ctx, opt_input_glob):
+ """
+ Add a directory of images
+ """
+ print('Adding a directory...')
+ for fn in glob.iglob(opt_input_glob):
+ add_phash_by_filename(fn)
diff --git a/check/commands/phash/query.py b/check/commands/phash/query.py
new file mode 100644
index 0000000..8fc8c61
--- /dev/null
+++ b/check/commands/phash/query.py
@@ -0,0 +1,45 @@
+"""
+Search the database for an image
+"""
+
+import click
+import os
+
+from PIL import Image
+
+from app.models.sql_factory import search_by_phash, search_by_hash
+from app.utils.im_utils import compute_phash_int
+from app.utils.file_utils import sha256
+
+@click.command()
+@click.option('-i', '--input', 'opt_fn',
+ required=True,
+ help="File to search")
+@click.pass_context
+def cli(ctx, opt_fn):
+ """
+ Search the database for an image
+ """
+ print('Searching for a file...')
+
+ if not os.path.exists(opt_fn):
+ print("File does not exist")
+ return
+
+ im = Image.open(opt_fn).convert('RGB')
+ phash = compute_phash_int(im)
+
+ hash = sha256(opt_fn)
+
+ phash_match = search_by_phash(phash)
+ hash_match = search_by_hash(hash)
+
+ hash_result = 'NO'
+ if hash_match:
+ hash_result = 'YES'
+
+ phash_result = 'NO'
+ if len(phash_match):
+ phash_result = 'YES, score={}'.format(phash_match[0]['score'])
+
+ print("{} - hash={}, phash={}".format(opt_fn, hash_result, phash_result))
diff --git a/check/commands/phash/test.py b/check/commands/phash/test.py
new file mode 100644
index 0000000..7fe2ae3
--- /dev/null
+++ b/check/commands/phash/test.py
@@ -0,0 +1,41 @@
+"""
+Query the database with a test set
+"""
+
+import click
+import os
+import glob
+
+from PIL import Image
+
+from app.models.sql_factory import search_by_phash, search_by_hash
+from app.utils.im_utils import compute_phash_int
+from app.utils.file_utils import sha256
+
+@click.command()
+@click.option('-i', '--input', 'opt_input_glob',
+ required=True,
+ help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+@click.pass_context
+def cli(ctx, opt_input_glob):
+ """
+ Query the database with a test set
+ """
+ for fn in sorted(glob.iglob(opt_input_glob)):
+ im = Image.open(fn).convert('RGB')
+ phash = compute_phash_int(im)
+
+ hash = sha256(fn)
+
+ phash_match = search_by_phash(phash)
+ hash_match = search_by_hash(hash)
+
+ hash_result = 'NO'
+ if hash_match:
+ hash_result = 'YES'
+
+ phash_result = 'NO'
+ if len(phash_match):
+ phash_result = 'YES, score={}'.format(phash_match[0]['score'])
+
+ print("{} - hash={}, phash={}".format(fn, hash_result, phash_result))