summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- check/app/models/sql_factory.py | 2
-rw-r--r-- check/commands/phash/drop.py | 6
-rw-r--r-- check/commands/phash/import.py | 60
3 files changed, 64 insertions, 4 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index ad27f62..1aa617c 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -31,7 +31,7 @@ class FileTable(Base):
"""Table for storing various hashes of images"""
__tablename__ = 'files'
id = Column(Integer, primary_key=True)
- sha256 = Column(String(64, convert_unicode=True), nullable=False)
+ sha256 = Column(String(64, convert_unicode=True), nullable=False, unique=True)
phash = Column(BigInteger, nullable=False, index=True)
ext = Column(String(4, convert_unicode=True), nullable=False)
url = Column(String(255, convert_unicode=True), nullable=False)
diff --git a/check/commands/phash/drop.py b/check/commands/phash/drop.py
index 40a8261..aee922d 100644
--- a/check/commands/phash/drop.py
+++ b/check/commands/phash/drop.py
@@ -15,8 +15,8 @@ def cli(ctx, opt_force):
"""
Drop the database
"""
- if opt_force:
+ if not opt_force:
+ print('Will foolishly drop the database only if the --force flag is passed')
+ else:
print('Dropping the database...!')
Base.metadata.drop_all(engine)
- else:
- print('Will foolishly drop the database if the --force flag is passed')
\ No newline at end of file
diff --git a/check/commands/phash/import.py b/check/commands/phash/import.py
new file mode 100644
index 0000000..5e8dc6f
--- /dev/null
+++ b/check/commands/phash/import.py
@@ -0,0 +1,60 @@
+"""
+Import a folder of images, deduping them first
+"""
+
+import click
+import os
+import glob
+
+from PIL import Image
+
+from app.models.sql_factory import add_phash
+from app.utils.im_utils import compute_phash, phash2int
+from app.utils.file_utils import write_json, sha256
+
# Image file extensions (lower-case, with leading dot) accepted by the importer.
valid_exts = ['.gif', '.jpg', '.jpeg', '.png']


@click.command()
@click.option('-i', '--input', 'opt_input_glob',
              required=True,
              help="Input glob to search -- e.g. 'static/sample_set_test_01/images/*'")
@click.option('-t', '--threshold', 'opt_threshold',
              required=True,
              default=6,
              type=int,
              help="Threshold for hamming distance comparison (0-64, default=6)")
@click.pass_context
def cli(ctx, opt_input_glob, opt_threshold):
    """
    Import a folder of images, deduping them first

    Every path matched by ``opt_input_glob`` (visited in sorted order) whose
    extension is listed in ``valid_exts`` is perceptually hashed.  An image is
    stored through ``add_phash`` only when ``is_phash_new`` reports that its
    hash is not within ``opt_threshold`` of an image already accepted during
    this run.

    The glob must start with ``static/``; the stored URL is simply ``'/'``
    followed by the matched path.
    """
    if not opt_input_glob.startswith('static/'):
        print("Please move your files into the static folder to make them accessible")
        return

    seen = []
    total = 0
    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
        # Compare extensions case-insensitively so files such as "photo.JPG"
        # are not silently skipped.
        ext = os.path.splitext(fn)[1].lower()
        if ext not in valid_exts:
            continue
        total += 1

        # The context manager releases the file handle as soon as the hash is
        # computed; otherwise a large import keeps one descriptor open per image.
        with Image.open(fn) as im:
            phash = compute_phash(im.convert('RGB'))

        if not is_phash_new(fn, phash, seen, opt_threshold):
            continue

        digest = sha256(fn)
        seen.append({
            'sha256': digest,
            'phash': phash,
            'fn': fn,
        })
        # The extension is stored without its leading dot.
        add_phash(sha256=digest, phash=phash2int(phash), ext=ext[1:], url='/' + fn)

    print("checked {} files, found {} unique".format(total, len(seen)))
+
def is_phash_new(fn, phash, seen, opt_threshold):
    """
    Return True when ``phash`` is not a near-duplicate of any entry in ``seen``.

    ``seen`` is a list of dicts, each holding a previously accepted hash under
    the ``'phash'`` key.  The candidate counts as a duplicate as soon as
    ``item['phash'] - phash`` is strictly below ``opt_threshold``; an empty
    ``seen`` list therefore always yields True.

    NOTE(review): this assumes subtracting two hashes yields a non-negative
    distance (e.g. a Hamming distance) -- confirm against what
    ``compute_phash`` returns.

    ``fn`` is not used here; it is kept so existing callers keep working.
    """
    return all(item['phash'] - phash >= opt_threshold for item in seen)