summary | refs | log | tree | commit | diff
path: root/check/commands/phash/import.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-04-27 19:20:39 +0200
committer: Jules Laplace <julescarbon@gmail.com> 2019-04-27 19:20:39 +0200
commit: c59a52dacd1259fadb545c852883412e61f74c84 (patch)
tree: e032cbd200c9041c2470197e008ae8d3b56932fb /check/commands/phash/import.py
parent: 1db97e03f5cac4eb6421e0b55628a3187c41e29c (diff)
import script
Diffstat (limited to 'check/commands/phash/import.py')
-rw-r--r-- check/commands/phash/import.py | 60
1 files changed, 60 insertions, 0 deletions
diff --git a/check/commands/phash/import.py b/check/commands/phash/import.py
new file mode 100644
index 0000000..5e8dc6f
--- /dev/null
+++ b/check/commands/phash/import.py
@@ -0,0 +1,60 @@
+"""
+Import a folder of images, deduping them first
+"""
+
+import click
+import os
+import glob
+
+from PIL import Image
+
+from app.models.sql_factory import add_phash
+from app.utils.im_utils import compute_phash, phash2int
+from app.utils.file_utils import write_json, sha256
+
+valid_exts = ['.gif', '.jpg', '.jpeg', '.png']
+
@click.command()
@click.option('-i', '--input', 'opt_input_glob',
              required=True,
              help="Input glob to search -- e.g. 'static/sample_set_test_01/images/*'")
@click.option('-t', '--threshold', 'opt_threshold',
              required=True,
              default=6,
              type=int,
              help="Threshold for hamming distance comparison (0-64, default=6)")
@click.pass_context
def cli(ctx, opt_input_glob, opt_threshold):
    """
    Import a folder of images, deduping them first
    """
    # NOTE: the docstring above is kept verbatim because click uses it as the
    # command's --help text.
    #
    # Flow: every file matched by the glob whose extension is in `valid_exts`
    # is perceptually hashed; a file is registered through `add_phash` only if
    # `is_phash_new` reports that it is not a near-duplicate of a file already
    # accepted earlier in this same run (matches are visited in sorted order).

    # Stored URLs are built as '/' + path, so only paths under static/ are
    # accepted; bail out before doing any work otherwise.
    if not opt_input_glob.startswith('static/'):
        print("Please move your files into the static folder to make them accessible")
        return

    seen = []   # one record per accepted (unique) image in this run
    total = 0   # number of candidate image files examined

    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
        # Compare the extension case-insensitively so files such as
        # "photo.JPG" are not silently skipped.
        ext = os.path.splitext(fn)[1].lower()
        if ext not in valid_exts:
            continue
        total += 1

        # Release the underlying file as soon as the hash has been computed
        # rather than relying on garbage collection.
        with Image.open(fn) as im:
            phash = compute_phash(im.convert('RGB'))

        if not is_phash_new(fn, phash, seen, opt_threshold):
            continue

        digest = sha256(fn)
        seen.append({
            'sha256': digest,
            'phash': phash,
            'fn': fn,
        })
        # The extension is stored without its leading dot.
        add_phash(sha256=digest, phash=phash2int(phash), ext=ext[1:], url='/' + fn)

    print("checked {} files, found {} unique".format(total, len(seen)))
+
def is_phash_new(fn, phash, seen, opt_threshold):
    """Return True if `phash` is not a near-duplicate of any entry in `seen`.

    Args:
        fn: Path of the file being checked. Not used by the comparison; kept
            so existing callers do not have to change.
        phash: Hash of the candidate image. It must support subtraction
            against the stored hashes, yielding a value comparable with
            `opt_threshold` (presumably a Hamming distance -- confirm against
            `compute_phash`).
        seen: Iterable of dicts, each holding a previously accepted hash
            under the 'phash' key.
        opt_threshold: A stored hash whose difference from `phash` is
            strictly below this value marks the candidate as a duplicate.

    Returns:
        False as soon as one stored hash is closer than `opt_threshold`,
        True otherwise (including when `seen` is empty).
    """
    return not any(item['phash'] - phash < opt_threshold for item in seen)