summary | refs | log | tree | commit | diff
path: root/check
diff options
context:
space:
mode:
Diffstat (limited to 'check')
-rw-r--r-- check/app/server/api.py 8
-rw-r--r-- check/cli_phash.py 2
-rw-r--r-- check/commands/phash/dedupe.py 46
3 files changed, 51 insertions, 5 deletions
diff --git a/check/app/server/api.py b/check/app/server/api.py
index 1b17a1e..c4f9f80 100644
--- a/check/app/server/api.py
+++ b/check/app/server/api.py
@@ -5,7 +5,6 @@ import numpy as np
from flask import Blueprint, request, jsonify
from PIL import Image
-# from app.utils.im_utils import pil2np
from app.models.sql_factory import search_by_phash, add_phash
from app.utils.im_utils import pil2np
@@ -42,9 +41,10 @@ def upload():
})
im = Image.open(file.stream).convert('RGB')
- im_np = pil2np(im)
+ phash = compute_phash_int(im)
- res = search_by_phash(im_np)
+ threshold = request.args.get('threshold') || 6
+
+ res = search_by_phash(phash, threshold)
- # get threshold
return jsonify({ 'res': res })
diff --git a/check/cli_phash.py b/check/cli_phash.py
index c5df139..169dfa5 100644
--- a/check/cli_phash.py
+++ b/check/cli_phash.py
@@ -20,7 +20,7 @@ cc = ClickSimple.create(cfg.DIR_COMMANDS_PHASH)
help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL')
@click.pass_context
def cli(ctx, **kwargs):
- """\033[1m\033[94mMegaPixels: Dataset Image Scripts\033[0m
+ """\033[1m\033[94mVFrame Check Image Deduplication API\033[0m
"""
ctx.opts = {}
# init logger
diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py
new file mode 100644
index 0000000..3cf60d4
--- /dev/null
+++ b/check/commands/phash/dedupe.py
@@ -0,0 +1,46 @@
+"""
+Dedupe a folder of images
+"""
+
+import click
+import os
+import glob
+
+from PIL import Image
+
+from app.utils.im_utils import compute_phash
+
+@click.command()
+@click.option('-i', '--input', 'opt_input_glob',
+    required=True,
+    help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+@click.option('-t', '--threshold', 'opt_threshold',
+    required=True,
+    default=6,
+    type=int,
+    help="Threshold for PHash hamming distance comparison (0-64, default=6)")
+@click.pass_context
+def cli(ctx, opt_input_glob, opt_threshold):
+    """
+    Dedupe a folder of images
+    """
+    # NOTE: the docstring above is left untouched because click prints it as
+    # the command's --help text.
+    #
+    # Walks every path matching `opt_input_glob`, computes a perceptual hash
+    # per image and keeps only the images that `is_phash_new` reports as not
+    # matching any previously kept one. Prints one line per detected match
+    # (from `is_phash_new`) and a final summary; nothing is deleted or moved.
+    seen = []  # kept images so far, as {'phash': ..., 'fn': ...} dicts
+    total = 0
+    # '~' is expanded before globbing; paths are visited in sorted order, so
+    # an earlier path is the one kept when later paths match it.
+    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
+        total += 1
+        # Close the source file deterministically once the RGB copy exists,
+        # instead of leaving one open handle per image to the garbage collector.
+        with Image.open(fn) as im_src:
+            im = im_src.convert('RGB')
+        phash = compute_phash(im)
+        if is_phash_new(fn, phash, seen, opt_threshold):
+            seen.append({
+                'phash': phash,
+                'fn': fn,
+            })
+    print("checked {} files, found {} unique".format(total, len(seen)))
+
+def is_phash_new(fn, phash, seen, opt_threshold):
+    """
+    Tell whether `phash` differs enough from every hash already in `seen`.
+
+    `seen` is a list of dicts with 'phash' and 'fn' keys. The distance to an
+    entry is whatever `entry['phash'] - phash` evaluates to.
+    NOTE(review): presumably this is the hamming distance between two hash
+    objects -- confirm against `compute_phash`; plain ints would subtract
+    arithmetically instead.
+
+    Returns False (after printing the matching pair) at the first entry whose
+    distance is strictly below `opt_threshold`, and True when no entry is
+    that close.
+    """
+    for entry in seen:
+        distance = entry['phash'] - phash
+        # NOTE(review): strict comparison, so a threshold of 0 never matches,
+        # even for identical hashes -- confirm this is intended.
+        if not (distance < opt_threshold):
+            continue
+        print("{} === {} (diff: {})".format(fn, entry['fn'], distance))
+        return False
+    return True