summaryrefslogtreecommitdiff
path: root/check/commands/phash/dedupe.py
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2019-04-26 04:07:15 +0200
committerJules Laplace <julescarbon@gmail.com>2019-04-26 04:07:15 +0200
commitc8df1b6f5031a030fea31012e096c14fb2fddf55 (patch)
tree0d3389f32b04296875c7209302e75177ae909a4e /check/commands/phash/dedupe.py
parentc9c72cdc3128fe272edeb6ec20959b2248f33877 (diff)
dump dedupe output
Diffstat (limited to 'check/commands/phash/dedupe.py')
-rw-r--r--check/commands/phash/dedupe.py10
1 files changed, 9 insertions, 1 deletions
diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py
index 2e99f62..28266f4 100644
--- a/check/commands/phash/dedupe.py
+++ b/check/commands/phash/dedupe.py
@@ -9,18 +9,22 @@ import glob
from PIL import Image
from app.utils.im_utils import compute_phash
+from app.utils.file_utils import write_json, sha256
@click.command()
@click.option('-i', '--input', 'opt_input_glob',
required=True,
help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+@click.option('-o', '--output', 'opt_output_fn',
+ required=False,
+ help="Output JSON file to write the list of unique images to -- e.g. '../docs/dedupe.json'")
@click.option('-t', '--threshold', 'opt_threshold',
required=True,
default=6,
type=int,
help="Threshold for PHash hamming distance comparison (0-64, default=6)")
@click.pass_context
-def cli(ctx, opt_input_glob, opt_threshold):
+def cli(ctx, opt_input_glob, opt_output_fn, opt_threshold):
"""
Dedupe a folder of images
"""
@@ -31,10 +35,14 @@ def cli(ctx, opt_input_glob, opt_threshold):
im = Image.open(fn).convert('RGB')
phash = compute_phash(im)
if is_phash_new(fn, phash, seen, opt_threshold):
+ hash = sha256(fn)
seen.append({
+ 'sha256': hash,
'phash': phash,
'fn': fn,
})
+ if opt_output_fn:
+ write_json(seen, opt_output_fn)
print("checked {} files, found {} unique".format(total, len(seen)))
def is_phash_new(fn, phash, seen, opt_threshold):