diff options
Diffstat (limited to 'check')
| -rw-r--r-- | check/commands/phash/dedupe.py | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py index 2e99f62..28266f4 100644 --- a/check/commands/phash/dedupe.py +++ b/check/commands/phash/dedupe.py @@ -9,18 +9,22 @@ import glob from PIL import Image from app.utils.im_utils import compute_phash +from app.utils.file_utils import write_json, sha256 @click.command() @click.option('-i', '--input', 'opt_input_glob', required=True, help="Input glob to search -- e.g. '../docs/images/*.jpg'") +@click.option('-o', '--output', 'opt_output_fn', + required=False, + help="Output JSON file for the list of unique images -- e.g. '../docs/images/unique.json'") @click.option('-t', '--threshold', 'opt_threshold', required=True, default=6, type=int, help="Threshold for PHash hamming distance comparison (0-64, default=6)") @click.pass_context -def cli(ctx, opt_input_glob, opt_threshold): +def cli(ctx, opt_input_glob, opt_output_fn, opt_threshold): """ Dedupe a folder of images """ @@ -31,10 +35,14 @@ def cli(ctx, opt_input_glob, opt_threshold): im = Image.open(fn).convert('RGB') phash = compute_phash(im) if is_phash_new(fn, phash, seen, opt_threshold): + hash = sha256(fn) seen.append({ + 'sha256': hash, 'phash': phash, 'fn': fn, }) + if opt_output_fn: + write_json(seen, opt_output_fn) print("checked {} files, found {} unique".format(total, len(seen))) def is_phash_new(fn, phash, seen, opt_threshold): |
