diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-23 13:10:45 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-23 13:10:45 +0200 |
| commit | f5f30912a2efd9c3d47cc9aeaf6a542211b00474 (patch) | |
| tree | 094c4d1e006711b3f7724f2aaf869e21ac5cb406 | |
| parent | 79f0e696f3f6067a0841a37404fb546dedaa07cb (diff) | |
edits
| -rw-r--r-- | check/app/server/api.py | 8 | ||||
| -rw-r--r-- | check/cli_phash.py | 2 | ||||
| -rw-r--r-- | check/commands/phash/dedupe.py | 46 | ||||
| -rw-r--r-- | docs/specifications.md | 73 |
4 files changed, 71 insertions, 58 deletions
diff --git a/check/app/server/api.py b/check/app/server/api.py index 1b17a1e..c4f9f80 100644 --- a/check/app/server/api.py +++ b/check/app/server/api.py @@ -5,7 +5,6 @@ import numpy as np from flask import Blueprint, request, jsonify from PIL import Image -# from app.utils.im_utils import pil2np from app.models.sql_factory import search_by_phash, add_phash from app.utils.im_utils import pil2np @@ -42,9 +41,10 @@ def upload(): }) im = Image.open(file.stream).convert('RGB') - im_np = pil2np(im) + phash = compute_phash_int(im) - res = search_by_phash(im_np) + threshold = request.args.get('threshold') || 6 + + res = search_by_phash(phash, threshold) - # get threshold return jsonify({ 'res': res }) diff --git a/check/cli_phash.py b/check/cli_phash.py index c5df139..169dfa5 100644 --- a/check/cli_phash.py +++ b/check/cli_phash.py @@ -20,7 +20,7 @@ cc = ClickSimple.create(cfg.DIR_COMMANDS_PHASH) help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL') @click.pass_context def cli(ctx, **kwargs): - """\033[1m\033[94mMegaPixels: Dataset Image Scripts\033[0m + """\033[1m\033[94mVFrame Check Image Deduplication API\033[0m """ ctx.opts = {} # init logger diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py new file mode 100644 index 0000000..3cf60d4 --- /dev/null +++ b/check/commands/phash/dedupe.py @@ -0,0 +1,46 @@ +""" +Dedupe a folder of images +""" + +import click +import os +import glob + +from PIL import Image + +from app.utils.im_utils import compute_phash + +@click.command() +@click.option('-i', '--input', 'opt_input_glob', + required=True, + help="Input glob to search -- e.g. '../docs/images/*.jpg'") +@click.option('-t', '--threshold', 'opt_threshold', + required=True, + default=6, + type=int, + help="Threshold for PHash hamming distance comparison (0-64, default=6)") +@click.pass_context +def cli(ctx, opt_input_glob, opt_threshold): + """ + Dedupe a folder of images + """ + seen = [] + total = 0 + for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))): + total += 1 + im = Image.open(fn).convert('RGB') + phash = compute_phash(im) + if is_phash_new(fn, phash, seen, opt_threshold): + seen.append({ + 'phash': phash, + 'fn': fn, + }) + print("checked {} files, found {} unique".format(total, len(seen))) + +def is_phash_new(fn, phash, seen, opt_threshold): + for item in seen: + diff = item['phash'] - phash + if diff < opt_threshold: + print("{} === {} (diff: {})".format(fn, item['fn'], diff)) + return False + return True diff --git a/docs/specifications.md b/docs/specifications.md index ec5c81f..6840a8d 100644 --- a/docs/specifications.md +++ b/docs/specifications.md @@ -48,72 +48,39 @@ Example response for a successful image upload with no match: ``` { - "success": True, - "match": False, - "closest_matches": - [ - { - "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", - "score": 2 - }, - { - "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", - "score": 3 - } - ] -" + "success": True, + "match": False +} ``` -Example response for a successful image upload with a match: +Example response for a successful image upload with a match, within the standard similarity threshold: `check.vframe.io/v1/match/` ``` { - "success": True, - "match": True, - "match": - { - "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", - "score": 0 - }, - "close_matches": - [ - { - "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", - "score": 2 - }, - { - "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", - "score": 2 - } - ] + "success": True, + "match": True, + "closest_match": { + "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + "score": 0 + } +} " ``` Get match, but with more permissive threshold -`check.vframe.io/v1/match/threshold/3/` +`check.vframe.io/v1/match/?threshold=10` ``` { - "success": True, - "match": True, - "matches": - { - "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", - "score": 0 - }, - "closest_matches": - [ - { - "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", - "score": 3 - }, - { - "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", - "score": 3 - } - ] -"
\ No newline at end of file + "success": True, + "match": True, + "closest_match": { + "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + "score": 7 + }, +} +``` |
