summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2019-04-23 13:10:45 +0200
committerJules Laplace <julescarbon@gmail.com>2019-04-23 13:10:45 +0200
commitf5f30912a2efd9c3d47cc9aeaf6a542211b00474 (patch)
tree094c4d1e006711b3f7724f2aaf869e21ac5cb406
parent79f0e696f3f6067a0841a37404fb546dedaa07cb (diff)
edits
-rw-r--r--check/app/server/api.py8
-rw-r--r--check/cli_phash.py2
-rw-r--r--check/commands/phash/dedupe.py46
-rw-r--r--docs/specifications.md73
4 files changed, 71 insertions, 58 deletions
diff --git a/check/app/server/api.py b/check/app/server/api.py
index 1b17a1e..c4f9f80 100644
--- a/check/app/server/api.py
+++ b/check/app/server/api.py
@@ -5,7 +5,6 @@ import numpy as np
from flask import Blueprint, request, jsonify
from PIL import Image
-# from app.utils.im_utils import pil2np
from app.models.sql_factory import search_by_phash, add_phash
from app.utils.im_utils import pil2np
@@ -42,9 +41,10 @@ def upload():
})
im = Image.open(file.stream).convert('RGB')
- im_np = pil2np(im)
+ phash = compute_phash_int(im)
- res = search_by_phash(im_np)
+ threshold = request.args.get('threshold', default=6, type=int)  # Python has no `||`; 6 when absent or not an integer
+
+ res = search_by_phash(phash, threshold)
- # get threshold
return jsonify({ 'res': res })
diff --git a/check/cli_phash.py b/check/cli_phash.py
index c5df139..169dfa5 100644
--- a/check/cli_phash.py
+++ b/check/cli_phash.py
@@ -20,7 +20,7 @@ cc = ClickSimple.create(cfg.DIR_COMMANDS_PHASH)
help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL')
@click.pass_context
def cli(ctx, **kwargs):
- """\033[1m\033[94mMegaPixels: Dataset Image Scripts\033[0m
+ """\033[1m\033[94mVFrame Check Image Deduplication API\033[0m
"""
ctx.opts = {}
# init logger
diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py
new file mode 100644
index 0000000..3cf60d4
--- /dev/null
+++ b/check/commands/phash/dedupe.py
@@ -0,0 +1,46 @@
+"""
+Dedupe a folder of images
+"""
+
+import click
+import os
+import glob
+
+from PIL import Image
+
+from app.utils.im_utils import compute_phash
+
+@click.command()
+@click.option('-i', '--input', 'opt_input_glob',
+    required=True,
+    help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+@click.option('-t', '--threshold', 'opt_threshold',
+    required=True,
+    default=6,
+    type=int,
+    help="Threshold for PHash hamming distance comparison (0-64, default=6)")
+@click.pass_context
+def cli(ctx, opt_input_glob, opt_threshold):
+    """
+    Dedupe a folder of images
+    """
+    seen = []  # one {'phash', 'fn'} record per file judged unique so far
+    total = 0
+    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):  # sorted: first name in a duplicate group is kept
+        total += 1
+        with Image.open(fn) as im:  # context manager closes the underlying file instead of waiting for GC
+            phash = compute_phash(im.convert('RGB'))
+        if is_phash_new(fn, phash, seen, opt_threshold):
+            seen.append({
+                'phash': phash,
+                'fn': fn,
+            })
+    print("checked {} files, found {} unique".format(total, len(seen)))
+
+def is_phash_new(fn, phash, seen, opt_threshold):
+    """Return True unless a seen entry's phash differs from `phash` by less than `opt_threshold`."""
+    gaps = ((entry['phash'] - phash, entry) for entry in seen)
+    hit = next(((gap, entry) for gap, entry in gaps if gap < opt_threshold), None)
+    if hit is not None:  # report only the first near-duplicate, in seen order
+        print("{} === {} (diff: {})".format(fn, hit[1]['fn'], hit[0]))
+    return hit is None
diff --git a/docs/specifications.md b/docs/specifications.md
index ec5c81f..6840a8d 100644
--- a/docs/specifications.md
+++ b/docs/specifications.md
@@ -48,72 +48,39 @@ Example response for a successful image upload with no match:
```
{
- "success": True,
- "match": False,
- "closest_matches":
- [
- {
- "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90",
- "score": 2
- },
- {
- "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f",
- "score": 3
- }
- ]
-"
+ "success": true,
+ "match": false
+}
```
-Example response for a successful image upload with a match:
+Example response for a successful image upload with a match, within the standard similarity threshold:
`check.vframe.io/v1/match/`
```
{
- "success": True,
- "match": True,
- "match":
- {
- "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632",
- "score": 0
- },
- "close_matches":
- [
- {
- "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90",
- "score": 2
- },
- {
- "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f",
- "score": 2
- }
- ]
+ "success": true,
+ "match": true,
+ "closest_match": {
+ "sha256": "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632",
+ "score": 0
+ }
+}
"
```
Get match, but with more permissive threshold
-`check.vframe.io/v1/match/threshold/3/`
+`check.vframe.io/v1/match/?threshold=10`
```
{
- "success": True,
- "match": True,
- "matches":
- {
- "sha256: "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632",
- "score": 0
- },
- "closest_matches":
- [
- {
- "sha256: "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90",
- "score": 3
- },
- {
- "sha256: "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f",
- "score": 3
- }
- ]
-" \ No newline at end of file
+ "success": true,
+ "match": true,
+ "closest_match": {
+ "sha256": "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632",
+ "score": 7
+ }
+}
+```