summary | refs | log | tree | commit | diff
path: root/check/commands/phash
diff options
context:
space:
mode:
Diffstat (limited to 'check/commands/phash')
-rw-r--r-- check/commands/phash/dedupe.py 12
-rw-r--r-- check/commands/phash/query.py 42
-rw-r--r-- check/commands/phash/test.py 49
3 files changed, 52 insertions, 51 deletions
diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py
index 3cf60d4..28266f4 100644
--- a/check/commands/phash/dedupe.py
+++ b/check/commands/phash/dedupe.py
@@ -1,5 +1,5 @@
"""
-Dedupe a folder of images
+Dedupe a folder of images (uses phash directly, does not use database)
"""
import click
@@ -9,18 +9,22 @@ import glob
from PIL import Image
from app.utils.im_utils import compute_phash
+from app.utils.file_utils import write_json, sha256
@click.command()
@click.option('-i', '--input', 'opt_input_glob',
required=True,
help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+@click.option('-o', '--output', 'opt_output_fn',
+ required=False,
+ help="Input glob to search -- e.g. '../docs/images/*.jpg'")
@click.option('-t', '--threshold', 'opt_threshold',
required=True,
default=6,
type=int,
help="Threshold for PHash hamming distance comparison (0-64, default=6)")
@click.pass_context
-def cli(ctx, opt_input_glob, opt_threshold):
+def cli(ctx, opt_input_glob, opt_output_fn, opt_threshold):
"""
Dedupe a folder of images
"""
@@ -31,10 +35,14 @@ def cli(ctx, opt_input_glob, opt_threshold):
im = Image.open(fn).convert('RGB')
phash = compute_phash(im)
if is_phash_new(fn, phash, seen, opt_threshold):
+ hash = sha256(fn)
seen.append({
+ 'sha256': hash,
'phash': phash,
'fn': fn,
})
+ if opt_output_fn:
+ write_json(seen, opt_output_fn)
print("checked {} files, found {} unique".format(total, len(seen)))
def is_phash_new(fn, phash, seen, opt_threshold):
diff --git a/check/commands/phash/query.py b/check/commands/phash/query.py
index 8fc8c61..7fe2ae3 100644
--- a/check/commands/phash/query.py
+++ b/check/commands/phash/query.py
@@ -1,9 +1,10 @@
"""
-Search the database for an image
+Query the database with a test set
"""
import click
import os
+import glob
from PIL import Image
@@ -12,34 +13,29 @@ from app.utils.im_utils import compute_phash_int
from app.utils.file_utils import sha256
@click.command()
-@click.option('-i', '--input', 'opt_fn',
+@click.option('-i', '--input', 'opt_input_glob',
required=True,
- help="File to search")
+ help="Input glob to search -- e.g. '../docs/images/*.jpg'")
@click.pass_context
-def cli(ctx, opt_fn):
+def cli(ctx, opt_input_glob):
"""
- Search the database for an image
+ Query the database with a test set
"""
- print('Searching for a file...')
+ for fn in sorted(glob.iglob(opt_input_glob)):
+ im = Image.open(fn).convert('RGB')
+ phash = compute_phash_int(im)
- if not os.path.exists(opt_fn):
- print("File does not exist")
- return
+ hash = sha256(fn)
- im = Image.open(opt_fn).convert('RGB')
- phash = compute_phash_int(im)
+ phash_match = search_by_phash(phash)
+ hash_match = search_by_hash(hash)
- hash = sha256(opt_fn)
+ hash_result = 'NO'
+ if hash_match:
+ hash_result = 'YES'
- phash_match = search_by_phash(phash)
- hash_match = search_by_hash(hash)
+ phash_result = 'NO'
+ if len(phash_match):
+ phash_result = 'YES, score={}'.format(phash_match[0]['score'])
- hash_result = 'NO'
- if hash_match:
- hash_result = 'YES'
-
- phash_result = 'NO'
- if len(phash_match):
- phash_result = 'YES, score={}'.format(phash_match[0]['score'])
-
- print("{} - hash={}, phash={}".format(opt_fn, hash_result, phash_result))
+ print("{} - hash={}, phash={}".format(fn, hash_result, phash_result))
diff --git a/check/commands/phash/test.py b/check/commands/phash/test.py
index 7fe2ae3..77c4c69 100644
--- a/check/commands/phash/test.py
+++ b/check/commands/phash/test.py
@@ -1,41 +1,38 @@
"""
-Query the database with a test set
+Test the API
"""
import click
import os
import glob
+import requests
-from PIL import Image
-
-from app.models.sql_factory import search_by_phash, search_by_hash
-from app.utils.im_utils import compute_phash_int
-from app.utils.file_utils import sha256
+mime_types = {
+ '.png': 'image/png',
+ '.gif': 'image/gif',
+ '.jpg': 'image/jpeg',
+ '.jpeg': 'image/jpeg',
+}
@click.command()
-@click.option('-i', '--input', 'opt_input_glob',
+@click.option('-i', '--input', 'opt_input_fn',
required=True,
- help="Input glob to search -- e.g. '../docs/images/*.jpg'")
+ help="Image to test the API with")
@click.pass_context
-def cli(ctx, opt_input_glob):
+def cli(ctx, opt_input_fn):
"""
- Query the database with a test set
+ Query the API with a test image
"""
- for fn in sorted(glob.iglob(opt_input_glob)):
- im = Image.open(fn).convert('RGB')
- phash = compute_phash_int(im)
-
- hash = sha256(fn)
-
- phash_match = search_by_phash(phash)
- hash_match = search_by_hash(hash)
-
- hash_result = 'NO'
- if hash_match:
- hash_result = 'YES'
+ with open(opt_input_fn, 'rb') as f:
+ fn = os.path.basename(opt_input_fn)
+ fpart, ext = os.path.splitext(fn)
+ if ext not in mime_types:
+ print("Invalid filetype: {}".format(ext))
- phash_result = 'NO'
- if len(phash_match):
- phash_result = 'YES, score={}'.format(phash_match[0]['score'])
+ query = [
+ ('q', (fn, f, mime_types[ext]))
+ ]
- print("{} - hash={}, phash={}".format(fn, hash_result, phash_result))
+ print("Testing match API")
+ r = requests.post('http://0.0.0.0:5000/api/v1/match', files=query)
+ print(r.json())