diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-25 18:29:46 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-25 18:29:46 +0200 |
| commit | 4d5c3d59f32b80638d82373d33a476652520e260 (patch) | |
| tree | 88edd56458963229511b54276586c236604504b6 /check | |
| parent | 4f4df4d4e38f8ce27dc7e471359f9f644ca74092 (diff) | |
test API
Diffstat (limited to 'check')
| -rw-r--r-- | check/app/models/sql_factory.py | 12 | ||||
| -rw-r--r-- | check/app/server/api.py | 59 | ||||
| -rw-r--r-- | check/app/utils/file_utils.py | 10 | ||||
| -rw-r--r-- | check/commands/phash/dedupe.py | 2 | ||||
| -rw-r--r-- | check/commands/phash/query.py | 42 | ||||
| -rw-r--r-- | check/commands/phash/test.py | 49 | ||||
| -rw-r--r-- | check/static/demo.html | 7 | ||||
| -rw-r--r-- | check/static/index.html | 2 |
8 files changed, 122 insertions, 61 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index 1d32a68..68c2e30 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -44,11 +44,17 @@ class FileTable(Base): Base.metadata.create_all(engine) -def search_by_phash(phash, threshold=6): +def search_by_phash(phash, threshold=6, limit=1): """Search files for a particular phash""" connection = engine.connect() - cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' - matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall() + cmd = """ + SELECT files.*, BIT_COUNT(phash ^ :phash) + AS hamming_distance FROM files + HAVING hamming_distance < :threshold + ORDER BY hamming_distance ASC + LIMIT :limit + """ + matches = connection.execute(text(cmd), phash=phash, threshold=threshold, limit=limit).fetchall() keys = ('id', 'sha256', 'phash', 'ext', 'score') results = [ dict(zip(keys, values)) for values in matches ] return results diff --git a/check/app/server/api.py b/check/app/server/api.py index c4f9f80..322d899 100644 --- a/check/app/server/api.py +++ b/check/app/server/api.py @@ -2,11 +2,13 @@ import os import re import time import numpy as np +import logging from flask import Blueprint, request, jsonify from PIL import Image from app.models.sql_factory import search_by_phash, add_phash -from app.utils.im_utils import pil2np +from app.utils.im_utils import compute_phash_int +from app.utils.file_utils import sha256_stream sanitize_re = re.compile('[\W]+') valid_exts = ['.gif', '.jpg', '.jpeg', '.png'] @@ -22,29 +24,72 @@ def index(): """ return jsonify({ 'status': 'ok' }) -@api.route('/v1/match/', methods=['POST']) -def upload(): +@api.route('/v1/match', methods=['POST']) +def match(): """ Search by uploading an image """ start = time.time() + logging.debug(start) - file = request.files['query_img'] + file = request.files['q'] fn = file.filename if fn.endswith('blob'): # FIX PNG IMAGES? fn = 'filename.jpg' + logging.debug(fn) basename, ext = os.path.splitext(fn) if ext.lower() not in valid_exts: return jsonify({ + 'success': False, + 'match': False, 'error': 'not_an_image' }) + ext = ext[1:].lower() + im = Image.open(file.stream).convert('RGB') phash = compute_phash_int(im) - threshold = request.args.get('threshold') || 6 + logging.debug(phash) + try: + threshold = int(request.args.get('threshold') or 6) + limit = int(request.args.get('limit') or 1) + add = str(request.args.get('add') or 'true') == 'true' + except: + return jsonify({ + 'success': False, + 'match': False, + 'error': 'param_error' + }) + + results = search_by_phash(phash=phash, threshold=threshold, limit=limit) - res = search_by_phash(phash, threshold) + if len(results) == 0: + if add: + hash = sha256_stream(file) + add_phash(sha256=hash, phash=phash, ext=ext) + if limit == 1: + return jsonify({ + 'success': True, + 'match': False, + }) + else: + return jsonify({ + 'success': True, + 'match': False, + 'results': [], + }) + + if limit > 1: + return jsonify({ + 'success': True, + 'match': True, + 'results': results, + }) - return jsonify({ 'res': res }) + return jsonify({ + 'success': True, + 'match': True, + 'closest_match': results[0], + }) diff --git a/check/app/utils/file_utils.py b/check/app/utils/file_utils.py index 1ed1833..a185cf4 100644 --- a/check/app/utils/file_utils.py +++ b/check/app/utils/file_utils.py @@ -352,6 +352,16 @@ def sha256(fp_in, block_size=65536): sha256.update(block) return sha256.hexdigest() +def sha256_stream(stream, block_size=65536): + """Generates SHA256 hash for a file stream (from Flask) + :param fp_in: (FileStream) stream object + :param block_size: (int) byte size of block + :returns: (str) hash + """ + sha256 = hashlib.sha256() + for block in iter(lambda: stream.read(block_size), b''): + sha256.update(block) + return sha256.hexdigest() def sha256_tree(sha256): """Split hash into branches with tree-depth for faster file indexing diff --git a/check/commands/phash/dedupe.py b/check/commands/phash/dedupe.py index 3cf60d4..2e99f62 100644 --- a/check/commands/phash/dedupe.py +++ b/check/commands/phash/dedupe.py @@ -1,5 +1,5 @@ """ -Dedupe a folder of images +Dedupe a folder of images (uses phash directly, does not use database) """ import click diff --git a/check/commands/phash/query.py b/check/commands/phash/query.py index 8fc8c61..7fe2ae3 100644 --- a/check/commands/phash/query.py +++ b/check/commands/phash/query.py @@ -1,9 +1,10 @@ """ -Search the database for an image +Query the database with a test set """ import click import os +import glob from PIL import Image @@ -12,34 +13,29 @@ from app.utils.im_utils import compute_phash_int from app.utils.file_utils import sha256 @click.command() -@click.option('-i', '--input', 'opt_fn', +@click.option('-i', '--input', 'opt_input_glob', required=True, - help="File to search") + help="Input glob to search -- e.g. '../docs/images/*.jpg'") @click.pass_context -def cli(ctx, opt_fn): +def cli(ctx, opt_input_glob): """ - Search the database for an image + Query the database with a test set """ - print('Searching for a file...') + for fn in sorted(glob.iglob(opt_input_glob)): + im = Image.open(fn).convert('RGB') + phash = compute_phash_int(im) - if not os.path.exists(opt_fn): - print("File does not exist") - return + hash = sha256(fn) - im = Image.open(opt_fn).convert('RGB') - phash = compute_phash_int(im) + phash_match = search_by_phash(phash) + hash_match = search_by_hash(hash) - hash = sha256(opt_fn) + hash_result = 'NO' + if hash_match: + hash_result = 'YES' - phash_match = search_by_phash(phash) - hash_match = search_by_hash(hash) + phash_result = 'NO' + if len(phash_match): + phash_result = 'YES, score={}'.format(phash_match[0]['score']) - hash_result = 'NO' - if hash_match: - hash_result = 'YES' - - phash_result = 'NO' - if len(phash_match): - phash_result = 'YES, score={}'.format(phash_match[0]['score']) - - print("{} - hash={}, phash={}".format(opt_fn, hash_result, phash_result)) + print("{} - hash={}, phash={}".format(fn, hash_result, phash_result)) diff --git a/check/commands/phash/test.py b/check/commands/phash/test.py index 7fe2ae3..77c4c69 100644 --- a/check/commands/phash/test.py +++ b/check/commands/phash/test.py @@ -1,41 +1,38 @@ """ -Query the database with a test set +Test the API """ import click import os import glob +import requests -from PIL import Image - -from app.models.sql_factory import search_by_phash, search_by_hash -from app.utils.im_utils import compute_phash_int -from app.utils.file_utils import sha256 +mime_types = { + '.png': 'image/png', + '.gif': 'image/gif', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', +} @click.command() -@click.option('-i', '--input', 'opt_input_glob', +@click.option('-i', '--input', 'opt_input_fn', required=True, - help="Input glob to search -- e.g. '../docs/images/*.jpg'") + help="Image to test the API with") @click.pass_context -def cli(ctx, opt_input_glob): +def cli(ctx, opt_input_fn): """ - Query the database with a test set + Query the API with a test image """ - for fn in sorted(glob.iglob(opt_input_glob)): - im = Image.open(fn).convert('RGB') - phash = compute_phash_int(im) - - hash = sha256(fn) - - phash_match = search_by_phash(phash) - hash_match = search_by_hash(hash) - - hash_result = 'NO' - if hash_match: - hash_result = 'YES' + with open(opt_input_fn, 'rb') as f: + fn = os.path.basename(opt_input_fn) + fpart, ext = os.path.splitext(fn) + if ext not in mime_types: + print("Invalid filetype: {}".format(ext)) - phash_result = 'NO' - if len(phash_match): - phash_result = 'YES, score={}'.format(phash_match[0]['score']) + query = [ + ('q', (fn, f, mime_types[ext])) + ] - print("{} - hash={}, phash={}".format(fn, hash_result, phash_result)) + print("Testing match API") + r = requests.post('http://0.0.0.0:5000/api/v1/match', files=query) + print(r.json()) diff --git a/check/static/demo.html b/check/static/demo.html new file mode 100644 index 0000000..c8de017 --- /dev/null +++ b/check/static/demo.html @@ -0,0 +1,7 @@ +<html> +<head> + <title>PHash Check API Demo</title> +</head> +<body> +</body> +</html>
\ No newline at end of file diff --git a/check/static/index.html b/check/static/index.html index a17fa53..6c70bcf 100644 --- a/check/static/index.html +++ b/check/static/index.html @@ -1 +1 @@ -deebeephunky
\ No newline at end of file +<html></html>
\ No newline at end of file |
