diff options
Diffstat (limited to 'check/app')
| -rw-r--r-- | check/app/models/sql_factory.py | 11 | ||||
| -rw-r--r-- | check/app/server/api.py | 65 |
2 files changed, 49 insertions, 27 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index 68c2e30..5433b67 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -32,7 +32,8 @@ class FileTable(Base): __tablename__ = 'files' id = Column(Integer, primary_key=True) sha256 = Column(String(64, convert_unicode=True), nullable=False) - phash = Column(BigInteger, nullable=False) + url = Column(String(255, convert_unicode=True), nullable=False) + phash = Column(BigInteger, nullable=False, index=True) ext = Column(String(4, convert_unicode=True), nullable=False) def toJSON(self): return { @@ -64,11 +65,9 @@ def search_by_hash(hash): match = session.query(FileTable).filter(FileTable.sha256 == hash) return match.first() -def add_phash(sha256, phash, ext): +def add_phash(sha256, phash, ext, url): """Add a file to the table""" - rec = FileTable( - sha256=sha256, phash=phash, ext=ext, - ) + rec = FileTable(sha256=sha256, phash=phash, ext=ext, url=url) session = Session() session.add(rec) session.commit() @@ -93,4 +92,4 @@ def add_phash_by_filename(path): hash = sha256(path) - add_phash(sha256=hash, phash=phash, ext=ext) + add_phash(sha256=hash, phash=phash, ext=ext, url=path) diff --git a/check/app/server/api.py b/check/app/server/api.py index c4878c5..3742b15 100644 --- a/check/app/server/api.py +++ b/check/app/server/api.py @@ -1,8 +1,10 @@ +import io import os import re import time import numpy as np import logging +import urllib.request from flask import Blueprint, request, jsonify from PIL import Image @@ -31,25 +33,6 @@ def match(): """ start = time.time() - file = request.files['q'] - fn = file.filename - if fn.endswith('blob'): # FIX PNG IMAGES? - logging.debug('received a blob, assuming JPEG') - fn = 'filename.jpg' - - basename, ext = os.path.splitext(fn) - if ext.lower() not in valid_exts: - return jsonify({ - 'success': False, - 'match': False, - 'error': 'not_an_image' - }) - - ext = ext[1:].lower() - - im = Image.open(file.stream).convert('RGB') - phash = compute_phash_int(im) - try: threshold = int(request.args.get('threshold') or 6) limit = int(request.args.get('limit') or 1) @@ -61,12 +44,52 @@ def match(): 'error': 'param_error' }) + if 'q' in request.files: + file = request.files['q'] + fn = file.filename + if fn.endswith('blob'): # FIX PNG IMAGES? + logging.debug('received a blob, assuming JPEG') + fn = 'filename.jpg' + + basename, ext = os.path.splitext(fn) + if ext.lower() not in valid_exts: + return jsonify({ + 'success': False, + 'match': False, + 'error': 'not_an_image' + }) + + im = Image.open(file.stream).convert('RGB') + else: + url = request.args.get('url') + if not url: + return jsonify({ + 'success': False, + 'match': False, + 'error': 'no_image' + }) + basename, ext = os.path.splitext(url) + if ext.lower() not in valid_exts: + return jsonify({ + 'success': False, + 'match': False, + 'error': 'not_an_image' + }) + + remote_request = urllib.request.Request(url) + remote_response = urllib.request.urlopen(remote_request) + raw = remote_response.read() + im = Image.open(io.BytesIO(raw)).convert('RGB') + + phash = compute_phash_int(im) + ext = ext[1:].lower() + results = search_by_phash(phash=phash, threshold=threshold, limit=limit) if len(results) == 0: - if add: + if add and url: hash = sha256_stream(file) - add_phash(sha256=hash, phash=phash, ext=ext) + add_phash(sha256=hash, phash=phash, ext=ext, url=url) match = False else: match = True |
