diff options
Diffstat (limited to 'check')
| -rw-r--r-- | check/app/models/sql_factory.py | 2 | ||||
| -rw-r--r-- | check/commands/phash/drop.py | 6 | ||||
| -rw-r--r-- | check/commands/phash/import.py | 60 |
3 files changed, 64 insertions, 4 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index ad27f62..1aa617c 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -31,7 +31,7 @@ class FileTable(Base): """Table for storing various hashes of images""" __tablename__ = 'files' id = Column(Integer, primary_key=True) - sha256 = Column(String(64, convert_unicode=True), nullable=False) + sha256 = Column(String(64, convert_unicode=True), nullable=False, unique=True) phash = Column(BigInteger, nullable=False, index=True) ext = Column(String(4, convert_unicode=True), nullable=False) url = Column(String(255, convert_unicode=True), nullable=False) diff --git a/check/commands/phash/drop.py b/check/commands/phash/drop.py index 40a8261..aee922d 100644 --- a/check/commands/phash/drop.py +++ b/check/commands/phash/drop.py @@ -15,8 +15,8 @@ def cli(ctx, opt_force): """ Drop the database """ - if opt_force: + if not opt_force: + print('Will foolishly drop the database only if the --force flag is passed') + else: print('Dropping the database...!') Base.metadata.drop_all(engine) - else: - print('Will foolishly drop the database if the --force flag is passed')
# check/commands/phash/import.py (new file, mode 100644)
"""
Import a folder of images, deduping them first
"""

import glob
import os

import click
from PIL import Image

from app.models.sql_factory import add_phash
from app.utils.im_utils import compute_phash, phash2int
from app.utils.file_utils import write_json, sha256

# File extensions (lower-case, leading dot included) that are treated as images.
valid_exts = ['.gif', '.jpg', '.jpeg', '.png']


@click.command()
@click.option('-i', '--input', 'opt_input_glob',
              required=True,
              help="Input glob to search -- e.g. 'static/sample_set_test_01/images/*'")
@click.option('-t', '--threshold', 'opt_threshold',
              required=True,
              default=6,
              type=int,
              help="Threshold for hamming distance comparison (0-64, default=6)")
@click.pass_context
def cli(ctx, opt_input_glob, opt_threshold):
    """
    Import a folder of images, deduping them first

    Walks the files matched by ``opt_input_glob`` in sorted order, computes a
    perceptual hash for every file with a recognised image extension, and
    calls ``add_phash`` for each image whose hash is not within
    ``opt_threshold`` of an image already accepted during this run.

    The glob must start with ``static/``; otherwise a message is printed and
    nothing is imported. Each stored URL is the matched path prefixed with
    ``/``.
    """
    if not opt_input_glob.startswith('static/'):
        print("Please move your files into the static folder to make them accessible")
        return

    seen = []   # one record per image accepted so far in this run
    total = 0   # number of files with a valid image extension
    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
        # Compare case-insensitively so that e.g. 'photo.JPG' is not skipped.
        ext = os.path.splitext(fn)[1].lower()
        if ext not in valid_exts:
            continue
        total += 1

        # Close the underlying file as soon as the pixel data has been
        # converted; convert() returns an independent in-memory image.
        with Image.open(fn) as src:
            im = src.convert('RGB')
        phash = compute_phash(im)

        if not is_phash_new(fn, phash, seen, opt_threshold):
            continue

        digest = sha256(fn)
        seen.append({
            'sha256': digest,
            'phash': phash,
            'fn': fn,
        })
        # NOTE(review): only images seen in this run are compared; whether
        # add_phash tolerates a sha256 that is already stored is not visible
        # here -- confirm before re-importing the same folder.
        add_phash(sha256=digest, phash=phash2int(phash), ext=ext[1:], url='/' + fn)

    print("checked {} files, found {} unique".format(total, len(seen)))


def is_phash_new(fn, phash, seen, opt_threshold):
    """
    Return True when ``phash`` is not a near-duplicate of any entry in ``seen``.

    ``seen`` is a list of dicts carrying a ``'phash'`` key. An entry counts as
    a duplicate when ``entry['phash'] - phash`` is strictly less than
    ``opt_threshold``. ``fn`` is accepted for the caller's convenience and is
    not used in the comparison.

    NOTE(review): this presumes subtracting two hash objects yields their
    Hamming distance (as the --threshold help text implies) -- confirm
    against compute_phash.
    """
    return not any(item['phash'] - phash < opt_threshold for item in seen)
