""" Import a folder of images, deduping them first """

import glob
import os

import click
from PIL import Image

from app.models.sql_factory import add_phash
from app.utils.im_utils import compute_phash, phash2int
from app.utils.file_utils import write_json, sha256

# File extensions (with leading dot) that are treated as importable images.
# NOTE: the membership test below is case-sensitive, so e.g. '.JPG' is skipped.
valid_exts = ['.gif', '.jpg', '.jpeg', '.png']


@click.command()
@click.option('-i', '--input', 'opt_input_glob', required=True,
              help="Input glob to search -- e.g. 'static/sample_set_test_01/images/*'")
@click.option('-t', '--threshold', 'opt_threshold', required=True, default=6, type=int,
              help="Threshold for hamming distance comparison (0-64, default=6)")
@click.pass_context
def cli(ctx, opt_input_glob, opt_threshold):
    """ Import a folder of images, deduping them first """
    # The docstring above is kept short because click uses it as the command's
    # help text; implementation notes live in comments instead.
    #
    # ctx            -- click context (injected by @click.pass_context; unused here)
    # opt_input_glob -- glob pattern of candidate files; must start with 'static/'
    # opt_threshold  -- a file is treated as a duplicate when its phash
    #                   difference to an already-accepted file is below this
    seen = []   # one record per image accepted as unique during this run
    total = 0   # number of files with a valid extension that were examined

    if not opt_input_glob.startswith('static/'):
        print("Please move your files into the static folder to make them accessible")
        return

    # Sorted so the "first seen wins" dedup result is deterministic.
    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
        ext = os.path.splitext(fn)[1]
        if ext not in valid_exts:
            continue
        ext = ext[1:]  # drop the leading dot before storing
        total += 1

        # Use a context manager so the underlying file handle is released
        # for every image instead of leaking one descriptor per file.
        with Image.open(fn) as im:
            phash = compute_phash(im.convert('RGB'))

        if is_phash_new(fn, phash, seen, opt_threshold):
            digest = sha256(fn)
            url = '/' + fn
            seen.append({
                'sha256': digest,
                'phash': phash,
                'fn': fn,
            })
            add_phash(sha256=digest, phash=phash2int(phash), ext=ext, url=url)

    print("checked {} files, found {} unique".format(total, len(seen)))


def is_phash_new(fn, phash, seen, opt_threshold):
    """
    Return True when `phash` is not within `opt_threshold` of any hash in `seen`.

    fn            -- path of the candidate file (not used by the comparison)
    phash         -- perceptual hash of the candidate
    seen          -- list of dicts, each holding a previously accepted hash
                     under the 'phash' key
    opt_threshold -- differences strictly below this value count as duplicates

    NOTE(review): this assumes subtracting two phash values yields their
    Hamming distance, as the --threshold help text suggests -- confirm
    against compute_phash.
    """
    return not any(item['phash'] - phash < opt_threshold for item in seen)