1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
"""
Import a folder of images, deduping them first
"""
import click
import os
import glob
from PIL import Image
from app.models.sql_factory import add_phash
from app.utils.im_utils import compute_phash, phash2int
from app.utils.file_utils import write_json, sha256
# File extensions (including the leading dot) that the importer treats as
# images; any path whose extension is not listed here is skipped.
valid_exts = [
    '.gif',
    '.jpg',
    '.jpeg',
    '.png',
]
@click.command()
@click.option('-i', '--input', 'opt_input_glob',
              required=True,
              help="Input glob to search -- e.g. 'static/sample_set_test_01/images/*'")
@click.option('-t', '--threshold', 'opt_threshold',
              required=True,
              default=6,
              type=int,
              help="Threshold for hamming distance comparison (0-64, default=6)")
@click.pass_context
def cli(ctx, opt_input_glob, opt_threshold):
    """
    Import a folder of images, deduping them first
    """
    # NOTE: click renders the docstring above as the command's --help text,
    # so the detailed notes live in comments instead.
    #
    # Parameters:
    #   ctx             -- click context injected by @click.pass_context
    #                      (not used in this function).
    #   opt_input_glob  -- glob pattern of candidate files; it must start
    #                      with 'static/' or the command prints a hint and
    #                      returns without importing anything.
    #   opt_threshold   -- integer passed to is_phash_new(); an image whose
    #                      hash difference to an already-kept image is below
    #                      this value is treated as a duplicate.
    #
    # For every matching image file (visited in sorted path order) the
    # perceptual hash is computed; the first image of each near-duplicate
    # group is registered through add_phash(), the rest are only counted.
    if not opt_input_glob.startswith('static/'):
        print("Please move your files into the static folder to make them accessible")
        return

    seen = []   # one record per image kept so far (sha256, phash, fn)
    total = 0   # number of image files examined, duplicates included

    for fn in sorted(glob.iglob(os.path.expanduser(opt_input_glob))):
        # Normalise case so files such as 'photo.JPG' are not silently
        # skipped by the extension filter.
        ext = os.path.splitext(fn)[1].lower()
        if ext not in valid_exts:
            continue
        total += 1

        # Image.open() keeps the underlying file open; convert() returns an
        # independent, fully loaded copy, so the source handle can be
        # released as soon as the conversion is done.
        with Image.open(fn) as source_image:
            im = source_image.convert('RGB')
        phash = compute_phash(im)

        if not is_phash_new(fn, phash, seen, opt_threshold):
            continue

        digest = sha256(fn)
        seen.append({
            'sha256': digest,
            'phash': phash,
            'fn': fn,
        })
        # The extension is stored without its leading dot; the URL is the
        # file path made root-relative.
        add_phash(sha256=digest, phash=phash2int(phash), ext=ext[1:], url='/' + fn)

    print("checked {} files, found {} unique".format(total, len(seen)))
def is_phash_new(fn, phash, seen, opt_threshold):
    """
    Tell whether `phash` differs enough from every hash already recorded.

    Each entry of `seen` is a dict carrying a 'phash' value. The candidate
    counts as new only when, for every entry, `entry['phash'] - phash` is
    not below `opt_threshold`. An empty `seen` therefore always yields True.

    NOTE(review): the subtraction is presumably a Hamming distance between
    hash objects -- confirm against what compute_phash returns.

    `fn` is accepted for the caller's convenience but is not used here.

    Returns True if no recorded hash is closer than the threshold,
    False as soon as one is.
    """
    has_near_match = any(
        (known['phash'] - phash) < opt_threshold
        for known in seen
    )
    return not has_near_match
|