summary | refs | log | tree | commit | diff
path: root/check/commands/phash/report.py
diff options
context:
space:
mode:
Diffstat (limited to 'check/commands/phash/report.py')
-rw-r--r-- check/commands/phash/report.py 21
1 files changed, 5 insertions, 16 deletions
diff --git a/check/commands/phash/report.py b/check/commands/phash/report.py
index 362480d..a2de9aa 100644
--- a/check/commands/phash/report.py
+++ b/check/commands/phash/report.py
@@ -67,35 +67,24 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_recursive, opt_thresh, opt_slice):
# Deduplicate the list of images
log.info('Deduplicating images...')
duplicates = []
+ names_added = []
for sha256_a, im_obj_a in tqdm(ims_meta.copy().items()):
for sha256_b, im_obj_b in ims_meta.copy().items():
- if sha256_a == sha256_b:
+ if sha256_a == sha256_b or im_obj_b['fname'] in names_added:
continue
d = abs(im_obj_a['imhash'] - im_obj_b['imhash'])
if d <= opt_thresh:
# mark B as a duplicate of A
- ims_meta[sha256_b]['duplicate'] = sha256_a
+ #ims_meta[sha256_b]['duplicate'] = sha256_a
duplicates.append({'sha256_a': sha256_a, 'fname_a': im_obj_a['fname'],
'sha256_b': sha256_b, 'fname_b': im_obj_b['fname'], 'score': d})
ims_meta.pop(sha256_b)
+ names_added.append(im_obj_a['fname'])
n_dupes = sum(1 for k,v in ims_meta.items() if v['duplicate'] is not None)
log.info(f'Found {n_dupes}')
-
- #im_list = [v for k,v in ims_meta.items()] # dict to list of dicts
df_items = pd.DataFrame.from_dict(duplicates)
- #df_items.drop(['imhash', 'filepath'], axis=1, inplace=True)
file_utils.ensure_dir(opt_fp_out)
log.info(f'Writing: {opt_fp_out}')
- df_items.to_csv(opt_fp_out, index=False)
-
- # generate HTML
- # copy images to another directory
- # import shutil
- # file_utils.ensure_dir(opt_fp_dir_copyto)
- # for sha256, im_meta in ims_meta.items():
- # src = im_meta['filepath']
- # dst = join(opt_fp_dir_copyto, f'{sha256}.jpg')
- # # dst = join(opt_fp_dir_copyto, f"{im_meta['fname']}")
- # shutil.copy(src, dst)
\ No newline at end of file
+ df_items.to_csv(opt_fp_out, index=False)
\ No newline at end of file