From f4416606df4d06bde65eb45bae3e4f2ee852351a Mon Sep 17 00:00:00 2001 From: adamhrv Date: Tue, 23 Apr 2019 19:04:04 +0200 Subject: update for call --- check/commands/phash/report.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'check/commands/phash/report.py') diff --git a/check/commands/phash/report.py b/check/commands/phash/report.py index 362480d..a2de9aa 100644 --- a/check/commands/phash/report.py +++ b/check/commands/phash/report.py @@ -67,35 +67,24 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_recursive, opt_thresh, opt_slice): # Deduplicate the list of images log.info('Deduplicating images...') duplicates = [] + names_added = [] for sha256_a, im_obj_a in tqdm(ims_meta.copy().items()): for sha256_b, im_obj_b in ims_meta.copy().items(): - if sha256_a == sha256_b: + if sha256_a == sha256_b or im_obj_b['fname'] in names_added: continue d = abs(im_obj_a['imhash'] - im_obj_b['imhash']) if d <= opt_thresh: # mark B as a duplicate of A - ims_meta[sha256_b]['duplicate'] = sha256_a + #ims_meta[sha256_b]['duplicate'] = sha256_a duplicates.append({'sha256_a': sha256_a, 'fname_a': im_obj_a['fname'], 'sha256_b': sha256_b, 'fname_b': im_obj_b['fname'], 'score': d}) ims_meta.pop(sha256_b) + names_added.append(im_obj_a['fname']) n_dupes = sum(1 for k,v in ims_meta.items() if v['duplicate'] is not None) log.info(f'Found {n_dupes}') - - #im_list = [v for k,v in ims_meta.items()] # dict to list of dicts df_items = pd.DataFrame.from_dict(duplicates) - #df_items.drop(['imhash', 'filepath'], axis=1, inplace=True) file_utils.ensure_dir(opt_fp_out) log.info(f'Writing: {opt_fp_out}') - df_items.to_csv(opt_fp_out, index=False) - - # generate HTML - # copy images to another directory - # import shutil - # file_utils.ensure_dir(opt_fp_dir_copyto) - # for sha256, im_meta in ims_meta.items(): - # src = im_meta['filepath'] - # dst = join(opt_fp_dir_copyto, f'{sha256}.jpg') - # # dst = join(opt_fp_dir_copyto, f"{im_meta['fname']}") - # shutil.copy(src, dst) \ No newline at end of file + df_items.to_csv(opt_fp_out, index=False) \ No newline at end of file -- cgit v1.2.3-70-g09d2