diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-27 11:21:50 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-27 11:21:50 +0200 |
| commit | 1db97e03f5cac4eb6421e0b55628a3187c41e29c (patch) | |
| tree | c4fdfb27d8a9e446755ac4fea27027cbd4d55b9e /check/commands/phash/report.py | |
| parent | 4e78fcabe911b42211dd3aff0a64365c2f96ad21 (diff) | |
| parent | f4416606df4d06bde65eb45bae3e4f2ee852351a (diff) | |
Merge branch 'master' of github.com:adamhrv/vframe_check_api
Diffstat (limited to 'check/commands/phash/report.py')
| -rw-r--r-- | check/commands/phash/report.py | 21 |
1 file changed, 5 insertions, 16 deletions
diff --git a/check/commands/phash/report.py b/check/commands/phash/report.py index 362480d..a2de9aa 100644 --- a/check/commands/phash/report.py +++ b/check/commands/phash/report.py @@ -67,35 +67,24 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_recursive, opt_thresh, opt_slice): # Deduplicate the list of images log.info('Deduplicating images...') duplicates = [] + names_added = [] for sha256_a, im_obj_a in tqdm(ims_meta.copy().items()): for sha256_b, im_obj_b in ims_meta.copy().items(): - if sha256_a == sha256_b: + if sha256_a == sha256_b or im_obj_b['fname'] in names_added: continue d = abs(im_obj_a['imhash'] - im_obj_b['imhash']) if d <= opt_thresh: # mark B as a duplicate of A - ims_meta[sha256_b]['duplicate'] = sha256_a + #ims_meta[sha256_b]['duplicate'] = sha256_a duplicates.append({'sha256_a': sha256_a, 'fname_a': im_obj_a['fname'], 'sha256_b': sha256_b, 'fname_b': im_obj_b['fname'], 'score': d}) ims_meta.pop(sha256_b) + names_added.append(im_obj_a['fname']) n_dupes = sum(1 for k,v in ims_meta.items() if v['duplicate'] is not None) log.info(f'Found {n_dupes}') - - #im_list = [v for k,v in ims_meta.items()] # dict to list of dicts df_items = pd.DataFrame.from_dict(duplicates) - #df_items.drop(['imhash', 'filepath'], axis=1, inplace=True) file_utils.ensure_dir(opt_fp_out) log.info(f'Writing: {opt_fp_out}') - df_items.to_csv(opt_fp_out, index=False) - - # generate HTML - # copy images to another directory - # import shutil - # file_utils.ensure_dir(opt_fp_dir_copyto) - # for sha256, im_meta in ims_meta.items(): - # src = im_meta['filepath'] - # dst = join(opt_fp_dir_copyto, f'{sha256}.jpg') - # # dst = join(opt_fp_dir_copyto, f"{im_meta['fname']}") - # shutil.copy(src, dst)
\ No newline at end of file
+ df_items.to_csv(opt_fp_out, index=False)
\ No newline at end of file
