From f4416606df4d06bde65eb45bae3e4f2ee852351a Mon Sep 17 00:00:00 2001 From: adamhrv Date: Tue, 23 Apr 2019 19:04:04 +0200 Subject: update for call --- TODO.md | 12 ++++++++++++ check/commands/phash/report.py | 21 +++++---------------- check/static/assets/css.css | 3 +++ check/static/perceptual_hash_report.html | 2 +- 4 files changed, 21 insertions(+), 17 deletions(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..97347af --- /dev/null +++ b/TODO.md @@ -0,0 +1,12 @@ +# VFRAME Check API + +ToDo + +- deploy demo app to digital ocean +- add post request with image URL +- look into Docker Compose +- wait for new 3GB sample set, produce new report +- add upload-demo page to see results, change threshold +- add system status page +- add authentication (htpasswd should be ok, unless you want to do API key)\ +- clean up HTML report and add PDF report \ No newline at end of file diff --git a/check/commands/phash/report.py b/check/commands/phash/report.py index 362480d..a2de9aa 100644 --- a/check/commands/phash/report.py +++ b/check/commands/phash/report.py @@ -67,35 +67,24 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_recursive, opt_thresh, opt_slice): # Deduplicate the list of images log.info('Deduplicating images...') duplicates = [] + names_added = [] for sha256_a, im_obj_a in tqdm(ims_meta.copy().items()): for sha256_b, im_obj_b in ims_meta.copy().items(): - if sha256_a == sha256_b: + if sha256_a == sha256_b or im_obj_b['fname'] in names_added: continue d = abs(im_obj_a['imhash'] - im_obj_b['imhash']) if d <= opt_thresh: # mark B as a duplicate of A - ims_meta[sha256_b]['duplicate'] = sha256_a + #ims_meta[sha256_b]['duplicate'] = sha256_a duplicates.append({'sha256_a': sha256_a, 'fname_a': im_obj_a['fname'], 'sha256_b': sha256_b, 'fname_b': im_obj_b['fname'], 'score': d}) ims_meta.pop(sha256_b) + names_added.append(im_obj_a['fname']) n_dupes = sum(1 for k,v in ims_meta.items() if v['duplicate'] is not None) log.info(f'Found 
{n_dupes}') - - #im_list = [v for k,v in ims_meta.items()] # dict to list of dicts df_items = pd.DataFrame.from_dict(duplicates) - #df_items.drop(['imhash', 'filepath'], axis=1, inplace=True) file_utils.ensure_dir(opt_fp_out) log.info(f'Writing: {opt_fp_out}') - df_items.to_csv(opt_fp_out, index=False) - - # generate HTML - # copy images to another directory - # import shutil - # file_utils.ensure_dir(opt_fp_dir_copyto) - # for sha256, im_meta in ims_meta.items(): - # src = im_meta['filepath'] - # dst = join(opt_fp_dir_copyto, f'{sha256}.jpg') - # # dst = join(opt_fp_dir_copyto, f"{im_meta['fname']}") - # shutil.copy(src, dst) \ No newline at end of file + df_items.to_csv(opt_fp_out, index=False) \ No newline at end of file diff --git a/check/static/assets/css.css b/check/static/assets/css.css index 9e8a59f..e721c16 100755 --- a/check/static/assets/css.css +++ b/check/static/assets/css.css @@ -326,6 +326,9 @@ kbd { .img_match{ max-width:300px } +table.image_group{ + margin-bottom:100px; +} td.result_txt{ vertical-align: top; } diff --git a/check/static/perceptual_hash_report.html b/check/static/perceptual_hash_report.html index 566a058..83748fa 100644 --- a/check/static/perceptual_hash_report.html +++ b/check/static/perceptual_hash_report.html @@ -8,7 +8,7 @@ {% for fname_a, image_group in image_groups %}

Duplicates for {{ fname_a }}

- +
-- cgit v1.2.3-70-g09d2
Image A Image B