diff options
| author | adamhrv <adam@ahprojects.com> | 2019-07-04 02:17:37 +0200 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2019-07-04 02:17:37 +0200 |
| commit | b0134234faf869ebcc323c634f247ea11d77cf4c (patch) | |
| tree | 317d6972813f198f216c4545a84d6abe2b70ac5a /megapixels | |
| parent | 563cb9b02a3c6de5a5710e5f0734d30810e6a5a5 (diff) | |
add summary
Diffstat (limited to 'megapixels')
| -rw-r--r-- | megapixels/commands/msc/count.py | 36 |
1 files changed, 23 insertions, 13 deletions
```diff
diff --git a/megapixels/commands/msc/count.py b/megapixels/commands/msc/count.py
index 4a92170a..581eb7a0 100644
--- a/megapixels/commands/msc/count.py
+++ b/megapixels/commands/msc/count.py
@@ -46,6 +46,10 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
 
   df_embassies = pd.read_csv(opt_fp_in)
   df_embassies.fillna('', inplace=True)
+  len_orig = len(df_embassies)
+  df_embassies = df_embassies[df_embassies.skip != True]
+  log.debug(f'Skipping {len_orig - len(df_embassies)} embassies')
+
   embassy_nsids = list(df_embassies['nsid'])
 
   match_items = []
@@ -98,26 +102,32 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
         }
         embassy_images.append(im_obj)
 
-        if nsid == '51226353@N03':
-          malta_images.append(im_obj)
+
 
   # Save embassy matches
-  df = pd.DataFrame.from_dict(match_items)
-  df.to_csv(opt_fp_out, index=False)
-  total = df['count'].sum()
+  df_matches = pd.DataFrame.from_dict(match_items)
+  df_matches.to_csv(opt_fp_out, index=False)
+  total = df_matches['count'].sum()
 
   # Save image matches
-  df = pd.DataFrame.from_dict(embassy_images)
+  df_images = pd.DataFrame.from_dict(embassy_images)
   fp_out = opt_fp_out.replace('.csv', '_images.csv')
-  df.to_csv(fp_out, index=False)
+  df_images.to_csv(fp_out, index=False)
   total = len(embassy_images)
   log.debug(f'wrote {fp_out}')
   log.debug(f'Found {total:,} embassy images')
 
-  # Save malta images
-  df = pd.DataFrame.from_dict(malta_images)
-  fp_out = opt_fp_out.replace('.csv', '_images_malta.csv')
-  df.to_csv(fp_out, index=False)
-  total = len(malta_images)
+  # save summary count per dataset
+  groups_datasets = df_matches.groupby('dataset_key')
+  summary_counts = []
+  for group_dataset, df_dataset in groups_datasets:
+    log.debug(f'{group_dataset}')
+    summary_counts.append({'dataset': group_dataset, 'images': df_dataset['count'].sum()})
+  df_dataset_counts = pd.DataFrame.from_dict(summary_counts)
+  fp_out = opt_fp_out.replace('.csv', '_counts_summary_dataset.csv')
+  df_dataset_counts.to_csv(fp_out, index=False)
+
+
   log.debug(f'wrote {fp_out}')
-  log.debug(f'Found {total:,} malta embassy images')
\ No newline at end of file
+  log.debug(f'Found {len(embassy_images):,} embassy images')
+
```
