diff options
Diffstat (limited to 'megapixels/commands/msc/count.py')
| -rw-r--r-- | megapixels/commands/msc/count.py | 36 |
1 files changed, 23 insertions, 13 deletions
diff --git a/megapixels/commands/msc/count.py b/megapixels/commands/msc/count.py index 4a92170a..581eb7a0 100644 --- a/megapixels/commands/msc/count.py +++ b/megapixels/commands/msc/count.py @@ -46,6 +46,10 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force): df_embassies = pd.read_csv(opt_fp_in) df_embassies.fillna('', inplace=True) + len_orig = len(df_embassies) + df_embassies = df_embassies[df_embassies.skip != True] + log.debug(f'Skipping {len_orig - len(df_embassies)} embassies') + embassy_nsids = list(df_embassies['nsid']) match_items = [] @@ -98,26 +102,32 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force): } embassy_images.append(im_obj) - if nsid == '51226353@N03': - malta_images.append(im_obj) + # Save embassy matches - df = pd.DataFrame.from_dict(match_items) - df.to_csv(opt_fp_out, index=False) - total = df['count'].sum() + df_matches = pd.DataFrame.from_dict(match_items) + df_matches.to_csv(opt_fp_out, index=False) + total = df_matches['count'].sum() # Save image matches - df = pd.DataFrame.from_dict(embassy_images) + df_images = pd.DataFrame.from_dict(embassy_images) fp_out = opt_fp_out.replace('.csv', '_images.csv') - df.to_csv(fp_out, index=False) + df_images.to_csv(fp_out, index=False) total = len(embassy_images) log.debug(f'wrote {fp_out}') log.debug(f'Found {total:,} embassy images') - # Save malta images - df = pd.DataFrame.from_dict(malta_images) - fp_out = opt_fp_out.replace('.csv', '_images_malta.csv') - df.to_csv(fp_out, index=False) - total = len(malta_images) + # save summary count per dataset + groups_datasets = df_matches.groupby('dataset_key') + summary_counts = [] + for group_dataset, df_dataset in groups_datasets: + log.debug(f'{group_dataset}') + summary_counts.append({'dataset': group_dataset, 'images': df_dataset['count'].sum()}) + df_dataset_counts = pd.DataFrame.from_dict(summary_counts) + fp_out = opt_fp_out.replace('.csv', '_counts_summary_dataset.csv') + df_dataset_counts.to_csv(fp_out, index=False) + + log.debug(f'wrote {fp_out}') - log.debug(f'Found {total:,} malta embassy images')
\ No newline at end of file + log.debug(f'Found {len(embassy_images):,} embassy images') + |
