summary refs log tree commit diff
path: root/megapixels
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels')
-rw-r--r-- megapixels/commands/msc/count.py 36
1 files changed, 23 insertions, 13 deletions
diff --git a/megapixels/commands/msc/count.py b/megapixels/commands/msc/count.py
index 4a92170a..581eb7a0 100644
--- a/megapixels/commands/msc/count.py
+++ b/megapixels/commands/msc/count.py
@@ -46,6 +46,10 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
df_embassies = pd.read_csv(opt_fp_in)
df_embassies.fillna('', inplace=True)
+ len_orig = len(df_embassies)
+ df_embassies = df_embassies[df_embassies.skip != True]
+ log.debug(f'Skipping {len_orig - len(df_embassies)} embassies')
+
embassy_nsids = list(df_embassies['nsid'])
match_items = []
@@ -98,26 +102,32 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
}
embassy_images.append(im_obj)
- if nsid == '51226353@N03':
- malta_images.append(im_obj)
+
# Save embassy matches
- df = pd.DataFrame.from_dict(match_items)
- df.to_csv(opt_fp_out, index=False)
- total = df['count'].sum()
+ df_matches = pd.DataFrame.from_dict(match_items)
+ df_matches.to_csv(opt_fp_out, index=False)
+ total = df_matches['count'].sum()
# Save image matches
- df = pd.DataFrame.from_dict(embassy_images)
+ df_images = pd.DataFrame.from_dict(embassy_images)
fp_out = opt_fp_out.replace('.csv', '_images.csv')
- df.to_csv(fp_out, index=False)
+ df_images.to_csv(fp_out, index=False)
total = len(embassy_images)
log.debug(f'wrote {fp_out}')
log.debug(f'Found {total:,} embassy images')
- # Save malta images
- df = pd.DataFrame.from_dict(malta_images)
- fp_out = opt_fp_out.replace('.csv', '_images_malta.csv')
- df.to_csv(fp_out, index=False)
- total = len(malta_images)
+ # save summary count per dataset
+ groups_datasets = df_matches.groupby('dataset_key')
+ summary_counts = []
+ for group_dataset, df_dataset in groups_datasets:
+ log.debug(f'{group_dataset}')
+ summary_counts.append({'dataset': group_dataset, 'images': df_dataset['count'].sum()})
+ df_dataset_counts = pd.DataFrame.from_dict(summary_counts)
+ fp_out = opt_fp_out.replace('.csv', '_counts_summary_dataset.csv')
+ df_dataset_counts.to_csv(fp_out, index=False)
+
+
log.debug(f'wrote {fp_out}')
- log.debug(f'Found {total:,} malta embassy images')
\ No newline at end of file
+ log.debug(f'Found {len(embassy_images):,} embassy images')
+