summary | refs | log | tree | commit | diff
path: root/megapixels
diff options
context:
space:
mode:
author adamhrv <adam@ahprojects.com> 2019-07-04 02:17:37 +0200
committer adamhrv <adam@ahprojects.com> 2019-07-04 02:17:37 +0200
commit b0134234faf869ebcc323c634f247ea11d77cf4c (patch)
tree 317d6972813f198f216c4545a84d6abe2b70ac5a /megapixels
parent 563cb9b02a3c6de5a5710e5f0734d30810e6a5a5 (diff)
add summary
Diffstat (limited to 'megapixels')
-rw-r--r-- megapixels/commands/msc/count.py 36
1 files changed, 23 insertions, 13 deletions
diff --git a/megapixels/commands/msc/count.py b/megapixels/commands/msc/count.py
index 4a92170a..581eb7a0 100644
--- a/megapixels/commands/msc/count.py
+++ b/megapixels/commands/msc/count.py
@@ -46,6 +46,10 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
df_embassies = pd.read_csv(opt_fp_in)
df_embassies.fillna('', inplace=True)
+ len_orig = len(df_embassies)
+ df_embassies = df_embassies[df_embassies.skip != True]
+ log.debug(f'Skipping {len_orig - len(df_embassies)} embassies')
+
embassy_nsids = list(df_embassies['nsid'])
match_items = []
@@ -98,26 +102,32 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force):
}
embassy_images.append(im_obj)
- if nsid == '51226353@N03':
- malta_images.append(im_obj)
+
# Save embassy matches
- df = pd.DataFrame.from_dict(match_items)
- df.to_csv(opt_fp_out, index=False)
- total = df['count'].sum()
+ df_matches = pd.DataFrame.from_dict(match_items)
+ df_matches.to_csv(opt_fp_out, index=False)
+ total = df_matches['count'].sum()
# Save image matches
- df = pd.DataFrame.from_dict(embassy_images)
+ df_images = pd.DataFrame.from_dict(embassy_images)
fp_out = opt_fp_out.replace('.csv', '_images.csv')
- df.to_csv(fp_out, index=False)
+ df_images.to_csv(fp_out, index=False)
total = len(embassy_images)
log.debug(f'wrote {fp_out}')
log.debug(f'Found {total:,} embassy images')
- # Save malta images
- df = pd.DataFrame.from_dict(malta_images)
- fp_out = opt_fp_out.replace('.csv', '_images_malta.csv')
- df.to_csv(fp_out, index=False)
- total = len(malta_images)
+ # save summary count per dataset
+ groups_datasets = df_matches.groupby('dataset_key')
+ summary_counts = []
+ for group_dataset, df_dataset in groups_datasets:
+ log.debug(f'{group_dataset}')
+ summary_counts.append({'dataset': group_dataset, 'images': df_dataset['count'].sum()})
+ df_dataset_counts = pd.DataFrame.from_dict(summary_counts)
+ fp_out = opt_fp_out.replace('.csv', '_counts_summary_dataset.csv')
+ df_dataset_counts.to_csv(fp_out, index=False)
+
+
log.debug(f'wrote {fp_out}')
- log.debug(f'Found {total:,} malta embassy images')
\ No newline at end of file
+ log.debug(f'Found {len(embassy_images):,} embassy images')
+