summaryrefslogtreecommitdiff
path: root/megapixels/commands/msc/summarize.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2019-05-29 15:24:30 +0200
committeradamhrv <adam@ahprojects.com>2019-05-29 15:24:30 +0200
commit5b916111ee1a012650a586ec07bc9150d66020bc (patch)
tree128092857e6a9b6d67877e55e05da4f99ea2f5eb /megapixels/commands/msc/summarize.py
parentf5141a7b48ee569089b07428bc75cb84a55c4834 (diff)
add MSC nbs and cli cmds
Diffstat (limited to 'megapixels/commands/msc/summarize.py')
-rw-r--r--megapixels/commands/msc/summarize.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/megapixels/commands/msc/summarize.py b/megapixels/commands/msc/summarize.py
new file mode 100644
index 00000000..d5d251db
--- /dev/null
+++ b/megapixels/commands/msc/summarize.py
@@ -0,0 +1,67 @@
+import click
+
+from app.settings import types
+from app.models.dataset import Dataset
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Directory containing one <dataset_name>.csv per dataset')
@click.option('-o', '--output', 'opt_fp_out', required=True,
              help='Path of the merged citations CSV to write')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
    """Merge per-dataset citation CSVs and write citation-count summaries.

    Reads <input>/<dataset>.csv for a fixed list of datasets, drops rows
    whose "lat" value is 0, and writes the merged rows to <output>.
    Citation counts grouped by "country" and by "loc_type" are written
    next to <output> with "_countries" and "_sector" appended to its
    file stem.
    """
    # NOTE: click renders the docstring above as this command's --help text.

    # Heavy imports stay function-local, as in the original command.
    from pathlib import Path
    from pprint import pprint

    import pandas as pd

    dataset_names = ['helen', 'megaface', 'adience', 'pipa', 'lfpw',
                     'duke_mtmc', 'brainwash', 'msceleb', 'uccs']

    # Load every dataset CSV, then concatenate once. DataFrame.append was
    # removed in pandas 2.0 and re-copied the growing frame per iteration.
    dir_in = Path(opt_fp_in)
    frames = []
    for dataset_name in dataset_names:
        _df = pd.read_csv(dir_in / f'{dataset_name}.csv')
        # Keep only rows whose latitude is not exactly 0
        # (presumably 0 marks an un-geocoded citation -- TODO confirm).
        _df = _df[_df.lat != 0]
        print(dataset_name, len(_df))
        frames.append(_df)
    df = pd.concat(frames, ignore_index=True)

    # The merged citations go to the path exactly as given by the caller.
    df.to_csv(opt_fp_out, index=False)

    # (column to group by, name of that column in the summary, file tag)
    summaries = (
        ('country', 'country', 'countries'),
        ('loc_type', 'type', 'sector'),
    )

    # Derive summary paths from the stem/suffix instead of str.replace:
    # replace() rewrites every '.csv' occurrence in the path and, when the
    # path has no '.csv' at all, returns it unchanged so each summary would
    # overwrite the previously written file.
    fp_base = Path(opt_fp_out)
    for column, label, tag in summaries:
        # size() counts rows per group; unlike building the frame from a
        # list of dicts, it keeps the 'citations' column even with no rows.
        counts = (
            df.groupby(column)
            .size()
            .rename_axis(label)
            .reset_index(name='citations')
            .sort_values(by='citations', ascending=False)
        )
        fp_summary = fp_base.with_name(f'{fp_base.stem}_{tag}{fp_base.suffix}')
        counts.to_csv(fp_summary, index=False)
        pprint(counts)
+
+