# Source file: megapixels/commands/msc/summarize.py
# Added in commit 5b916111ee1a012650a586ec07bc9150d66020bc
# (adamhrv, 2019-05-29, "add MSC nbs and cli cmds").
"""Merge per-dataset citation CSVs and write per-country / per-sector counts."""

from os.path import join, splitext
from pprint import pprint

import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()

# Each name maps to a `<name>.csv` file read from the input directory.
DATASET_NAMES = (
    'helen',
    'megaface',
    'adience',
    'pipa',
    'lfpw',
    'duke_mtmc',
    'brainwash',
    'msceleb',
    'uccs',
)


def _tagged_path(fp, tag):
    """Return `fp` with `tag` inserted just before its file extension.

    For an ordinary ``*.csv`` path this yields the same result as
    ``fp.replace('.csv', tag + '.csv')``, but it only touches the final
    extension and still produces a distinct path when `fp` has a different
    extension or none at all, so a summary can never overwrite `fp` itself.
    """
    root, ext = splitext(fp)
    return f'{root}{tag}{ext}'


def _count_by(df, column, label):
    """Count the rows of `df` per distinct value of `column`.

    Returns a DataFrame with the columns `label` (the group value) and
    ``'citations'`` (the number of rows in that group), sorted by
    ``'citations'`` in descending order. An input without any groups yields
    an empty frame with those two columns.
    """
    counts = df.groupby(column).size().reset_index(name='citations')
    counts = counts.rename(columns={column: label})
    return counts.sort_values(by='citations', ascending=False)


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Directory containing one <dataset>.csv file per dataset')
@click.option('-o', '--output', 'opt_fp_out', required=True,
              help='Path of the merged CSV; summaries are written next to it')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
    """Merge dataset citation CSVs and summarize them by country and sector"""

    # Imported lazily, as in the original command body, so that merely
    # importing this module does not import pandas.
    import pandas as pd

    frames = []
    for dataset_name in DATASET_NAMES:
        fp_csv = join(opt_fp_in, f'{dataset_name}.csv')
        df_dataset = pd.read_csv(fp_csv)
        # Keep only rows whose `lat` is non-zero (presumably a latitude of 0
        # marks an entry without a resolved location -- TODO confirm).
        df_dataset = df_dataset[df_dataset['lat'] != 0]
        print(dataset_name, len(df_dataset))
        frames.append(df_dataset)

    # One concat instead of DataFrame.append in the loop: append was removed
    # in pandas 2.0 and copied the accumulated frame on every iteration.
    df = pd.concat(frames, ignore_index=True)

    # NOTE(review): the original computed a '_citations.csv' path at this
    # point but never used it; the merged table goes to opt_fp_out as before.
    df.to_csv(opt_fp_out, index=False)

    # Country summary -> <output>_countries.csv
    df_summary = _count_by(df, 'country', 'country')
    df_summary.to_csv(_tagged_path(opt_fp_out, '_countries'), index=False)
    pprint(df_summary)

    # Sector summary (grouped on `loc_type`, written under the column name
    # 'type') -> <output>_sector.csv
    df_types = _count_by(df, 'loc_type', 'type')
    df_types.to_csv(_tagged_path(opt_fp_out, '_sector'), index=False)
    pprint(df_types)