summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/megaface_age_from_orig.py
diff options
context:
space:
mode:
author adamhrv <adam@ahprojects.com> 2019-10-08 16:02:47 +0200
committer adamhrv <adam@ahprojects.com> 2019-10-08 16:02:47 +0200
commit 27340ac4cd43f8eec7414495b541a65566ae2656 (patch)
tree cd43fcf1af026c75e6045d71d7d783ec460ba3ee /megapixels/commands/datasets/megaface_age_from_orig.py
parent a4ea2852f4b46566a61f988342aa04e4059ccef9 (diff)
update site, white
Diffstat (limited to 'megapixels/commands/datasets/megaface_age_from_orig.py')
-rw-r--r-- megapixels/commands/datasets/megaface_age_from_orig.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/megaface_age_from_orig.py b/megapixels/commands/datasets/megaface_age_from_orig.py
new file mode 100644
index 00000000..489bebf3
--- /dev/null
+++ b/megapixels/commands/datasets/megaface_age_from_orig.py
@@ -0,0 +1,62 @@
+import click
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input path to metadata directory')
+# output is required: without it there is no file to write the CSV to
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output path to age CSV')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out):
+  """Creates CSV of MegaFace image counts per identity from original directories
+
+  Recursively scans the input directory for folders named like
+  "1234567@N05_identity_1", counts the *.jpg files in each, and writes one
+  row per (nsid, identity) pair with the columns nsid, identity, images.
+  """
+  from os.path import join
+  from pathlib import Path
+  from glob import glob
+
+  import pandas as pd
+  from tqdm import tqdm
+
+  from app.utils import file_utils, logger_utils
+
+  log = logger_utils.Logger.getLogger()
+
+  # -------------------------------------------------
+  # process
+  # the trailing slash restricts the recursive glob to directories; the
+  # result also contains opt_fp_in itself and every intermediate folder
+  fp_im_dirs = glob(join(opt_fp_in, '**/'), recursive=True)
+
+  log.info('Found {} directories'.format(len(fp_im_dirs)))
+  identities = {}
+  num_skipped = 0
+
+  for fp_im_dir in tqdm(fp_im_dirs):
+    # expected folder name: 1234567@N05_identity_1
+    name_parts = Path(fp_im_dir).name.split('_')
+    if len(name_parts) < 3:
+      # not an identity folder (e.g. the input root): skip it, but keep count
+      num_skipped += 1
+      continue
+    nsid, identity_num = name_parts[0], name_parts[2]
+    id_key = '{}_{}'.format(nsid, identity_num)
+    num_images = len(glob(join(fp_im_dir, '*.jpg')))
+    if id_key not in identities:
+      identities[id_key] = {'nsid': nsid, 'identity': identity_num, 'images': num_images}
+    else:
+      identities[id_key]['images'] += num_images
+
+  if num_skipped:
+    log.info('Skipped {} directories without an identity name'.format(num_skipped))
+
+  # one row per identity; explicit columns keep the CSV header even when empty
+  df = pd.DataFrame(list(identities.values()), columns=['nsid', 'identity', 'images'])
+  file_utils.mkdirs(opt_fp_out)
+  df.to_csv(opt_fp_out, index=False)
+  log.info('Wrote {} lines to {}'.format(len(df), opt_fp_out))
+
+