summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/megaface_names.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-05 12:00:15 +0100
committeradamhrv <adam@ahprojects.com>2018-12-05 12:00:15 +0100
commit90abf459d1df1f21960c1d653a1f936d1ec30256 (patch)
treefacab8e9bac6c56e69c369c2140cdbea218a01df /megapixels/commands/datasets/megaface_names.py
parent0529d4cd1618016319e995c37aa118bf8c2d501b (diff)
.
Diffstat (limited to 'megapixels/commands/datasets/megaface_names.py')
-rw-r--r--megapixels/commands/datasets/megaface_names.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/megaface_names.py b/megapixels/commands/datasets/megaface_names.py
new file mode 100644
index 00000000..01e93e2d
--- /dev/null
+++ b/megapixels/commands/datasets/megaface_names.py
@@ -0,0 +1,65 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import dlib
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()  # module-level logger; cli() uses it for its progress/info messages
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+ help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out',
+ help='Output directory')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out):
+ """Creates CSV of NSIDs from MegaFace"""
+
+ from tqdm import tqdm
+ from glob import glob
+
+ # -------------------------------------------------
+ # process
+ fp_im_dirs = glob(join(opt_fp_in, '**/'), recursive=True)
+
+ log.info('Found {} directories'.format(len(fp_im_dirs)))
+
+ identities = {}
+
+ for fp_im_dir in tqdm(fp_im_dirs):
+ # 1234567@N05_identity_1
+ try:
+ dir_id_name = Path(fp_im_dir).name
+ nsid = dir_id_name.split('_')[0]
+ identity_num = dir_id_name.split('_')[2]
+ id_key = '{}_{}'.format(nsid, identity_num)
+ num_images = len(glob(join(fp_im_dir, '*.jpg')))
+ if not id_key in identities.keys():
+ identities[id_key] = {'nsid': nsid, 'identity': identity_num, 'images': num_images}
+ else:
+ identities[id_key]['images'] += num_images
+ except Exception as e:
+ continue
+
+ # convert to dict
+ identities_list = [v for k, v in identities.items()]
+ df = pd.DataFrame.from_dict(identities_list)
+
+ file_utils.mkdirs(opt_fp_out)
+
+ log.info('Wrote {} lines to {}'.format(len(df), opt_fp_out))
+ df.to_csv(opt_fp_out, index=False)
+
+