from glob import glob
import os
from os.path import join
from pathlib import Path

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils

import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils

log = logger_utils.Logger.getLogger()


# Scans every directory under the input path, parses directory names of the
# form "<nsid>_identity_<n>", counts the '*.jpg' files directly inside each
# one, and writes one CSV row per (nsid, identity) pair with the columns
# 'nsid', 'identity' and 'images'.
#
# Parameters:
#   ctx        -- click context (not used by this command)
#   opt_fp_in  -- root directory that is searched recursively
#   opt_fp_out -- destination path handed to DataFrame.to_csv
#
# The docstring below is left short on purpose: click displays it verbatim
# as the command's --help text.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Input directory')
@click.option('-o', '--output', 'opt_fp_out',
              help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
    """Creates CSV of NSIDs from MegaFace"""
    from tqdm import tqdm

    # -------------------------------------------------
    # process

    # The trailing '/' in the pattern restricts the matches to directories.
    fp_im_dirs = glob(join(opt_fp_in, '**/'), recursive=True)
    log.info('Found {} directories'.format(len(fp_im_dirs)))

    identities = {}
    num_skipped = 0
    for fp_im_dir in tqdm(fp_im_dirs):
        # expected directory name: 1234567@N05_identity_1
        name_parts = Path(fp_im_dir).name.split('_')
        if len(name_parts) < 3:
            # Not an identity directory (e.g. the root or a grouping folder).
            # Skip it explicitly rather than hiding it behind a blanket
            # `except Exception`, which would also mask unrelated errors.
            num_skipped += 1
            continue

        nsid, identity_num = name_parts[0], name_parts[2]
        id_key = '{}_{}'.format(nsid, identity_num)
        num_images = len(glob(join(fp_im_dir, '*.jpg')))

        # The same nsid/identity pair can show up in several directories;
        # accumulate the image count under a single record.
        record = identities.setdefault(
            id_key,
            {'nsid': nsid, 'identity': identity_num, 'images': 0},
        )
        record['images'] += num_images

    if num_skipped:
        log.info('Skipped {} directories not named <nsid>_identity_<n>'.format(
            num_skipped))

    # One row per identity; columns follow the record's key order.
    df = pd.DataFrame(list(identities.values()))

    # NOTE(review): presumably prepares the directory for the output path --
    # confirm against file_utils.mkdirs. --output is optional, so opt_fp_out
    # may be None here; verify how mkdirs handles that.
    file_utils.mkdirs(opt_fp_out)
    df.to_csv(opt_fp_out, index=False)
    # Report success only after the file has actually been written.
    log.info('Wrote {} lines to {}'.format(len(df), opt_fp_out))