"""Add identity from description using subdir."""

import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()


def _index_identity_meta(df_identity_meta, key_column, columns):
    """Map each identity key to a dict of its metadata values.

    When several meta rows share the same key, the first row wins.
    """
    deduped = df_identity_meta.drop_duplicates(subset=key_column, keep='first')
    return deduped.set_index(key_column)[list(columns)].to_dict(orient='index')


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              # The option selects a data store, so list DataStore choices.
              help=click_utils.show_help(types.DataStore))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_force):
    """Build the identity CSV from file records and an identity meta file.

    Writes one row (description, name, images, gender) per distinct
    identity_index found in the file records, in order of first appearance.
    """
    # NOTE: click shows the docstring above as the command's --help text.
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.models.data_store import DataStore

    log = Logger.getLogger()

    # Resolve the output file: explicit override, else the data store's
    # identity metadata path for the (hard-coded) VGG_FACE2 dataset.
    opt_dataset = types.Dataset.VGG_FACE2
    data_store = DataStore(opt_data_store, opt_dataset)
    if opt_fp_out is None:
        fp_out = data_store.metadata(types.Metadata.IDENTITY)
    else:
        fp_out = opt_fp_out

    # Refuse to clobber an existing file unless forced.
    log.debug(fp_out)
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    # Column of the identity meta file that is matched against each file
    # record's identity_key attribute.
    identity_key = 'identity_key'
    # Columns copied from the identity meta file into the output, in order.
    identity_columns = ('description', 'name', 'images', 'gender')

    # Load file records.
    fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
    df_record = pd.read_csv(fp_record).set_index('index')

    # Load identity meta and index it by key once, so the per-record work
    # below is a dict lookup rather than a DataFrame scan.
    df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')
    meta_by_key = _index_identity_meta(
        df_identity_meta, identity_key, identity_columns)

    # Collect one metadata row per distinct identity_index, keeping the
    # order in which identities first appear in the file records.
    identities = []
    seen_indices = set()
    for record in tqdm(df_record.itertuples(), total=len(df_record)):
        identity_index = record.identity_index
        if identity_index in seen_indices:
            continue
        seen_indices.add(identity_index)

        identity_value = record.identity_key
        try:
            identities.append(meta_by_key[identity_value])
        except KeyError:
            # Fail with a message that names the offending key instead of
            # an IndexError raised from an empty selection.
            raise click.ClickException(
                'No identity meta found for {} "{}" in {}'.format(
                    identity_key, identity_value, opt_fp_in)) from None

    # Write to CSV; fixed columns keep the header even when no rows exist.
    df_identity = pd.DataFrame(identities, columns=list(identity_columns))
    df_identity.index.name = 'index'
    df_identity.to_csv(fp_out)