# Provenance: commit 88ec48e1c4d93ba9cd3aa186c068ef2aa4c27c56 by adamhrv,
# Mon, 17 Dec 2018 01:37:31 +0100, subject "fixing dataset procesosrs".
# New file: megapixels/commands/datasets/identity_meta_lfw.py
'''
add identity from description using subdir
'''
import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--column', 'opt_identity_key', default='identity_key',
              help='Match column')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_identity_key, opt_data_store, opt_force):
    """Build the LFW identity metadata CSV from an identity meta file.

    Walks the dataset's file records and, for each identity_index seen for
    the first time, copies description, name, images and gender from the row
    of the identity meta file whose match column equals the record's value.
    """
    # Heavy/optional dependencies are imported lazily, as in the original,
    # so that merely loading the command stays cheap.
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.models.data_store import DataStore

    # Columns copied from the identity meta file into the output CSV.
    identity_columns = ['description', 'name', 'images', 'gender']

    # Resolve the output path: explicit override wins, otherwise the data
    # store's default location for identity metadata.
    opt_dataset = types.Dataset.LFW
    data_store = DataStore(opt_data_store, opt_dataset)
    fp_out = data_store.metadata(types.Metadata.IDENTITY) if opt_fp_out is None else opt_fp_out
    log.debug(fp_out)

    # Refuse to clobber an existing file unless explicitly forced.
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwrite')
        return

    # Load the per-file records of the dataset.
    fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
    df_record = pd.read_csv(fp_record).set_index('index')

    # Load the identity meta file (the original author notes it may be
    # prepared in a Jupyter notebook); it must contain the match column.
    df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')

    identities = []
    seen_identity_indices = set()
    for _, ds_record in tqdm(df_record.iterrows(), total=len(df_record)):
        identity_index = ds_record['identity_index']
        if identity_index in seen_identity_indices:
            # Only the first record of each identity contributes a row, so
            # skip the (linear-scan) meta lookup for the rest.
            continue
        seen_identity_indices.add(identity_index)

        identity_value = ds_record[opt_identity_key]
        ds_identity_meta = df_identity_meta.loc[
            df_identity_meta[opt_identity_key] == identity_value]
        if ds_identity_meta.empty:
            # Previously surfaced as a bare IndexError from `.values[0]`.
            raise click.ClickException(
                'No row in "{}" where column "{}" equals "{}"'.format(
                    opt_fp_in, opt_identity_key, identity_value))

        # Bracket access avoids clashes between column names (e.g. "name")
        # and pandas attributes; the first matching row wins.
        identities.append(
            {column: ds_identity_meta[column].values[0] for column in identity_columns})

    # NOTE(review): output rows are numbered by order of first appearance,
    # which equals identity_index only if the records list identities in
    # ascending, gap-free order starting at 0 -- confirm against the records.
    df_identity = pd.DataFrame(identities, columns=identity_columns)
    df_identity.index.name = 'index'
    df_identity.to_csv(fp_out)

    # Sample rows left by the original author. The column set includes
    # identity_key, so this is presumably the identity meta input format
    # rather than the output -- TODO confirm:
    #   index,name,name_orig,description,gender,images,image_index,identity_key
    #   0,A. J. Cook,AJ Cook,Canadian actress,f,1,0,AJ_Cook