diff options
Diffstat (limited to 'megapixels/commands/datasets/identity_meta_vgg_face2.py')
| -rw-r--r-- | megapixels/commands/datasets/identity_meta_vgg_face2.py | 88 |
1 files changed, 88 insertions, 0 deletions
'''
Build the identity metadata CSV for the VGG Face2 dataset by matching the
dataset's file records against a prepared identity meta file
'''
import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()

# columns copied from the identity meta file into the output CSV, in order
_IDENTITY_FIELDS = ('description', 'name', 'images', 'gender')


def _collect_identities(records, df_identity_meta, identity_key):
    """Return one metadata dict per distinct identity, in first-seen order.

    :param records: iterable of file-record rows (e.g. ``DataFrame.itertuples()``)
        exposing ``identity_index`` and an attribute named by ``identity_key``
    :param df_identity_meta: DataFrame holding an ``identity_key`` column plus
        the columns listed in ``_IDENTITY_FIELDS``
    :param identity_key: name of the column/attribute used to match a record
        to its identity meta row
    :returns: list of dicts keyed by ``_IDENTITY_FIELDS``
    :raises IndexError: if a record's identity key has no row in the meta file
    """
    identities = []
    seen_indices = set()  # set membership keeps the per-record check O(1)
    for record in records:
        identity_index = record.identity_index
        if identity_index in seen_indices:
            continue
        seen_indices.add(identity_index)

        # the meta lookup scans a whole column, so only run it the first
        # time an identity is encountered instead of once per file record
        identity_value = getattr(record, identity_key)
        matches = df_identity_meta.loc[
            df_identity_meta[identity_key] == identity_value]
        if matches.empty:
            raise IndexError(
                'No identity meta row where {} == {!r}'.format(
                    identity_key, identity_value))

        # if several meta rows share the key, the first one wins
        identities.append(
            {field: matches[field].values[0] for field in _IDENTITY_FIELDS})
    return identities


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_force):
    """Create the VGG Face2 identity metadata CSV from an identity meta file"""

    # heavy third-party imports stay local to the command
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.models.data_store import DataStore

    # output file: data store default unless overridden with -o
    opt_dataset = types.Dataset.VGG_FACE2
    data_store = DataStore(opt_data_store, opt_dataset)
    if opt_fp_out is None:
        fp_out = data_store.metadata(types.Metadata.IDENTITY)
    else:
        fp_out = opt_fp_out
    log.debug(fp_out)
    # exit if exists
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    # load file records; each row carries 'identity_key' and 'identity_index'
    identity_key = 'identity_key'
    fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
    df_record = pd.read_csv(fp_record).set_index('index')

    # load identity meta
    # this file is maybe prepared in a Jupyter notebook; it must contain the
    # "identity_key" column used to match it against the file records
    df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')

    # iterate records and keep the meta of each identity the first time its
    # 'identity_index' appears
    # NOTE(review): output rows are numbered by first-seen order, which equals
    # 'identity_index' only if the records are sorted by it -- confirm
    records = tqdm(df_record.itertuples(), total=len(df_record))
    identities = _collect_identities(records, df_identity_meta, identity_key)

    # write to csv; explicit columns keep the header even with no identities
    df_identity = pd.DataFrame(identities, columns=list(_IDENTITY_FIELDS))
    df_identity.index.name = 'index'
    Path(fp_out).parent.mkdir(parents=True, exist_ok=True)
    df_identity.to_csv(fp_out)
