summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/identity_meta_lfw.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/identity_meta_lfw.py')
-rw-r--r--megapixels/commands/datasets/identity_meta_lfw.py93
1 files changed, 93 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/identity_meta_lfw.py b/megapixels/commands/datasets/identity_meta_lfw.py
new file mode 100644
index 00000000..45386b23
--- /dev/null
+++ b/megapixels/commands/datasets/identity_meta_lfw.py
@@ -0,0 +1,93 @@
'''
Build the LFW identity metadata CSV: for every distinct identity found in the
dataset's file records, copy its description fields from an identity meta file
'''
import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()


# NOTE(review): the --data_store help text below lists types.Dataset values
# although the option selects a DataStore -- confirm whether
# click_utils.show_help(types.DataStore) was intended.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
    help='Override enum output filename CSV')
@click.option('--column', 'opt_identity_key', default='identity_key',
    help='Match column')
@click.option('--data_store', 'opt_data_store',
    type=cfg.DataStoreVar,
    default=click_utils.get_default(types.DataStore.SSD),
    show_default=True,
    help=click_utils.show_help(types.Dataset))
@click.option('-f', '--force', 'opt_force', is_flag=True,
    help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_identity_key, opt_data_store, opt_force):
    """Create the LFW identity metadata CSV from an identity meta file"""
    # The docstring above is kept to one line on purpose: click prints it
    # verbatim as the command's --help text.
    #
    # Parameters (all supplied by click):
    #   ctx              -- click context (unused, required by pass_context)
    #   opt_fp_in        -- path to the identity meta CSV
    #   opt_fp_out       -- output CSV path; None selects the data store's
    #                       default identity metadata path
    #   opt_identity_key -- column present in both the file records and the
    #                       identity meta file, used to match rows
    #   opt_data_store   -- data store holding the LFW metadata
    #   opt_force        -- overwrite the output file if it already exists
    #
    # Raises ValueError when a file record's key has no row in the identity
    # meta file.

    # heavy imports are deferred so that loading the command stays cheap
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.models.data_store import DataStore

    # columns copied from the identity meta file, in output order
    identity_fields = ('description', 'name', 'images', 'gender')

    # output file
    opt_dataset = types.Dataset.LFW
    data_store = DataStore(opt_data_store, opt_dataset)
    fp_out = data_store.metadata(types.Metadata.IDENTITY) if opt_fp_out is None else opt_fp_out
    log.debug(fp_out)
    # exit if exists
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    # load file records
    fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
    df_record = pd.read_csv(fp_record).set_index('index')

    # load identity meta (this file is maybe prepared in a Jupyter notebook)
    # Example layout, presumably of this input file:
    #   index,name,name_orig,description,gender,images,image_index,identity_key
    #   0,A. J. Cook,AJ Cook,Canadian actress,f,1,0,AJ_Cook
    df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')

    # Walk the records in order and emit one row per identity_index, taken
    # from the first record in which that identity appears.
    identities = []
    seen_identity_indices = set()
    for _, ds_record in tqdm(df_record.iterrows(), total=len(df_record)):
        identity_index = ds_record['identity_index']
        if identity_index in seen_identity_indices:
            continue
        seen_identity_indices.add(identity_index)

        # the lookup scans the whole meta table, so only do it for new identities
        identity_value = ds_record[opt_identity_key]
        matches = df_identity_meta.loc[df_identity_meta[opt_identity_key] == identity_value]
        if matches.empty:
            raise ValueError(
                'No row in "{}" where column "{}" equals {!r}'.format(
                    opt_fp_in, opt_identity_key, identity_value))

        # when several meta rows share the key, the first one wins
        identities.append({field: matches[field].values[0] for field in identity_fields})

    # write to csv; explicit columns keep the header stable even with no rows
    df_identity = pd.DataFrame(identities, columns=list(identity_fields))
    df_identity.index.name = 'index'
    df_identity.to_csv(fp_out)