1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
'''
add identity from description using subdir
'''
import click
from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger
log = Logger.getLogger()
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_force):
    """Write an identity CSV with one row per unique identity_index"""
    # Parameters are documented here rather than in the docstring because
    # click prints the docstring as the command's --help text.
    #   ctx:            click context injected by @click.pass_context (unused)
    #   opt_fp_in:      CSV of identity meta; read with an 'index' column and
    #                   must contain 'identity_key', 'description', 'name',
    #                   'images' and 'gender' columns
    #   opt_fp_out:     output CSV path; when None the path comes from
    #                   DataStore.metadata(types.Metadata.IDENTITY)
    #   opt_data_store: data store selector passed to DataStore
    #   opt_force:      overwrite the output file if it already exists
    # Raises IndexError when a record's identity key has no row in opt_fp_in.
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.models.data_store import DataStore

    log = Logger.getLogger()

    # resolve the output file (the dataset is fixed to VGG_FACE2 here)
    opt_dataset = types.Dataset.VGG_FACE2
    data_store = DataStore(opt_data_store, opt_dataset)
    fp_out = data_store.metadata(types.Metadata.IDENTITY) if opt_fp_out is None else opt_fp_out

    # exit if the output exists and overwriting was not requested
    log.debug(fp_out)
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    # load file records; each row carries 'identity_key' and 'identity_index'
    identity_key = 'identity_key'
    fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
    df_record = pd.read_csv(fp_record).set_index('index')

    # load identity meta (this file is maybe prepared in a Jupyter notebook);
    # it is joined to the records through the 'identity_key' column
    df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')

    log.debug(type(df_record))

    # Emit one row per identity_index, in order of first appearance in the
    # records. A set keeps the "already seen" test O(1) per record.
    identities = []
    seen_identity_indices = set()
    for ds_record in tqdm(df_record.itertuples(), total=len(df_record)):
        identity_index = ds_record.identity_index
        if identity_index in seen_identity_indices:
            continue
        seen_identity_indices.add(identity_index)

        # Only the first record of each identity needs the (linear) meta
        # lookup, so it is done after the duplicate check.
        identity_value = getattr(ds_record, identity_key)
        ds_identity_meta = df_identity_meta.loc[
            df_identity_meta[identity_key] == identity_value]
        if ds_identity_meta.empty:
            # same exception type as indexing an empty result, but with a
            # message that says which key is missing and where
            raise IndexError('No identity meta row with {}={!r} in {}'.format(
                identity_key, identity_value, opt_fp_in))

        # Bracket access: a column called 'name' should not depend on
        # DataFrame attribute fallback. The first matching row wins.
        identities.append({
            'description': ds_identity_meta['description'].values[0],
            'name': ds_identity_meta['name'].values[0],
            'images': ds_identity_meta['images'].values[0],
            'gender': ds_identity_meta['gender'].values[0],
        })

    # write to csv; the row position becomes the 'index' column
    df_identity = pd.DataFrame.from_dict(identities)
    df_identity.index.name = 'index'
    df_identity.to_csv(fp_out)
|