summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/identity_meta_vgg_face2.py
blob: 85b6644d945c7e182f3e15aa54feb726cca3c362 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
'''
Build the identity CSV for a dataset by matching each file record's
subdirectory-derived 'identity_key' to rows in a prepared identity-meta file
(name, description, images, gender).
'''
import click

from app.settings import types
from app.models.dataset import Dataset
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Identity meta file')
@click.option('-o', '--output', 'opt_fp_out', default=None,
  help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
  type=cfg.DataStoreVar,
  default=click_utils.get_default(types.DataStore.SSD),
  show_default=True,
  help=click_utils.show_help(types.DataStore))  # was types.Dataset: copy-paste bug, option is a DataStore
@click.option('-f', '--force', 'opt_force', is_flag=True,
  help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_force):
  """Build the VGG_FACE2 identity CSV by joining file records to identity metadata.

  Reads the dataset's FILE_RECORD CSV, and for each unique identity_index
  copies (description, name, images, gender) from the identity-meta CSV
  (--input) whose 'identity_key' column matches, then writes the result to
  the IDENTITY metadata CSV (or --output).
  """

  # deferred imports keep CLI startup fast (matches this file's convention);
  # previously-unused imports (sys, glob, os.path.join, time, cv2,
  # file_utils, im_utils) removed
  from pathlib import Path

  import pandas as pd
  from tqdm import tqdm

  from app.models.data_store import DataStore

  # resolve output filepath: enum-derived default unless overridden via -o
  opt_dataset = types.Dataset.VGG_FACE2
  data_store = DataStore(opt_data_store, opt_dataset)
  fp_out = data_store.metadata(types.Metadata.IDENTITY) if opt_fp_out is None else opt_fp_out
  log.debug(fp_out)
  # exit early if the output exists and --force was not given
  if not opt_force and Path(fp_out).exists():
    log.error('File exists. Use "-f / --force" to overwrite')
    return

  # load per-file records; 'index' is the canonical row id
  identity_key = 'identity_key'
  fp_record = data_store.metadata(types.Metadata.FILE_RECORD)
  df_record = pd.read_csv(fp_record).set_index('index')

  # load identity meta (prepared externally, e.g. in a Jupyter notebook);
  # must contain an 'identity_key' column plus the copied attribute columns
  df_identity_meta = pd.read_csv(opt_fp_in).set_index('index')

  # one output row per unique identity_index, in first-seen record order
  identities = []
  seen_identity_indices = set()  # set: O(1) membership vs prior O(n) list scan
  for ds_record in tqdm(df_record.itertuples(), total=len(df_record)):
    identity_index = ds_record.identity_index
    if identity_index in seen_identity_indices:
      continue
    seen_identity_indices.add(identity_index)
    # lookup hoisted behind the seen-check: it is a pure filter, so doing it
    # only for new identities is equivalent but avoids redundant scans
    ds_identity_meta = df_identity_meta.loc[df_identity_meta[identity_key] == ds_record.identity_key]
    if ds_identity_meta.empty:
      # guard: the original raised IndexError on .values[0] for a missing
      # match; log and skip instead so one bad key can't abort the run
      log.warning('No identity meta found for identity_key: {}'.format(ds_record.identity_key))
      continue
    identities.append({
      'description': ds_identity_meta.description.values[0],
      'name': ds_identity_meta.name.values[0],
      'images': ds_identity_meta.images.values[0],
      'gender': ds_identity_meta.gender.values[0],
      })

  # write CSV with a named sequential index column
  df_identity = pd.DataFrame.from_dict(identities)
  df_identity.index.name = 'index'
  df_identity.to_csv(fp_out)