# path: root/megapixels/commands/datasets/megaface_age_from_orig.py
# blob: 489bebf3f05b6b22a40ce56ad04f8b2fb21b1967
import click

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input path to metadata directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
  help='Output path to age CSV')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
  """Creates CSV of MegaFace image counts per identity

  Walks every directory below the input path, parses directory names
  shaped like "1234567@N05_identity_1" and sums the .jpg files found for
  each (nsid, identity) pair. The CSV has the columns nsid, identity and
  images.
  """

  # imports are kept function-local, as in the original command
  from os.path import join
  from pathlib import Path
  from glob import glob, escape

  import pandas as pd
  from tqdm import tqdm

  from app.utils import file_utils, logger_utils

  log = logger_utils.Logger.getLogger()

  # -------------------------------------------------
  # process
  # escape() keeps glob metacharacters in the user-supplied path literal;
  # the '**/' pattern matches the input directory itself and all nested ones
  fp_im_dirs = glob(join(escape(opt_fp_in), '**/'), recursive=True)

  log.info('Found {} directories'.format(len(fp_im_dirs)))

  identities = {}
  num_skipped = 0

  for fp_im_dir in tqdm(fp_im_dirs):
    # expected directory name: 1234567@N05_identity_1
    name_parts = Path(fp_im_dir).name.split('_')
    if len(name_parts) < 3:
      # not an identity directory (e.g. the input root): skip it, but keep
      # count so the omission is visible in the log
      num_skipped += 1
      continue

    nsid, identity_num = name_parts[0], name_parts[2]
    id_key = '{}_{}'.format(nsid, identity_num)
    num_images = len(glob(join(escape(fp_im_dir), '*.jpg')))

    if id_key not in identities:
      identities[id_key] = {'nsid': nsid, 'identity': identity_num, 'images': num_images}
    else:
      # the same identity key was seen in more than one directory
      identities[id_key]['images'] += num_images

  if num_skipped:
    log.info('Skipped {} directories with unexpected names'.format(num_skipped))

  # one row per identity; explicit columns keep the header even when empty
  df = pd.DataFrame(list(identities.values()), columns=['nsid', 'identity', 'images'])

  # NOTE(review): presumably ensures the output's parent directory exists;
  # confirm against app.utils.file_utils
  file_utils.mkdirs(opt_fp_out)

  # write first, then report, so the log never claims a write that failed
  df.to_csv(opt_fp_out, index=False)
  log.info('Wrote {} lines to {}'.format(len(df), opt_fp_out))