summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/megaface_names.py
blob: 01e93e2dd4eaf5cdb6899586ea524d621281e99b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from glob import glob
import os
from os.path import join
from pathlib import Path

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils

import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils


# Module-level logger obtained from the project's logger_utils helper;
# used by the command below for progress/summary messages.
log = logger_utils.Logger.getLogger()

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input directory')
@click.option('-o', '--output', 'opt_fp_out',
  help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
  """Creates CSV of NSIDs from MegaFace"""
  # NOTE: click uses the docstring above as the command's --help text, so it
  # is kept as a one-liner and the details are documented here instead.
  #
  # Walks `opt_fp_in` recursively and treats every directory whose name looks
  # like `<nsid>_<anything>_<identity>` (e.g. `1234567@N05_identity_1`) as one
  # identity folder. For each (nsid, identity) pair the number of `*.jpg`
  # files found directly inside its folder(s) is summed, and one row per pair
  # is written to `opt_fp_out` with the columns: nsid, identity, images.
  #
  # ctx:        click context (unused here, injected by @click.pass_context)
  # opt_fp_in:  root directory that is scanned recursively
  # opt_fp_out: passed to DataFrame.to_csv, i.e. used as the CSV file path
  #
  # Raises click.UsageError when no output path was given.

  from tqdm import tqdm

  # `--output` is optional on the command line but is always needed below;
  # fail early with a readable message instead of after the full scan
  if not opt_fp_out:
    raise click.UsageError('Missing option "-o" / "--output".')

  # -------------------------------------------------
  # process
  # the trailing slash restricts the glob to directories; with recursive=True
  # the pattern also matches `opt_fp_in` itself
  fp_im_dirs = glob(join(opt_fp_in, '**/'), recursive=True)

  log.info('Found {} directories'.format(len(fp_im_dirs)))

  identities = {}
  num_skipped = 0

  for fp_im_dir in tqdm(fp_im_dirs):
    # 1234567@N05_identity_1
    name_parts = Path(fp_im_dir).name.split('_')
    if len(name_parts) < 3:
      # not an identity folder (e.g. the input root or an intermediate dir)
      num_skipped += 1
      continue
    nsid, identity_num = name_parts[0], name_parts[2]
    id_key = '{}_{}'.format(nsid, identity_num)
    num_images = len(glob(join(fp_im_dir, '*.jpg')))
    # several folders can map to the same key, so accumulate their counts
    entry = identities.setdefault(
      id_key, {'nsid': nsid, 'identity': identity_num, 'images': 0})
    entry['images'] += num_images

  if num_skipped:
    log.info('Skipped {} directories with unexpected names'.format(num_skipped))

  # convert to DataFrame; explicit columns keep the CSV header stable even
  # when no identity folder was found
  columns = ['nsid', 'identity', 'images']
  df = pd.DataFrame(list(identities.values()), columns=columns)

  file_utils.mkdirs(opt_fp_out)

  # report success only after the file has actually been written
  df.to_csv(opt_fp_out, index=False)
  log.info('Wrote {} lines to {}'.format(len(df), opt_fp_out))