# megapixels/commands/cv/csv_to_faces.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

"""
Reads in CSV of ROIs and extracts facial regions with padding
"""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input CSV')
@click.option('-m', '--media', 'opt_dir_media', required=True,
  help='Input image/video directory')
@click.option('-o', '--output', 'opt_dir_out', required=True,
  help='Output directory for extracted ROI images')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
  help='Slice list of files')
@click.option('--padding', 'opt_padding', default=0.25,
  help='Facial padding as percentage of face width')
@click.option('--ext', 'opt_ext_out', default='png', type=click.Choice(['jpg', 'png']),
  help='Output image type')
@click.option('--min', 'opt_min', default=(60, 60),
  help='Minimum original face size')
@click.pass_context
def cli(ctx, opt_fp_in, opt_dir_media, opt_dir_out, opt_slice,
  opt_padding, opt_ext_out, opt_min):
  """Converts ROIs to images"""
  # NOTE: the docstring above doubles as the click --help text, so it is kept
  # as-is and the detailed description lives in these comments.
  #
  # Reads the ROI CSV at `opt_fp_in` (columns used: subdir, fn, ext, x, y, w, h),
  # opens each referenced image under `opt_dir_media`, crops every ROI expanded
  # by `opt_padding` * ROI width (in pixels), and writes the crop to
  # `opt_dir_out` as `<subdir>_<fn><row index>.<opt_ext_out>`.
  #
  #   opt_slice   (start, stop) row slice applied to the CSV; (None, None) = all
  #   opt_min     (min_w, min_h); crops smaller than this on either axis are
  #               skipped. NOTE(review): the check is applied to the *padded*
  #               crop, although the option help says "original face size".
  #
  # Images that cannot be opened are logged and skipped; nothing is raised.

  from os.path import join

  from tqdm import tqdm
  from PIL import Image
  import pandas as pd

  from app.utils import logger_utils, file_utils
  from app.models.bbox import BBox

  # -------------------------------------------------
  # process here
  log = logger_utils.Logger.getLogger()

  df_rois = pd.read_csv(opt_fp_in, dtype={'subdir': str, 'fn': str})
  if opt_slice:
    df_rois = df_rois[opt_slice[0]:opt_slice[1]]

  log.info('Processing {:,} rows'.format(len(df_rois)))

  file_utils.mkdirs(opt_dir_out)

  skipped = []

  # Group by (subdir, fn) rather than fn alone: the same filename may occur in
  # several subdirectories, and every ROI of a group is cropped from the single
  # image opened for that group. Rows with a missing subdir or fn are dropped
  # by groupby's default NaN handling.
  for _, group_rows in tqdm(df_rois.groupby(['subdir', 'fn'])):
    # all rows of a group share one source image; build its path from the first
    row = group_rows.iloc[0]
    fp_im = join(opt_dir_media, str(row['subdir']), '{fn}.{ext}'.format(**row))
    try:
      # convert() forces a full decode, so unreadable/truncated files fail here;
      # the context manager releases the underlying file handle afterwards
      with Image.open(fp_im) as im_src:
        im = im_src.convert('RGB')
    except Exception as e:
      # best-effort batch job: report and move on to the next image
      log.warn('Could not open: {}'.format(fp_im))
      log.error(e)
      continue

    dim = im.size

    for idx, roi in group_rows.iterrows():
      # get bbox to im dimensions
      # (presumably x/y/w/h are normalized and to_dim scales them to pixels
      # -- confirm against BBox)
      bbox = BBox.from_xywh(roi['x'], roi['y'], roi['w'], roi['h'])
      bbox_dim = bbox.to_dim(dim)
      # expand by a padding proportional to the ROI width
      opt_padding_px = int(opt_padding * bbox_dim.width)
      bbox_dim_exp = bbox_dim.expand_dim(opt_padding_px, dim)
      # crop; pt_tl + pt_br is passed as the 4-value crop box
      x1y2 = bbox_dim_exp.pt_tl + bbox_dim_exp.pt_br
      im_crop = im.crop(box=x1y2)

      # output name: idx is the row's index label in the (sliced) CSV frame
      idx_zpad = file_utils.zpad(idx, zeros=3)
      subdir = '' if roi['subdir'] == '.' else '{}_'.format(roi['subdir'])
      subdir = subdir.replace('/', '_')
      fp_im_out = join(opt_dir_out, '{}{}{}.{}'.format(subdir, roi['fn'], idx_zpad, opt_ext_out))

      # threshold size before doing any copying work
      if im_crop.size[0] < opt_min[0] or im_crop.size[1] < opt_min[1]:
        skipped.append(fp_im_out)
        log.info('Face too small: {}, idx: {}'.format(fp_im, idx))
        continue

      # strip exif: paste the pixels into a brand-new image so none of the
      # source image's metadata (im.info) is carried into the saved file
      im_crop_no_exif = Image.new(im_crop.mode, im_crop.size)
      im_crop_no_exif.paste(im_crop)
      im_crop_no_exif.save(fp_im_out)

  log.info('Skipped {:,} images'.format(len(skipped)))