add pose, indexing

author: adamhrv <adam@ahprojects.com> 2018-12-13 14:39:07 +0100
committer: adamhrv <adam@ahprojects.com> 2018-12-13 14:39:07 +0100
commit: bd51b3cdf474c93b1d7c667d9e5a33159c97640a (patch)
tree: 6a5ae5524efa971cbd348cc2720d200fbeb2fecb /megapixels/commands
parent: 49a49bebe3f972e93add837180f5672a4ae62ce0 (diff)
12 files changed, 899 insertions, 27 deletions
diff --git a/megapixels/commands/cv/csv_to_faces_mt.py b/megapixels/commands/cv/csv_to_faces_mt.py
new file mode 100644
index 00000000..64c8b965
--- /dev/null
+++ b/megapixels/commands/cv/csv_to_faces_mt.py
@@ -0,0 +1,105 @@
+"""
+Reads in CSV of ROIs and extracts facial regions with padding
+"""
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input CSV')
+@click.option('-m', '--media', 'opt_dir_media', required=True,
+  help='Input image/video directory')
+@click.option('-o', '--output', 'opt_dir_out', required=True,
+  help='Output directory for extracted ROI images')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.option('--padding', 'opt_padding', default=0.25,
+  help='Facial padding as percentage of face width')
+@click.option('--ext', 'opt_ext_out', default='png', type=click.Choice(['jpg', 'png']),
+  help='Output image type')
+@click.option('--min', 'opt_min', default=(60, 60),
+  help='Minimum original face size')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_dir_media, opt_dir_out, opt_slice, 
+  opt_padding, opt_ext_out, opt_min):
+  """Converts ROIs to images"""
+  
+  import os
+  from os.path import join
+  from pathlib import Path
+  from glob import glob
+  
+  from tqdm import tqdm
+  import numpy as np
+  from PIL import Image, ImageOps, ImageFilter, ImageDraw
+  import cv2 as cv
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils, im_utils
+  from app.models.bbox import BBox
+
+  # -------------------------------------------------
+  # process here
+  log = logger_utils.Logger.getLogger()
+  
+  df_rois = pd.read_csv(opt_fp_in, dtype={'subdir': str, 'fn': str})
+  if opt_slice:
+    df_rois = df_rois[opt_slice[0]:opt_slice[1]]
+  
+  log.info('Processing {:,} rows'.format(len(df_rois)))
+
+  file_utils.mkdirs(opt_dir_out)
+
+  df_rois_grouped = df_rois.groupby(['fn'])  # group by fn/filename
+  groups = df_rois_grouped.groups
+  skipped = []
+
+  for group in tqdm(groups):
+    # get image
+    group_rows = df_rois_grouped.get_group(group)
+
+    row = group_rows.iloc[0]
+    fp_im = join(opt_dir_media, str(row['subdir']), '{fn}.{ext}'.format(**row))  # TODO change to ext
+    try:
+      im = Image.open(fp_im).convert('RGB')
+      im.verify()
+    except Exception as e:
+      log.warn('Could not open: {}'.format(fp_im))
+      log.error(e)
+      continue
+
+    for idx, roi in group_rows.iterrows():
+      # get bbox to im dimensions
+      xywh = [roi['x'], roi['y'], roi['w'] , roi['h']]
+      bbox = BBox.from_xywh(*xywh)
+      dim = im.size
+      bbox_dim = bbox.to_dim(dim)
+      # expand
+      opt_padding_px = int(opt_padding * bbox_dim.width)
+      bbox_dim_exp = bbox_dim.expand_dim(opt_padding_px, dim)
+      # crop
+      x1y2 = bbox_dim_exp.pt_tl + bbox_dim_exp.pt_br
+      im_crop = im.crop(box=x1y2)
+
+      # strip exif, create new image and paste data
+      im_crop_data = list(im_crop.getdata())
+      im_crop_no_exif = Image.new(im_crop.mode, im_crop.size)
+      im_crop_no_exif.putdata(im_crop_data)
+
+      # save
+      idx_zpad = file_utils.zpad(idx, zeros=3)
+      subdir = '' if roi['subdir'] == '.' else '{}_'.format(roi['subdir'])
+      subdir = subdir.replace('/', '_')
+      fp_im_out = join(opt_dir_out, '{}{}{}.{}'.format(subdir, roi['fn'], idx_zpad, opt_ext_out))
+      # threshold size and save
+      if im_crop_no_exif.size[0] < opt_min[0] or im_crop_no_exif.size[1] < opt_min[1]:
+        skipped.append(fp_im_out)
+        log.info('Face too small: {}, idx: {}'.format(fp_im, idx))
+      else:
+        im_crop_no_exif.save(fp_im_out)
+
+  log.info('Skipped {:,} images'.format(len(skipped)))
diff --git a/megapixels/commands/cv/embeddings.py b/megapixels/commands/cv/embeddings.py
new file mode 100644
index 00000000..9cb26ae7
--- /dev/null
+++ b/megapixels/commands/cv/embeddings.py
@@ -0,0 +1,100 @@
+"""
+Compute face recognition vectors for ROIs and save them to CSV
+"""
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-r', '--records', 'opt_fp_records', required=True,
+  help='Input directory')
+@click.option('-m', '--media', 'opt_fp_media', required=True,
+  help='Image directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output CSV')
+@click.option('--size', 'opt_size', 
+  type=(int, int), default=(300, 300),
+  help='Output image size')
+@click.option('-g', '--gpu', 'opt_gpu', default=0,
+  help='GPU index')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.option('-j', '--jitters', 'opt_jitters', default=cfg.DLIB_FACEREC_JITTERS,
+  help='Number of jitters')
+@click.option('-p', '--padding', 'opt_padding', default=cfg.DLIB_FACEREC_PADDING,
+  help='Percentage padding')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out, opt_fp_media, opt_size, opt_gpu,
+  opt_slice, opt_jitters, opt_padding, opt_force):
+  """Converts frames with faces to CSV of rows"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  
+  from tqdm import tqdm
+  import numpy as np
+  import dlib  # must keep a local reference for dlib
+  import cv2 as cv
+  import dlib
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils, im_utils
+  from app.models.bbox import BBox
+  from app.processors import face_recognition
+
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  if not opt_force and Path(opt_fp_out).exists():
+    log.error('File exists. Use "-f / --force" to overwite')
+    return
+  
+  # init dlib FR
+  facerec = face_recognition.RecognitionDLIB()
+
+  # load data
+  df_rois = pd.read_csv(opt_fp_in)
+  df_records = pd.read_csv(opt_fp_records)
+
+  if opt_slice:
+    df_rois = df_rois[opt_slice[0]:opt_slice[1]]
+  log.info('Processing {:,} rows'.format(len(df_rois)))
+  nrows = len(df_rois)
+
+  # face vecs
+  vecs = []
+
+  for roi_idx, row in tqdm(df_rois.iterrows(), total=nrows):
+    # make image path
+    record_id = int(row['id'])
+    df = df_records.iloc[record_id]
+    fp_im = join(opt_fp_media, df['subdir'], '{}.{}'.format(df['fn'], df['ext'])) 
+    # load image
+    im = cv.imread(fp_im)
+    # make bbox
+    xywh = [row['x'], row['y'], row['w'] , row['h']]
+    bbox = BBox.from_xywh(*xywh)
+    # scale to actual image size
+    dim = (row['image_width'], row['image_height'])
+    bbox_dim = bbox.to_dim(dim)
+    # compute vec
+    vec = facerec.vec(im, bbox_dim, jitters=opt_jitters, padding=opt_padding)
+    vec_str = ','.join([repr(x) for x in vec])
+    vecs.append( {'id': row['id'], 'vec': vec_str})
+  
+  # save data
+  file_utils.mkdirs(opt_fp_out)
+  df_vecs = pd.DataFrame.from_dict(vecs)
+  df_vecs.to_csv(opt_fp_out, index=False)
+  log.info('saved {:,} lines to {}'.format(len(df_vecs), opt_fp_out))
+\ No newline at end of file
diff --git a/megapixels/commands/cv/face_pose_to_csv.py b/megapixels/commands/cv/face_pose_to_csv.py
new file mode 100644
index 00000000..ca7489de
--- /dev/null
+++ b/megapixels/commands/cv/face_pose_to_csv.py
@@ -0,0 +1,105 @@
+"""
+Estimate face pose from ROI landmarks and save the results to CSV
+"""
+
+import click
+# from PIL import Image, ImageOps, ImageFilter, ImageDraw
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+color_filters = {'color': 1, 'gray': 2, 'all': 3}
+
+@click.command()
+@click.option('-f', '--files', 'opt_fp_files', required=True,
+  help='Input ROI CSV')
+@click.option('-r', '--rois', 'opt_fp_rois', required=True,
+  help='Input ROI CSV')
+@click.option('-m', '--media', 'opt_dir_media', required=True,
+  help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output CSV')
+@click.option('--size', 'opt_size', 
+  type=(int, int), default=(300, 300),
+  help='Output image size')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_files, opt_fp_rois, opt_dir_media, opt_fp_out, opt_size, 
+  opt_slice, opt_force):
+  """Converts ROIs to pose: roll, yaw, pitch"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  from glob import glob
+  
+  from tqdm import tqdm
+  import numpy as np
+  import dlib  # must keep a local reference for dlib
+  import cv2 as cv
+  import pandas as pd
+
+  from app.models.bbox import BBox
+  from app.utils import logger_utils, file_utils, im_utils
+  from app.processors.face_landmarks import LandmarksDLIB
+  from app.processors.face_pose import FacePoseDLIB
+
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  # init face processors
+  face_pose = FacePoseDLIB()
+  face_landmarks = LandmarksDLIB()
+
+  df_files = pd.read_csv(opt_fp_files)
+  df_rois = pd.read_csv(opt_fp_rois)
+
+  if not opt_force and Path(opt_fp_out).exists():
+    log.error('File exists. Use "-f / --force" to overwite')
+    return
+  
+  if opt_slice:
+    df_rois = df_rois[opt_slice[0]:opt_slice[1]]
+  
+  # -------------------------------------------------
+  # process here
+
+  df_roi_groups = df_rois.groupby('index')
+  log.debug('processing {:,} groups'.format(len(df_roi_groups)))
+
+
+  poses = []
+
+  #for df_roi_group in tqdm(df_roi_groups.itertuples(), total=len(df_roi_groups)):
+  for df_roi_group_idx, df_roi_group in tqdm(df_roi_groups):
+    # make fp
+    image_index = df_roi_group.image_index.values[0]
+    pds_file = df_files.iloc[image_index]
+    fp_im = join(opt_dir_media, pds_file.subdir, '{}.{}'.format(pds_file.fn, pds_file.ext))
+    im = cv.imread(fp_im)
+    # get bbox
+    x = df_roi_group.x.values[0]
+    y = df_roi_group.y.values[0]
+    w = df_roi_group.w.values[0]
+    h = df_roi_group.h.values[0]
+    dim = im.shape[:2][::-1]
+    bbox = BBox.from_xywh(x, y, w, h).to_dim(dim)
+    # get pose
+    landmarks = face_landmarks.landmarks(im, bbox)
+    pose = face_pose.pose(landmarks, dim)
+    pose['image_index'] = image_index
+    poses.append(pose)
+
+
+  # save data
+  file_utils.mkdirs(opt_fp_out)
+  df = pd.DataFrame.from_dict(poses)
+  df.index.name = 'index'
+  df.to_csv(opt_fp_out)
+\ No newline at end of file
diff --git a/megapixels/commands/cv/faces_to_csv.py b/megapixels/commands/cv/faces_to_csv.py
index 07226c31..1fd47571 100644
--- a/megapixels/commands/cv/faces_to_csv.py
+++ b/megapixels/commands/cv/faces_to_csv.py
@@ -30,7 +30,7 @@ color_filters = {'color': 1, 'gray': 2, 'all': 3}
   help='GPU index')
 @click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1),
   help='Confidence minimum threshold')
-@click.option('--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4),
+@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4),
   help='Number pyramids to upscale for DLIB detectors')
 @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
   help='Slice list of files')
@@ -78,6 +78,8 @@ def cli(ctx, opt_dirs_in, opt_fp_out, opt_ext, opt_size, opt_detector_type,
     detector = face_detector.DetectorDLIBCNN(opt_gpu)
   elif opt_detector_type == types.FaceDetectNet.DLIB_HOG:
     detector = face_detector.DetectorDLIBHOG()
+  elif opt_detector_type == types.FaceDetectNet.MTCNN:
+    detector = face_detector.DetectorMTCNN()
   elif opt_detector_type == types.FaceDetectNet.HAAR:
     log.error('{} not yet implemented'.format(opt_detector_type.name))
     return
@@ -129,6 +131,8 @@ def cli(ctx, opt_dirs_in, opt_fp_out, opt_ext, opt_size, opt_detector_type,
     subdir = str(fpp_im.parent.relative_to(opt_dir_in))
 
     for bbox in bboxes:
+      # log.debug('is square: {}'.format(bbox.w == bbox.h))
+      nw,nh = int(bbox.w * im.shape[1]),  int(bbox.h * im.shape[0])
       roi = {
         'fn': fpp_im.stem, 
         'ext': fpp_im.suffix.replace('.',''), 
diff --git a/megapixels/commands/cv/faces_to_csv_indexed.py b/megapixels/commands/cv/faces_to_csv_indexed.py
new file mode 100644
index 00000000..ef958f89
--- /dev/null
+++ b/megapixels/commands/cv/faces_to_csv_indexed.py
@@ -0,0 +1,156 @@
+"""
+Detect faces in the image files listed in a CSV and save the ROIs to CSV
+"""
+
+import click
+# from PIL import Image, ImageOps, ImageFilter, ImageDraw
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+color_filters = {'color': 1, 'gray': 2, 'all': 3}
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input CSV (eg image_files.csv)')
+@click.option('-m', '--media', 'opt_dir_media', required=True,
+  help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output CSV')
+@click.option('--size', 'opt_size', 
+  type=(int, int), default=(300, 300),
+  help='Output image size')
+@click.option('-t', '--detector-type', 'opt_detector_type',
+  type=cfg.FaceDetectNetVar,
+  default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN),
+  help=click_utils.show_help(types.FaceDetectNet))
+@click.option('-g', '--gpu', 'opt_gpu', default=0,
+  help='GPU index')
+@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1),
+  help='Confidence minimum threshold')
+@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4),
+  help='Number pyramids to upscale for DLIB detectors')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
+  help='Display detections to debug')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.option('--color', 'opt_color_filter', 
+  type=click.Choice(color_filters.keys()), default='all',
+  help='Filter to keep color or grayscale images (color = keep color')
+@click.option('--largest', 'opt_largest', is_flag=True, 
+  help='Only keep largest face')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_dir_media, opt_fp_out, opt_size, opt_detector_type, 
+  opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_force, opt_color_filter,
+  opt_largest):
+  """Converts frames with faces to CSV of ROIs"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  from glob import glob
+  
+  from tqdm import tqdm
+  import numpy as np
+  import dlib  # must keep a local reference for dlib
+  import cv2 as cv
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils, im_utils
+  from app.processors import face_detector 
+
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  if not opt_force and Path(opt_fp_out).exists():
+    log.error('File exists. Use "-f / --force" to overwite')
+    return
+  
+  if opt_detector_type == types.FaceDetectNet.CVDNN:
+    detector = face_detector.DetectorCVDNN()
+  elif opt_detector_type == types.FaceDetectNet.DLIB_CNN:
+    detector = face_detector.DetectorDLIBCNN(opt_gpu)
+  elif opt_detector_type == types.FaceDetectNet.DLIB_HOG:
+    detector = face_detector.DetectorDLIBHOG()
+  elif opt_detector_type == types.FaceDetectNet.MTCNN:
+    detector = face_detector.DetectorMTCNN()
+  elif opt_detector_type == types.FaceDetectNet.HAAR:
+    log.error('{} not yet implemented'.format(opt_detector_type.name))
+    return
+
+  
+  # -------------------------------------------------
+  # process here
+  color_filter = color_filters[opt_color_filter]
+  
+  # get list of files to process
+  df_files = pd.read_csv(opt_fp_in).set_index('index')
+
+  if opt_slice:
+    df_files = df_files[opt_slice[0]:opt_slice[1]]
+  log.debug('processing {:,} files'.format(len(df_files)))
+
+
+  data = []
+
+  for df_file in tqdm(df_files.itertuples(), total=len(df_files)):
+    fp_im = join(opt_dir_media, df_file.subdir, '{}.{}'.format(df_file.fn, df_file.ext))
+    im = cv.imread(fp_im)
+
+    # filter out color or grayscale images
+    if color_filter != color_filters['all']:
+      try:
+        is_gray = im_utils.is_grayscale(im)
+        if is_gray and color_filter != color_filters['gray']:
+          log.debug('Skipping grayscale image: {}'.format(fp_im))
+          continue
+      except Exception as e:
+        log.error('Could not check grayscale: {}'.format(fp_im))
+        continue
+        
+    try:
+      bboxes = detector.detect(im, opt_size=opt_size, opt_pyramids=opt_pyramids, opt_largest=opt_largest)
+    except Exception as e:
+      log.error('could not detect: {}'.format(fp_im))
+      log.error('{}'.format(e))
+      continue
+
+    for bbox in bboxes:
+      roi = {
+        'image_index': int(df_file.Index),
+        'x': bbox.x, 
+        'y': bbox.y, 
+        'w': bbox.w, 
+        'h': bbox.h,
+        'image_width': im.shape[1],
+        'image_height': im.shape[0]}
+      data.append(roi)
+    
+    # debug display
+    if opt_display and len(bboxes):
+      bbox_dim = bbox.to_dim(im.shape[:2][::-1])  # w,h
+      im_md = im_utils.resize(im, width=min(1200, opt_size[0]))
+      for bbox in bboxes:
+        bbox_dim = bbox.to_dim(im_md.shape[:2][::-1])
+        cv.rectangle(im_md, bbox_dim.pt_tl, bbox_dim.pt_br, (0,255,0), 3)
+      cv.imshow('', im_md)
+      while True:
+        k = cv.waitKey(1) & 0xFF
+        if k == 27 or k == ord('q'):  # ESC
+          cv.destroyAllWindows()
+          sys.exit()
+        elif k != 255:
+          # any key to continue
+          break
+
+  # save data
+  file_utils.mkdirs(opt_fp_out)
+  df = pd.DataFrame.from_dict(data)
+  df.index.name = 'index'
+  df.to_csv(opt_fp_out)
+\ No newline at end of file
diff --git a/megapixels/commands/cv/resize.py b/megapixels/commands/cv/resize.py
index f535c8b6..dcd621b3 100644
--- a/megapixels/commands/cv/resize.py
+++ b/megapixels/commands/cv/resize.py
@@ -62,9 +62,11 @@ centerings = {
   help='Crop focal point')
 @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
   help='Slice the input list')
+@click.option('-t', '--threads', 'opt_threads', default=8,
+  help='Number of threads')
 @click.pass_context
 def cli(ctx, opt_dir_in, opt_dir_out, opt_glob_ext, opt_size, opt_scale_method,
- opt_equalize, opt_sharpen, opt_center, opt_slice):
+ opt_equalize, opt_sharpen, opt_center, opt_slice, opt_threads):
   """Crop, mirror images"""
   
   import os
@@ -72,6 +74,8 @@ def cli(ctx, opt_dir_in, opt_dir_out, opt_glob_ext, opt_size, opt_scale_method,
   from pathlib import Path
   from glob import glob
   from tqdm import tqdm
+  from multiprocessing.dummy import Pool as ThreadPool
+  from functools import partial
 
   from app.utils import logger_utils, file_utils, im_utils
   
@@ -80,46 +84,63 @@ def cli(ctx, opt_dir_in, opt_dir_out, opt_glob_ext, opt_size, opt_scale_method,
 
   log = logger_utils.Logger.getLogger()
 
-  centering = centerings[opt_center]
 
   # -------------------------------------------------
   # process here
 
+  def pool_resize(fp_im, opt_size, scale_method, centering):
+    # Threaded image resize function
+    try:
+      pbar.update(1)
+      try:
+        im = Image.open(fp_im).convert('RGB')
+        im.verify()
+      except Exception as e:
+        log.warn('Could not open: {}'.format(fp_im))
+        log.error(e)
+        return False
+
+      im = ImageOps.fit(im, opt_size, method=scale_method, centering=centering)
+
+      if opt_equalize:
+        im_np = im_utils.pil2np(im)
+        im_np_eq = eq_hist_yuv(im_np)
+        im_np = cv.addWeighted(im_np_eq, 0.35, im_np, 0.65, 0)
+        im = im_utils.np2pil(im_np)
+
+      if opt_sharpen:
+        im = im.filter(ImageFilter.UnsharpMask)
+        
+      fp_out = join(opt_dir_out, Path(fp_im).name)
+      im.save(fp_out)
+      return True
+    except:
+      return False
+
+  centering = centerings[opt_center]
+  scale_method = methods[opt_scale_method]
+
   # get list of files to process
   fp_ims = glob(join(opt_dir_in, '*.{}'.format(opt_glob_ext)))
   if opt_slice:
     fp_ims = fp_ims[opt_slice[0]:opt_slice[1]]
   log.info('processing {:,} files'.format(len(fp_ims)))
   
-  # set scale method
-  scale_method = methods[opt_scale_method]
-
+  
   # ensure output dir exists
   file_utils.mkdirs(opt_dir_out)
 
-  # resize and save images
-  for fp_im in tqdm(fp_ims):
-    try:
-      im = Image.open(fp_im).convert('RGB')
-      im.verify()
-    except Exception as e:
-      log.warn('Could not open: {}'.format(fp_im))
-      log.error(e)
-      continue
-
-    im = ImageOps.fit(im, opt_size, method=scale_method, centering=centering)
+  # setup multithreading
+  pbar = tqdm(total=len(fp_ims))
+  pool_resize = partial(pool_resize, opt_size=opt_size, scale_method=scale_method, centering=centering)
+  #result_list = pool.map(prod_x, data_list)
+  pool = ThreadPool(opt_threads) 
+  with tqdm(total=len(fp_ims)) as pbar:
+    results = pool.map(pool_resize, fp_ims)
+  pbar.close()
 
-    if opt_equalize:
-      im_np = im_utils.pil2np(im)
-      im_np_eq = eq_hist_yuv(im_np)
-      im_np = cv.addWeighted(im_np_eq, 0.35, im_np, 0.65, 0)
-      im = im_utils.np2pil(im_np)
+  log.info('Resized: {} / {} images'.format(results.count(True), len(fp_ims)))
 
-    if opt_sharpen:
-      im = im.filter(ImageFilter.UnsharpMask)
-      
-    fp_out = join(opt_dir_out, Path(fp_im).name)
-    im.save(fp_out)
 
 
 def eq_hist_yuv(im):
diff --git a/megapixels/commands/datasets/add_uuid.py b/megapixels/commands/datasets/add_uuid.py
new file mode 100644
index 00000000..9c14c0e3
--- /dev/null
+++ b/megapixels/commands/datasets/add_uuid.py
@@ -0,0 +1,44 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out',
+  help='Output directory')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_force):
+  """Appends UUID to records CSV"""
+  
+  from glob import glob
+  from os.path import join
+  from pathlib import Path
+  import base64
+  import uuid
+
+  from tqdm import tqdm
+  import pandas as pd
+  
+  if not opt_force and Path(opt_fp_out).exists():
+    log.error('File exists. Use "-f / --force" to overwite')
+    return
+
+  # load names
+  df_records = pd.read_csv(opt_fp_in)
+  records = df_records.to_dict('index')
+  # append a UUID to every entry
+  for idx, item in records.items():
+    records[idx]['uuid'] = uuid.uuid4()
+  # save to csv
+  df_uuid = pd.DataFrame.from_dict(list(records.values()))  # ignore the indices
+  df_uuid.to_csv(opt_fp_out, index=False)
+
+  log.info('done')
+\ No newline at end of file
diff --git a/megapixels/commands/datasets/feret.py b/megapixels/commands/datasets/feret.py
new file mode 100644
index 00000000..906b4e37
--- /dev/null
+++ b/megapixels/commands/datasets/feret.py
@@ -0,0 +1,139 @@
+import bz2
+import io
+
+import click
+from PIL import Image
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
+pose_choices = {
+'fa':0, 'fb':0, 'hl':67.5, 'hr':-67.5, 'pl':90, 'pr':-90, 
+'ql':22.5, 'qr':-22.5, 'ra':45, 'rb':15, 'rc':-15, 'rd':-45, 're':-75}
+
+poses_left = ['hl', 'ql', 'pl', 'ra', 'rb']
+poses_right = ['hr', 'qr', 'pr', 'rc', 're', 're']
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output directory')
+@click.option('-a', '--angle', 'opt_angle', type=(float, float), default=(0,0),
+  help='Min/max face angles')
+@click.option('-t', '--threads', 'opt_threads', default=8,
+  help='Number of threads')
+@click.option('--flip', 'opt_flip', type=click.Choice(['r', 'l']), 
+  help='Flip profile images to the R or L')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_angle, opt_threads, opt_flip):
+  """Extracts FERET images"""
+  
+  from glob import glob
+  from os.path import join
+  from pathlib import Path
+  import time
+  from tqdm import tqdm
+  from multiprocessing.dummy import Pool as ThreadPool
+  from functools import partial
+
+  from PIL import ImageOps
+  from app.utils import file_utils
+
+  # filter angles
+  poses = [k for k, v in pose_choices.items() if \
+    abs(v) >= opt_angle[0] and abs(v) <= opt_angle[1]]
+  
+  # glob images dir for all *ppm.bz2
+  fp_ims = []
+  for pose in poses:
+    log.info('globbing pose: {}'.format(pose))
+    fp_ims += glob(join(opt_fp_in, '**/*_{}.ppm.bz2').format(pose))
+  log.info('Processing: {:,} files'.format(len(fp_ims)))
+  
+  # convert bz2 to png
+  def pool_func(fp_im, opt_fp_out, opt_flip):
+    try:
+      pbar.update(1)
+      im_pil = bz2_to_pil(fp_im)
+      fpp_im = Path(fp_im)
+      fp_out = join(opt_fp_out, '{}.png'.format(fpp_im.stem))
+      fp_out = fp_out.replace('.ppm','')  # remove ppm
+      if opt_flip:
+        pose_code = fpp_im.stem.split('_')[-1][:2]
+        # log.debug('opt_flip: {}, found: {}'.format(opt_flip, pose_code))
+        if opt_flip == 'r' and pose_code in poses_right \
+          or opt_flip == 'l' and pose_code in poses_left:
+            im_pil = ImageOps.mirror(im_pil)
+      im_pil.save(fp_out)
+      return True
+    except Exception as e:
+      log.error('Error processing: {}, error: {}'.format(fp_im, e))
+      return False
+
+  # make output directory
+  file_utils.mkdirs(opt_fp_out)
+
+  # setup multithreading
+  pbar = tqdm(total=len(fp_ims))
+  pool_resize = partial(pool_func, opt_fp_out=opt_fp_out, opt_flip=opt_flip)
+  pool = ThreadPool(opt_threads) 
+  with tqdm(total=len(fp_ims)) as pbar:
+    results = pool.map(pool_resize, fp_ims)
+  pbar.close()
+
+  # results
+  log.info('Converted: {} / {} images'.format(results.count(True), len(fp_ims)))
+
+  
+# ------------------------------------------------------------------
+# local utils
+
+def bz2_to_pil(fp_src):
+  with open(fp_src, 'rb') as fp:
+    im_raw = bz2.decompress(fp.read())
+  im_pil = Image.open(io.BytesIO(im_raw))
+  return im_pil
+
+
+
+"""
+
+A breakdown of the images by pose is:
+  Pose Angle   Images Subjects
+  fa       0     1364      994
+  fb       0     1358      993
+  hl   +67.5     1267      917
+  hr   -67.5     1320      953
+  pl     +90     1312      960
+  pr     -90     1363      994
+  ql   +22.5      761      501
+  qr   -22.5      761      501
+  ra     +45      321      261
+  rb     +15      321      261
+  rc     -15      610      423
+  rd     -45      290      236
+  re     -75      290      236
+
+  There are 13 different poses. (The orientation "right" means
+facing the photographer's right.)
+  fa  regular frontal image
+  fb  alternative frontal image, taken shortly after the
+      corresponding fa image
+  pl  profile left
+  hl  half left - head turned about 67.5 degrees left
+  ql  quarter left - head turned about 22.5 degrees left
+  pr  profile right
+  hr  half right - head turned about 67.5 degrees right
+  qr  quarter right - head turned about 22.5 degrees right
+  ra  random image - head turned about 45 degree left
+  rb  random image - head turned about 15 degree left
+  rc  random image - head turned about 15 degree right
+  rd  random image - head turned about 45 degree right
+  re  random image - head turned about 75 degree right
+
+"""
+\ No newline at end of file
diff --git a/megapixels/commands/datasets/s3.py b/megapixels/commands/datasets/s3.py
new file mode 100644
index 00000000..7769896b
--- /dev/null
+++ b/megapixels/commands/datasets/s3.py
@@ -0,0 +1,47 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+s3_dirs = {'media': cfg.S3_MEDIA_ROOT, 'metadata': cfg.S3_METADATA_ROOT}
+
+@click.command()
+@click.option('-i', '--input', 'opt_fps_in', required=True, multiple=True,
+  help='Input directory')
+@click.option('--name', 'opt_dataset_name', required=True,
+  help='Dataset key (eg "lfw"')
+@click.option('-a', '--action', 'opt_action', type=click.Choice(['sync', 'put']), default='sync',
+  help='S3 action')
+@click.option('-t', '--type', 'opt_type', type=click.Choice(s3_dirs.keys()), required=True,
+  help='S3 location')
+@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
+@click.pass_context
+def cli(ctx, opt_fps_in, opt_dataset_name, opt_action, opt_type, opt_dryrun):
+  """Syncs files with S3/spaces server"""
+  
+  from os.path import join
+  from pathlib import Path
+  
+  from tqdm import tqdm
+  import pandas as pd
+  import subprocess
+
+  from app.utils import logger_utils, file_utils
+  
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+  for opt_fp_in in opt_fps_in:
+    dir_dst = join(s3_dirs[opt_type], opt_dataset_name, '')
+    if Path(opt_fp_in).is_dir():
+      fp_src = join(opt_fp_in, '')  # add trailing slashes
+    else:
+      fp_src = join(opt_fp_in) 
+    cmd = ['s3cmd', opt_action, fp_src, dir_dst, '-P', '--follow-symlinks']
+    log.info(' '.join(cmd))
+    if not opt_dryrun:
+      subprocess.call(cmd)
+
+  
+\ No newline at end of file
diff --git a/megapixels/commands/datasets/symlink.py b/megapixels/commands/datasets/symlink.py
new file mode 100644
index 00000000..70ec6c46
--- /dev/null
+++ b/megapixels/commands/datasets/symlink.py
@@ -0,0 +1,45 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input records CSV')
+@click.option('-m', '--media', 'opt_fp_media', required=True,
+  help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output directory')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
+  """Symlinks images to new directory for S3"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  
+  from tqdm import tqdm
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils
+  
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  df_records = pd.read_csv(opt_fp_in)
+  nrows = len(df_records)
+
+  file_utils.mkdirs(opt_fp_out)
+
+  for record_id, row in tqdm(df_records.iterrows(), total=nrows):
+    # make image path
+    df = df_records.iloc[record_id]
+    fpp_src = Path(join(opt_fp_media, df['subdir'], '{}.{}'.format(df['fn'], df['ext'])))
+    fpp_dst = Path(join(opt_fp_out, '{}.{}'.format(df['uuid'], df['ext'])))
+    fpp_dst.symlink_to(fpp_src)
+
+  log.info('symlinked {:,} files'.format(nrows))
+\ No newline at end of file
diff --git a/megapixels/commands/datasets/vecs_to_id.py b/megapixels/commands/datasets/vecs_to_id.py
new file mode 100644
index 00000000..07c7389e
--- /dev/null
+++ b/megapixels/commands/datasets/vecs_to_id.py
@@ -0,0 +1,50 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-r', '--records', 'opt_fp_records', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output JSON')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
+  """Merges ID with face vectors"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  
+  from tqdm import tqdm
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils
+  
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  df_vecs = pd.read_csv(opt_fp_in)
+  df_records = pd.read_csv(opt_fp_records)
+  nrows = len(df_vecs)
+
+  # face vecs
+  id_vecs = {}
+
+  for roi_idx, row in tqdm(df_vecs.iterrows(), total=nrows):
+    record_id = int(row['id'])
+    vec = row['vec'].split(',')
+    id_vecs[record_id] = vec
+
+  # save as JSON
+  file_utils.write_json(id_vecs, opt_fp_out, verbose=True)
+
+  
+\ No newline at end of file
diff --git a/megapixels/commands/datasets/vecs_to_uuid.py b/megapixels/commands/datasets/vecs_to_uuid.py
new file mode 100644
index 00000000..7bb82083
--- /dev/null
+++ b/megapixels/commands/datasets/vecs_to_uuid.py
@@ -0,0 +1,56 @@
+"""
+Map record UUIDs to face vectors and save the result as JSON
+"""
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-r', '--records', 'opt_fp_records', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output JSON')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
+  """Merges UUID with face vectors"""
+  
+  import sys
+  import os
+  from os.path import join
+  from pathlib import Path
+  
+  from tqdm import tqdm
+  import pandas as pd
+
+  from app.utils import logger_utils, file_utils
+  
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  df_vecs = pd.read_csv(opt_fp_in)
+  df_records = pd.read_csv(opt_fp_records)
+  nrows = len(df_vecs)
+
+  # face vecs
+  uuid_vecs = {}
+
+  for roi_idx, row in tqdm(df_vecs.iterrows(), total=nrows):
+    # look up the UUID of the record this vector belongs to
+    record_id = int(row['id'])
+    uuid = df_records.iloc[record_id]['uuid']
+    vec = row['vec'].split(',')
+    uuid_vecs[uuid] = vec
+
+  # save as JSON
+  file_utils.write_json(uuid_vecs, opt_fp_out)
+
+  
+\ No newline at end of file
author	adamhrv <adam@ahprojects.com>	2018-12-13 14:39:07 +0100
committer	adamhrv <adam@ahprojects.com>	2018-12-13 14:39:07 +0100
commit	bd51b3cdf474c93b1d7c667d9e5a33159c97640a (patch)
tree	6a5ae5524efa971cbd348cc2720d200fbeb2fecb /megapixels/commands
parent	49a49bebe3f972e93add837180f5672a4ae62ce0 (diff)