Diffstat (limited to 'megapixels')
-rw-r--r--  megapixels/app/utils/draw_utils.py            97
-rw-r--r--  megapixels/commands/cv/face_3ddfa.py         331
-rw-r--r--  megapixels/commands/demo/all.py               17
-rw-r--r--  megapixels/commands/demo/face_3ddfa.py       314
-rw-r--r--  megapixels/commands/demo/face_age_gender.py    1
-rw-r--r--  megapixels/commands/demo/face_gender.py      127
6 files changed, 756 insertions, 131 deletions
diff --git a/megapixels/app/utils/draw_utils.py b/megapixels/app/utils/draw_utils.py
index 7083c956..3a389e68 100644
--- a/megapixels/app/utils/draw_utils.py
+++ b/megapixels/app/utils/draw_utils.py
@@ -1,8 +1,102 @@
import sys
+from math import sqrt
+import numpy as np
import cv2 as cv
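+
+# 0-based indices of the last point in each 68-landmark contour group
+# (jaw, brows, nose, eyes, mouth); keypoint polylines break at these points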
+end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
+
+# ---------------------------------------------------------------------------
+#
+# 3D landmark drawing utilities
+#
+# ---------------------------------------------------------------------------
+
+def plot_keypoints(im, kpts):
+  '''Draw 68 facial keypoints
+  :param im: the input image
+  :param kpts: (68, 3) array of keypoints
+  '''
+ im = im.copy()
+ kpts = np.round(kpts).astype(np.int32)
+ for i in range(kpts.shape[0]):
+ st = kpts[i, :2]
+ im = cv.circle(im, (st[0], st[1]), 1, (0, 0, 255), 2)
+ if i in end_list:
+ continue
+ ed = kpts[i + 1, :2]
+ im = cv.line(im, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1)
+ return im
+
+
+def calc_hypotenuse(pts):
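+  '''Return one third of the diagonal of the square bbox around pts (a size scale for the pose box)'''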
+ bbox = [min(pts[0, :]), min(pts[1, :]), max(pts[0, :]), max(pts[1, :])]
+ center = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
+ radius = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 2
+ bbox = [center[0] - radius, center[1] - radius, center[0] + radius, center[1] + radius]
+ llength = sqrt((bbox[2] - bbox[0]) ** 2 + (bbox[3] - bbox[1]) ** 2)
+ return llength / 3
+
+
+def build_camera_box(rear_size=90):
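+  '''Build the 3D corner points of a camera-frustum box used to visualize head pose'''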
+ point_3d = []
+ rear_depth = 0
+ point_3d.append((-rear_size, -rear_size, rear_depth))
+ point_3d.append((-rear_size, rear_size, rear_depth))
+ point_3d.append((rear_size, rear_size, rear_depth))
+ point_3d.append((rear_size, -rear_size, rear_depth))
+ point_3d.append((-rear_size, -rear_size, rear_depth))
+
+ front_size = int(4 / 3 * rear_size)
+ front_depth = int(4 / 3 * rear_size)
+ point_3d.append((-front_size, -front_size, front_depth))
+ point_3d.append((-front_size, front_size, front_depth))
+ point_3d.append((front_size, front_size, front_depth))
+ point_3d.append((front_size, -front_size, front_depth))
+ point_3d.append((-front_size, -front_size, front_depth))
+  point_3d = np.array(point_3d, dtype=np.float64).reshape(-1, 3)
+
+ return point_3d
+
+
+def plot_pose_box(im, Ps, pts68s, color=(40, 255, 0), line_width=2):
+ '''Draw a 3D box as annotation of pose.
+ ref: https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py
+  :param im: the input image
+  :param Ps: list of (3, 4) affine camera matrices
+  :param pts68s: list of (2, 68) or (3, 68) landmark arrays
+ '''
+ im_draw = im.copy()
+ if not isinstance(pts68s, list):
+ pts68s = [pts68s]
+
+ if not isinstance(Ps, list):
+ Ps = [Ps]
+
+ for i in range(len(pts68s)):
+ pts68 = pts68s[i]
+ llength = calc_hypotenuse(pts68)
+ point_3d = build_camera_box(llength)
+ P = Ps[i]
+
+ # Map to 2d im points
+ point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4
+ point_2d = point_3d_homo.dot(P.T)[:, :2]
+
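+    # flip y into image coordinates and re-center the box on the face landmarks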
+ point_2d[:, 1] = - point_2d[:, 1]
+ point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(pts68[:2, :27], 1)
+ point_2d = np.int32(point_2d.reshape(-1, 2))
+
+ # Draw all the lines
+ cv.polylines(im_draw, [point_2d], True, color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[1]), tuple(point_2d[6]), color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[2]), tuple(point_2d[7]), color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[3]), tuple(point_2d[8]), color, line_width, cv.LINE_AA)
+
+ return im_draw
+
+
+
# ---------------------------------------------------------------------------
#
# OpenCV drawing functions
@@ -11,7 +105,6 @@ import cv2 as cv
pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
-
def draw_landmarks2D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
'''Draws facial landmarks, either 5pt or 68pt
'''
@@ -27,7 +120,7 @@ def draw_landmarks3D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
def draw_bbox(im, bbox, color=(0,255,0), stroke_weight=2):
- '''Draws a dimensioned (not-normalized) BBox onto cv2 image
+ '''Draws a dimensioned (not-normalized) BBox onto cv image
'''
cv.rectangle(im, bbox.pt_tl, bbox.pt_br, color, stroke_weight)
diff --git a/megapixels/commands/cv/face_3ddfa.py b/megapixels/commands/cv/face_3ddfa.py
new file mode 100644
index 00000000..ffc74180
--- /dev/null
+++ b/megapixels/commands/cv/face_3ddfa.py
@@ -0,0 +1,331 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
+ help='Image filepath')
+@click.option('-o', '--output', 'opt_fp_out', default=None,
+ help='GIF output path')
+@click.option('--size', 'opt_size',
+ type=(int, int), default=(300, 300),
+ help='Output image size')
+@click.option('-g', '--gpu', 'opt_gpu', default=0,
+ help='GPU index')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+ help='Force overwrite file')
+@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
+ help='Display detections to debug')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_size, opt_force, opt_display):
+ """Face detector demo"""
+
+ import sys
+ import os
+ from os.path import join
+ from pathlib import Path
+ import time
+
+ from tqdm import tqdm
+ import numpy as np
+ import pandas as pd
+ import cv2 as cv
+ import dlib
+
+ from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
+ from app.utils import plot_utils
+ from app.processors import face_detector, face_age
+ from app.models.data_store import DataStore
+
+  # 3DDFA
+  # git clone https://github.com/cleardusk/3DDFA/ 3rdparty/
+  # assumes the clone lives in 3rdparty/3DDFA (same layout as demo/face_3ddfa.py)
+  sys.path.append(join(Path.cwd().parent, '3rdparty', '3DDFA'))
+
+ import torch
+ import torchvision.transforms as transforms
+ import mobilenet_v1
+ from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
+ import scipy.io as sio
+ from utils.inference import get_suffix, parse_roi_box_from_landmark, crop_img, predict_68pts, dump_to_ply, dump_vertex, \
+ draw_landmarks, predict_dense, parse_roi_box_from_bbox, get_colors, write_obj_with_colors
+ from utils.cv_plot import plot_pose_box
+ from utils.estimate_pose import parse_pose
+ from utils.render import get_depths_image, cget_depths_image, cpncc
+ from utils.paf import gen_img_paf
+ import argparse
+ import torch.backends.cudnn as cudnn
+
+
+ log = logger_utils.Logger.getLogger()
+
+
+ # -------------------------------------------------
+ # load image
+
+ im = cv.imread(opt_fp_in)
+ im_resized = im_utils.resize(im, width=opt_size[0], height=opt_size[1])
+
+ # ----------------------------------------------------------------------------
+ # detect face
+
+ face_detector = face_detector.DetectorDLIBCNN(gpu=opt_gpu) # -1 for CPU
+  bboxes = face_detector.detect(im_resized, largest=True)
+  if not bboxes:
+    log.error('no face detected')
+    return
+  bbox = bboxes[0]
+  dim = im_resized.shape[:2][::-1]  # (w, h)
+  bbox_dim = bbox.to_dim(dim)
+  log.info(f'face detected: {bbox_dim.to_xyxy()}')
+
+
+ # ----------------------------------------------------------------------------
+ # age
+
+ age_apparent_predictor = face_age.FaceAgeApparent()
+ age_real_predictor = face_age.FaceAgeReal()
+
+  st = time.time()
+  age_real = age_real_predictor.age(im_resized, bbox_dim)
+  log.info(f'age real took: {(time.time()-st):.5f}s')
+  st = time.time()
+  age_apparent = age_apparent_predictor.age(im_resized, bbox_dim)
+  log.info(f'age apparent took: {(time.time()-st):.5f}s')
+
+
+ # ----------------------------------------------------------------------------
+ # output
+
+  log.info(f'Face coords: {bbox_dim}')
+ log.info(f'Age (real): {(age_real):.2f}')
+ log.info(f'Age (apparent): {(age_apparent):.2f}')
+
+
+ # ----------------------------------------------------------------------------
+ # draw
+
+ # draw real age
+ im_age_real = im_resized.copy()
+ draw_utils.draw_bbox(im_age_real, bbox_dim)
+ txt = f'{(age_real):.2f}'
+ draw_utils.draw_text(im_age_real, bbox_dim.pt_tl, txt)
+
+ # apparent
+ im_age_apparent = im_resized.copy()
+ draw_utils.draw_bbox(im_age_apparent, bbox_dim)
+ txt = f'{(age_apparent):.2f}'
+ draw_utils.draw_text(im_age_apparent, bbox_dim.pt_tl, txt)
+
+
+ # ----------------------------------------------------------------------------
+ # save
+
+ if opt_fp_out:
+ # save pose only
+ fpp_out = Path(opt_fp_out)
+
+ fp_out = join(fpp_out.parent, f'{fpp_out.stem}_real{fpp_out.suffix}')
+ cv.imwrite(fp_out, im_age_real)
+
+ fp_out = join(fpp_out.parent, f'{fpp_out.stem}_apparent{fpp_out.suffix}')
+ cv.imwrite(fp_out, im_age_apparent)
+
+
+ # ----------------------------------------------------------------------------
+ # display
+
+ if opt_display:
+ # show all images here
+ cv.imshow('real', im_age_real)
+ cv.imshow('apparent', im_age_apparent)
+ display_utils.handle_keyboard()
+
+
+
+
+
+STD_SIZE = 120
+
+
+def main(args):
+  # 1. load pre-trained model
+ checkpoint_fp = 'models/phase1_wpdc_vdc_v2.pth.tar'
+ arch = 'mobilenet_1'
+
+ checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
+ model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
+ model_dict = model.state_dict()
+  # the checkpoint was trained with DataParallel, so strip the 'module.' prefix from its keys
+ for k in checkpoint.keys():
+ model_dict[k.replace('module.', '')] = checkpoint[k]
+ model.load_state_dict(model_dict, strict=False)
+ if args.mode == 'gpu':
+ cudnn.benchmark = True
+ model = model.cuda()
+ model.eval()
+
+ # 2. load dlib model for face detection and landmark used for face cropping
+ if args.dlib_landmark:
+ dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
+ face_regressor = dlib.shape_predictor(dlib_landmark_model)
+ if args.dlib_bbox:
+ face_detector = dlib.get_frontal_face_detector()
+
+ # 3. forward
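+  # tri.mat holds (3, N) triangle vertex indices, 1-based (MATLAB convention); hence the tri - 1 below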
+ tri = sio.loadmat('visualize/tri.mat')['tri']
+ transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
+ for img_fp in args.files:
+ img_ori = cv2.imread(img_fp)
+ if args.dlib_bbox:
+ rects = face_detector(img_ori, 1)
+ else:
+ rects = []
+
+ if len(rects) == 0:
+ rects = dlib.rectangles()
+ rect_fp = img_fp + '.bbox'
+ lines = open(rect_fp).read().strip().split('\n')[1:]
+ for l in lines:
+ l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
+ rect = dlib.rectangle(l, r, t, b)
+ rects.append(rect)
+
+ pts_res = []
+ Ps = [] # Camera matrix collection
+ poses = [] # pose collection, [todo: validate it]
+ vertices_lst = [] # store multiple face vertices
+ ind = 0
+ suffix = get_suffix(img_fp)
+ for rect in rects:
+ # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
+ if args.dlib_landmark:
+ # - use landmark for cropping
+ pts = face_regressor(img_ori, rect).parts()
+ pts = np.array([[pt.x, pt.y] for pt in pts]).T
+ roi_box = parse_roi_box_from_landmark(pts)
+ else:
+ # - use detected face bbox
+ bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
+ roi_box = parse_roi_box_from_bbox(bbox)
+
+ img = crop_img(img_ori, roi_box)
+
+ # forward: one step
+ img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
+ input = transform(img).unsqueeze(0)
+ with torch.no_grad():
+ if args.mode == 'gpu':
+ input = input.cuda()
+ param = model(input)
+ param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
+
+ # 68 pts
+ pts68 = predict_68pts(param, roi_box)
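+      # pts68: (3, 68) landmarks in original-image coordinates (x, y, relative depth)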
+
+ # two-step for more accurate bbox to crop face
+ if args.bbox_init == 'two':
+ roi_box = parse_roi_box_from_landmark(pts68)
+ img_step2 = crop_img(img_ori, roi_box)
+ img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
+ input = transform(img_step2).unsqueeze(0)
+ with torch.no_grad():
+ if args.mode == 'gpu':
+ input = input.cuda()
+ param = model(input)
+ param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
+
+ pts68 = predict_68pts(param, roi_box)
+
+ pts_res.append(pts68)
+ P, pose = parse_pose(param)
+ Ps.append(P)
+ poses.append(pose)
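+      # P: (3, 4) affine camera matrix; pose: (yaw, pitch, roll) angles from parse_pose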
+
+ # dense face 3d vertices
+ if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
+ vertices = predict_dense(param, roi_box)
+ vertices_lst.append(vertices)
+ if args.dump_ply:
+ dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
+ if args.dump_vertex:
+ dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
+
+ # save .mat for 3d Face
+ wfp = '{}_{}_face3d.mat'.format(img_fp.replace(suffix, ''), ind)
+ colors = get_colors(img_ori, vertices)
+ sio.savemat(wfp, {'vertices': vertices, 'colors': colors, 'triangles': tri})
+
+ if args.dump_pts:
+ wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
+ np.savetxt(wfp, pts68, fmt='%.3f')
+ print('Save 68 3d landmarks to {}'.format(wfp))
+ if args.dump_roi_box:
+ wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
+ np.savetxt(wfp, roi_box, fmt='%.3f')
+ print('Save roi box to {}'.format(wfp))
+ if args.dump_paf:
+ wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
+ wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
+ paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)
+
+ cv2.imwrite(wfp_paf, paf_feature)
+ cv2.imwrite(wfp_crop, img)
+ print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
+ if args.dump_obj:
+ wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
+ colors = get_colors(img_ori, vertices)
+ write_obj_with_colors(wfp, vertices, tri, colors)
+ print('Dump obj with sampled texture to {}'.format(wfp))
+ ind += 1
+
+ if args.dump_pose:
+ # P, pose = parse_pose(param) # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
+ img_pose = plot_pose_box(img_ori, Ps, pts_res)
+ wfp = img_fp.replace(suffix, '_pose.jpg')
+ cv2.imwrite(wfp, img_pose)
+ print('Dump to {}'.format(wfp))
+ if args.dump_depth:
+ wfp = img_fp.replace(suffix, '_depth.png')
+ # depths_img = get_depths_image(img_ori, vertices_lst, tri-1) # python version
+ depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1) # cython version
+ cv2.imwrite(wfp, depths_img)
+ print('Dump to {}'.format(wfp))
+ if args.dump_pncc:
+ wfp = img_fp.replace(suffix, '_pncc.png')
+ pncc_feature = cpncc(img_ori, vertices_lst, tri - 1) # cython version
+ cv2.imwrite(wfp, pncc_feature[:, :, ::-1]) # cv2.imwrite will swap RGB -> BGR
+ print('Dump to {}'.format(wfp))
+ if args.dump_res:
+ draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
+
+
+if __name__ == '__main__':
+  # standalone entry point; cli() imports these lazily, so import here for script use
+  import argparse
+  import numpy as np
+  import cv2
+  import dlib
+  import scipy.io as sio
+  import torch
+  import torchvision.transforms as transforms
+  import torch.backends.cudnn as cudnn
+  import mobilenet_v1
+  from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
+  from utils.inference import get_suffix, parse_roi_box_from_landmark, crop_img, predict_68pts, dump_to_ply, dump_vertex, \
+    draw_landmarks, predict_dense, parse_roi_box_from_bbox, get_colors, write_obj_with_colors
+  from utils.cv_plot import plot_pose_box
+  from utils.estimate_pose import parse_pose
+  from utils.render import get_depths_image, cget_depths_image, cpncc
+  from utils.paf import gen_img_paf
+
+  parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
+  parser.add_argument('-f', '--files', nargs='+',
+    help='image files paths fed into network, single or multiple images')
+  parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode')
+  parser.add_argument('--show_flg', default='true', type=str2bool, help='whether show the visualization result')
+  parser.add_argument('--bbox_init', default='one', type=str,
+    help='one|two: one-step bbox initialization or two-step')
+  parser.add_argument('--dump_res', default='true', type=str2bool, help='whether write out the visualization image')
+  parser.add_argument('--dump_vertex', default='true', type=str2bool,
+    help='whether write out the dense face vertices to mat')
+  parser.add_argument('--dump_ply', default='true', type=str2bool)
+  parser.add_argument('--dump_pts', default='true', type=str2bool)
+  parser.add_argument('--dump_roi_box', default='true', type=str2bool)
+  parser.add_argument('--dump_pose', default='true', type=str2bool)
+  parser.add_argument('--dump_depth', default='true', type=str2bool)
+  parser.add_argument('--dump_pncc', default='true', type=str2bool)
+  parser.add_argument('--dump_paf', default='true', type=str2bool)
+  parser.add_argument('--paf_size', default=3, type=int, help='PAF feature kernel size')
+  parser.add_argument('--dump_obj', default='true', type=str2bool)
+  parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether use dlib to predict bbox')
+  parser.add_argument('--dlib_landmark', default='true', type=str2bool,
+    help='whether use dlib landmark to crop image')
+
+  args = parser.parse_args()
+  main(args)
diff --git a/megapixels/commands/demo/all.py b/megapixels/commands/demo/all.py
index e447492b..b939a4ec 100644
--- a/megapixels/commands/demo/all.py
+++ b/megapixels/commands/demo/all.py
@@ -158,8 +158,23 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_gif_frames,
log.info('')
- # x
+ # ----------------------------------------------------------------------------
+ # generate pose from 68 point 2D landmarks
+
+  # done
+  log.debug('Add age real')
+  log.debug('Add age apparent')
+  log.debug('Add gender')
+
+
+  # 3DDFA
+  log.debug('Add depth')
+  log.debug('Add pncc')
+
+  # TODO
+  log.debug('Add 3D face model')
+  log.debug('Add face texture flat')
+  log.debug('Add ethnicity')
# display
diff --git a/megapixels/commands/demo/face_3ddfa.py b/megapixels/commands/demo/face_3ddfa.py
new file mode 100644
index 00000000..6182aeb6
--- /dev/null
+++ b/megapixels/commands/demo/face_3ddfa.py
@@ -0,0 +1,314 @@
+'''
+Combines 3D face model + rendering
+https://github.com/cleardusk/3DDFA
+https://github.com/YadiraF/face3d
+'''
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
+ help='Image filepath')
+@click.option('-o', '--output', 'opt_fp_out', default=None,
+ help='GIF output path')
+@click.option('--size', 'opt_size',
+ type=(int, int), default=(300, 300),
+ help='Output image size')
+@click.option('-g', '--gpu', 'opt_gpu', default=0,
+ help='GPU index')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+ help='Force overwrite file')
+@click.option('--bbox-init', 'opt_bbox_init', is_flag=True,
+ help='Use landmarks for ROI instead of BBox')
+@click.option('--render-size', 'opt_render_dim',
+ type=(int, int), default=(512, 512),
+ help='2.5D render image size')
+@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
+ help='Display detections to debug')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_bbox_init,
+ opt_size, opt_render_dim, opt_force, opt_display):
+ """3D face demo"""
+
+ import sys
+ import os
+ from os.path import join
+ from pathlib import Path
+ import time
+
+ from tqdm import tqdm
+ import numpy as np
+ import pandas as pd
+ import cv2 as cv
+ import dlib
+
+ from app.models.bbox import BBox
+ from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
+ from app.utils import plot_utils
+ from app.processors import face_detector, face_landmarks
+ from app.models.data_store import DataStore
+
+ import torch
+ import torchvision.transforms as transforms
+ import torch.backends.cudnn as cudnn
+ import scipy.io as sio
+
+ sys.path.append(join(Path.cwd().parent, '3rdparty'))
+  # the 3DDFA clone is renamed d3DDFA because a Python module name can't start with a digit
+ from d3DDFA import mobilenet_v1
+ from d3DDFA.utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
+ from d3DDFA.utils import inference as d3dfa_utils
+ from d3DDFA.utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts
+ from d3DDFA.utils.inference import dump_to_ply, dump_vertex, draw_landmarks
+ from d3DDFA.utils.inference import predict_dense, parse_roi_box_from_bbox, get_colors
+ from d3DDFA.utils.inference import write_obj_with_colors
+ from d3DDFA.utils.estimate_pose import parse_pose
+ from d3DDFA.utils.render import get_depths_image, cget_depths_image, cpncc
+ from d3DDFA.utils import paf as d3dfa_paf_utils
+
+ # https://github.com/YadiraF/face3d
+ # compile cython module in face3d/mesh/cython/ python setup.py build_ext -i
+ from face3d.face3d import mesh as face3d_mesh
+
+
+ log = logger_utils.Logger.getLogger()
+
+ # -------------------------------------------------
+ # load image
+
+ fpp_in = Path(opt_fp_in)
+ im = cv.imread(opt_fp_in)
+ #im = im_utils.resize(im_orig, width=opt_size[0], height=opt_size[1])
+ # im = im_orig.copy()
+
+ # ----------------------------------------------------------------------------
+ # detect face
+
+ face_detector = face_detector.DetectorDLIBCNN(gpu=opt_gpu) # -1 for CPU
+  bboxes = face_detector.detect(im, largest=True)
+  if not bboxes:
+    log.error('no face detected')
+    return
+  bbox = bboxes[0]
+  dim = im.shape[:2][::-1]  # (w, h)
+  bbox_dim = bbox.to_dim(dim)
+  log.info(f'face detected: {bbox_dim.to_xyxy()}')
+
+
+ # -------------------------------------------------------------------------
+ # landmarks
+
+ landmark_predictor = face_landmarks.Dlib2D_68()
+  landmarks = landmark_predictor.landmarks(im, bbox_dim)
+
+
+ # -------------------------------------------------------------------------
+ # 3ddfa
+
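+  # 3DDFA runs on fixed 120x120 face crops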
+ STD_SIZE = 120
+
+  # load pre-trained model
+ fp_ckpt = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'phase1_wpdc_vdc_v2.pth.tar')
+ arch = 'mobilenet_1'
+ checkpoint = torch.load(fp_ckpt, map_location=lambda storage, loc: storage)['state_dict']
+ model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
+ model_dict = model.state_dict()
+
+  # the checkpoint was trained with DataParallel, so strip the 'module.' prefix from its keys
+ for k in checkpoint.keys():
+ model_dict[k.replace('module.', '')] = checkpoint[k]
+ model.load_state_dict(model_dict, strict=False)
+ if opt_gpu > -1:
+ cudnn.benchmark = True
+ model = model.cuda()
+ model.eval()
+
+ # forward
+ st = time.time()
+ fp_tri = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'tri.mat')
+ triangles = sio.loadmat(fp_tri)['tri']
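+  # HWC uint8 image -> CHW float tensor normalized to roughly [-1, 1]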
+ transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
+
+ pts_res = []
+ Ps = [] # Camera matrix collection
+ poses = [] # pose collection, [todo: validate it]
+ vertices_lst = [] # store multiple face vertices
+
+  # ROI: landmarks if --bbox-init is set, otherwise the detected face bbox
+  pts = np.array(landmarks).T
+  if opt_bbox_init:
+    roi_box = d3dfa_utils.parse_roi_box_from_landmark(pts)
+  else:
+    roi_box = parse_roi_box_from_bbox(bbox_dim.to_xyxy())
+  im_crop = d3dfa_utils.crop_img(im, roi_box)
+ im_crop = cv.resize(im_crop, dsize=(STD_SIZE, STD_SIZE), interpolation=cv.INTER_LINEAR)
+
+ # forward
+ torch_input = transform(im_crop).unsqueeze(0)
+ with torch.no_grad():
+ if opt_gpu > -1:
+ torch_input = torch_input.cuda()
+ param = model(torch_input)
+ param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
+
+ # 68 pts
+ pts68 = d3dfa_utils.predict_68pts(param, roi_box)
+
+ pts_res.append(pts68)
+ P, pose = parse_pose(param)
+ Ps.append(P)
+ poses.append(pose)
+
+ # dense face 3d vertices
+ vertices = d3dfa_utils.predict_dense(param, roi_box)
+ vertices_lst.append(vertices)
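+  # vertices: (3, N) dense mesh in original-image coordinates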
+
+ log.info(f'generated 3d data in: {(time.time() - st):.2f}s')
+
+ # filepath helper function
+ def to_fp(fpp, ext, suffix=None):
+ if suffix:
+ fp = join(fpp.parent, f'{fpp.stem}_{suffix}.{ext}')
+ else:
+ fp = join(fpp.parent, f'{fpp.stem}.{ext}')
+ return fp
+
+ # save .mat
+ colors = d3dfa_utils.get_colors(im, vertices)
+ vertices_orig = vertices.copy()
+ fp_mat_3df = to_fp(fpp_in, 'mat', suffix='face3d')
+ sio.savemat(fp_mat_3df, {'vertices': vertices, 'colors': colors, 'triangles': triangles})
+
+ # save PAF
+ #fp_paf = to_fp(fpp_in, 'jpg', suffix='paf')
+ #opt_paf_size = 3 # PAF feature kernel size
+ #im_paf = d3dfa_paf_utils.gen_img_paf(img_crop=im_crop, param=param, kernel_size=opt_paf_size)
+ #cv.imwrite(fp_paf, im_paf)
+
+ # save pose image
+ # P, pose = parse_pose(param) # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
+
+ img_pose = draw_utils.plot_pose_box(im, Ps, pts_res)
+ fp_pose = to_fp(fpp_in, 'jpg', suffix='pose')
+ cv.imwrite(fp_pose, img_pose)
+
+ # save depth image
+ fp_depth = to_fp(fpp_in, 'png', suffix='depth')
+ # depths_img = get_depths_image(im, vertices_lst, tri-1) # python version
+ im_depth = cget_depths_image(im, vertices_lst, triangles - 1) # cython version
+ cv.imwrite(fp_depth, im_depth)
+
+ # save pncc image
+ fp_pose = to_fp(fpp_in, 'png', suffix='pncc')
+ pncc_feature = cpncc(im, vertices_lst, triangles - 1) # cython version
+ cv.imwrite(fp_pose, pncc_feature[:, :, ::-1]) # cv.imwrite will swap RGB -> BGR
+
+ # save .ply
+ #fp_ply = to_fp(fpp_in, 'ply')
+ #dump_to_ply(vertices, triangles, fp_ply)
+
+ # skip: save .mat (3ddfa default not compatible with face3d utils)
+ #fp_mat = to_fp(fpp_in, 'mat')
+ #d3dfa_utils.dump_vertex(vertices, fp_mat)
+
+ # save 68 points
+ #fp_txt = to_fp(fpp_in, 'txt', suffix='68')
+ #np.savetxt(to_fp(fpp_in, 'txt'), pts68, fmt='%.3f')
+
+ # save roi
+ #fp_txt = to_fp(fpp_in, 'txt', suffix='roi')
+  #np.savetxt(fp_txt, roi_box, fmt='%.3f')
+
+ # save crop
+ #fp_crop = to_fp(fpp_in, 'jpg', suffix='crop')
+ #cv.imwrite(fp_crop, im_crop)
+
+ # save obj
+ colors = d3dfa_utils.get_colors(im, vertices_orig)
+ fp_obj = to_fp(fpp_in, 'obj')
+ write_obj_with_colors(fp_obj, vertices_orig, triangles, colors)
+
+ #fp_landmarks = to_fp(fpp_in, 'jpg', suffix='3DDFA')
+ # show_flg?
+ #d3dfa_utils.draw_landmarks(im, pts_res, wfp=fp_landmarks, show_flg=False)
+
+ # -------------------------------------------------------------------------
+ # face3d
+
+  # create a 3D mesh photo of the face
+  # if loading from file
+  # TODO find where vertices is being changed
+  vertices = vertices_orig  # vertices is mutated somewhere above, so restore the saved copy
+
+ # preprocess 3D data from 3DDFA for face3d rendering
+ vertices = vertices.transpose()
+ triangles = triangles.transpose()
+ vertices = vertices.astype(np.float64) # change data type
+  # convert triangle vertex indices from 1-based (.mat convention) to 0-based
+  triangles = (triangles - 1).astype(np.int32)
+ vertices -= np.array([abs(np.min(vertices[:,0])), np.min(abs(vertices[:,1])), np.min(abs(vertices[:,2]))])
+ vertices -= np.array([np.mean(vertices[:,0]), np.mean(vertices[:,1]), np.mean(vertices[:,2])])
+ # colors = np.array([c[::-1] for c in colors]) # BGR --> RGB
+ colors = colors/np.max(colors) # normalize color range
+
+ # set max render size (about 75% of canvas size)
+ max_render_size = int(max(opt_render_dim) * .75)
+ s = max_render_size/(np.max(vertices[:,1]) - np.min(vertices[:,1]))
+
+ # rotation matrix
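+  # angle2matrix takes (x, y, z) rotation angles in degrees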
+ R = face3d_mesh.transform.angle2matrix([-180, -20, 0])
+
+  # no translation; keep the object centered at the origin
+ t = [0, 0, 0]
+ vertices_trans = face3d_mesh.transform.similarity_transform(vertices, s, R, t)
+
+ # lighting: add point lights, positions are defined in world space
+ light_pos = np.array([[-128, -128, 512]])
+ light_clr_amt = np.array([[1, 1, 1]])
+ colors_lit = face3d_mesh.light.add_light(vertices_trans, triangles, colors, light_pos, light_clr_amt)
+
+  # transform from world space to camera space (the world as seen by the observer)
+  vertices_cam = face3d_mesh.transform.lookat_camera(vertices_trans, eye=[0, 0, 0], at=np.array([0, 0, 1]), up=None)
+ # project from 3d world space into 2d image plane. orthographic or perspective projection
+ vertices_proj = face3d_mesh.transform.orthographic_project(vertices_cam)
+
+ # -------------------------------------------------------------------------
+ # render 2D image
+
+ w = h = max(opt_render_dim)
+ vertices_im = face3d_mesh.transform.to_image(vertices_proj, h, w)
+ rendering = face3d_mesh.render.render_colors(vertices_im, triangles, colors_lit, h, w)
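+  # rendering: (h, w, 3) float image with values in [0, 1]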
+
+
+ # ----------------------------------------------------------------------------
+ # save
+
+  if opt_fp_out:
+    fpp_out = Path(opt_fp_out)
+    # render_colors returns floats in [0, 1]; scale to 8-bit for imwrite
+    fp_out = join(fpp_out.parent, f'{fpp_out.stem}_render{fpp_out.suffix}')
+    cv.imwrite(fp_out, (rendering * 255).astype(np.uint8))
+
+
+  # ----------------------------------------------------------------------------
+  # display
+
+  if opt_display:
+    # show the 2.5D render plus the pose and depth debug images
+    cv.imshow('render', rendering)
+    cv.imshow('pose', img_pose)
+    cv.imshow('depth', im_depth)
+    display_utils.handle_keyboard()
+
diff --git a/megapixels/commands/demo/face_age_gender.py b/megapixels/commands/demo/face_age_gender.py
index 477404a5..c74f1e45 100644
--- a/megapixels/commands/demo/face_age_gender.py
+++ b/megapixels/commands/demo/face_age_gender.py
@@ -43,7 +43,6 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_size, opt_force, opt_display):
log = logger_utils.Logger.getLogger()
-
# -------------------------------------------------
# load image
diff --git a/megapixels/commands/demo/face_gender.py b/megapixels/commands/demo/face_gender.py
deleted file mode 100644
index ea083fcb..00000000
--- a/megapixels/commands/demo/face_gender.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import click
-
-from app.settings import types
-from app.utils import click_utils
-from app.settings import app_cfg as cfg
-
-
-@click.command()
-@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
- help='Image filepath')
-@click.option('-o', '--output', 'opt_fp_out', default=None,
- help='GIF output path')
-@click.option('--size', 'opt_size',
- type=(int, int), default=(300, 300),
- help='Output image size')
-@click.option('-g', '--gpu', 'opt_gpu', default=0,
- help='GPU index')
-@click.option('-f', '--force', 'opt_force', is_flag=True,
- help='Force overwrite file')
-@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
- help='Display detections to debug')
-@click.pass_context
-def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_size, opt_force, opt_display):
- """Face detector demo"""
-
- import sys
- import os
- from os.path import join
- from pathlib import Path
- import time
-
- from tqdm import tqdm
- import numpy as np
- import pandas as pd
- import cv2 as cv
- import dlib
-
- from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
- from app.utils import plot_utils
- from app.processors import face_detector, face_age
- from app.models.data_store import DataStore
-
-
- log = logger_utils.Logger.getLogger()
-
-
- # -------------------------------------------------
- # load image
-
- im = cv.imread(opt_fp_in)
- im_resized = im_utils.resize(im, width=opt_size[0], height=opt_size[1])
-
- # ----------------------------------------------------------------------------
- # detect face
-
- face_detector = face_detector.DetectorDLIBCNN(gpu=opt_gpu) # -1 for CPU
- bboxes = face_detector.detect(im_resized, largest=True)
- bbox = bboxes[0]
- dim = im_resized.shape[:2][::-1]
- bbox_dim = bbox.to_dim(dim)
- if not bbox:
- log.error('no face detected')
- return
- else:
- log.info(f'face detected: {bbox_dim.to_xyxy()}')
-
-
- # ----------------------------------------------------------------------------
- # age
-
- age_apparent_predictor = face_age.FaceAgeApparent()
- age_real_predictor = face_age.FaceAgeReal()
-
- st = time.time()
- age_real = age_real_predictor.age(im_resized, bbox_dim)
- log.info(f'age real took: {(time.time()-st)/1000:.5f}s')
- st = time.time()
- age_apparent = age_apparent_predictor.age(im_resized, bbox_dim)
- log.info(f'age apparent took: {(time.time()-st)/1000:.5f}s')
-
-
- # ----------------------------------------------------------------------------
- # output
-
- log.info(f'Face coords: {bbox_dim} face')
- log.info(f'Age (real): {(age_real):.2f}')
- log.info(f'Age (apparent): {(age_apparent):.2f}')
-
-
- # ----------------------------------------------------------------------------
- # draw
-
- # draw real age
- im_age_real = im_resized.copy()
- draw_utils.draw_bbox(im_age_real, bbox_dim)
- txt = f'{(age_real):.2f}'
- draw_utils.draw_text(im_age_real, bbox_dim.pt_tl, txt)
-
- # apparent
- im_age_apparent = im_resized.copy()
- draw_utils.draw_bbox(im_age_apparent, bbox_dim)
- txt = f'{(age_apparent):.2f}'
- draw_utils.draw_text(im_age_apparent, bbox_dim.pt_tl, txt)
-
-
- # ----------------------------------------------------------------------------
- # save
-
- if opt_fp_out:
- # save pose only
- fpp_out = Path(opt_fp_out)
-
- fp_out = join(fpp_out.parent, f'{fpp_out.stem}_real{fpp_out.suffix}')
- cv.imwrite(fp_out, im_age_real)
-
- fp_out = join(fpp_out.parent, f'{fpp_out.stem}_apparent{fpp_out.suffix}')
- cv.imwrite(fp_out, im_age_apparent)
-
-
- # ----------------------------------------------------------------------------
- # display
-
- if opt_display:
- # show all images here
- cv.imshow('real', im_age_real)
- cv.imshow('apparent', im_age_apparent)
- display_utils.handle_keyboard() \ No newline at end of file