"""Combines 3D face model + rendering.

https://github.com/cleardusk/3DDFA --> 3d landmarks
https://github.com/YadiraF/face3d  --> render 3D with lighting as 2.5d image
"""
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
  help='Image filepath')
@click.option('-o', '--output', 'opt_dir_out', default=None,
  help='Directory for output files')
@click.option('--size', 'opt_size', type=(int, int), default=(300, 300),
  help='Output image size')
@click.option('-g', '--gpu', 'opt_gpu', default=0,
  help='GPU index')
@click.option('-f', '--force', 'opt_force', is_flag=True,
  help='Force overwrite file')
@click.option('--bbox-init', 'opt_bbox_init', is_flag=True,
  help='Use landmarks for ROI instead of BBox')
# NOTE: was '--size', which collided with the opt_size option above and made
# one of the two flags unreachable; renamed to '--render-size'
@click.option('--render-size', 'opt_render_dim', type=(int, int), default=(512, 512),
  help='2.5D render image size')
@click.option('--display/--no-display', 'opt_display', is_flag=True, default=True,
  help='Display detections to debug')
@click.option('--save/--no-save', 'opt_save', is_flag=True, default=True,
  help='Save output images/files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_dir_out, opt_gpu, opt_bbox_init, opt_size,
        opt_render_dim, opt_force, opt_display, opt_save):
  """3D face demo: detect a face, fit a 3DDFA 3D model, and write/display
  the rendered 2.5D image plus depth/pncc/pose/paf visualizations."""

  # Heavy imports are deferred to the command body so `--help` stays fast.
  import sys
  import os
  from os.path import join
  from pathlib import Path
  import time

  from tqdm import tqdm
  import numpy as np
  import pandas as pd
  import cv2 as cv
  import dlib

  from app.models.bbox import BBox
  from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
  from app.utils import plot_utils
  from app.processors import face_detector, face_landmarks
  from app.models.data_store import DataStore

  import torch
  import torchvision.transforms as transforms
  import torch.backends.cudnn as cudnn
  import scipy.io as sio

  sys.path.append(join(Path.cwd().parent, '3rdparty'))

  # git clone https://github.com/cleardusk/3DDFA 3rdparty/d3ddfa
  # (3DDFA is renamed d3DDFA because a module name can't start with a number)
  from d3DDFA import mobilenet_v1
  from d3DDFA.utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
  from d3DDFA.utils import inference as d3dfa_utils
  from d3DDFA.utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts
  from d3DDFA.utils.inference import dump_to_ply, dump_vertex, draw_landmarks
  from d3DDFA.utils.inference import predict_dense, parse_roi_box_from_bbox, get_colors
  from d3DDFA.utils.inference import write_obj_with_colors
  from d3DDFA.utils.estimate_pose import parse_pose
  from d3DDFA.utils.render import get_depths_image, cget_depths_image, cpncc
  from d3DDFA.utils import paf as d3dfa_paf_utils

  # git clone https://github.com/YadiraF/face3d 3rdparty/face3d
  # compile the cython module in face3d/mesh/cython/: python setup.py build_ext -i
  from face3d.face3d import mesh as face3d_mesh

  log = logger_utils.Logger.getLogger()

  # ---------------------------------------------------------------------------
  # load image

  fpp_in = Path(opt_fp_in)
  im = cv.imread(opt_fp_in)

  # ---------------------------------------------------------------------------
  # detect face

  face_detector = face_detector.DetectorCVDNN()  # -1 for CPU
  bboxes = face_detector.detect(im, largest=True)
  # Guard BEFORE indexing: the original indexed bboxes[0] first, which raised
  # IndexError on images with no detected face instead of logging the error.
  if not bboxes:
    log.error('no face detected')
    return
  bbox = bboxes[0]
  dim = im.shape[:2][::-1]  # (w, h)
  bbox_dim = bbox.to_dim(dim)
  log.info(f'face detected: {bbox_dim.to_xyxy()}')

  # ---------------------------------------------------------------------------
  # landmarks

  landmark_predictor = face_landmarks.Dlib2D_68()
  landmarks = landmark_predictor.landmarks(im, bbox_dim)

  # ---------------------------------------------------------------------------
  # 3ddfa

  STD_SIZE = 120  # 3DDFA network input size

  # load pre-trained model
  fp_ckpt = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'phase1_wpdc_vdc_v2.pth.tar')
  arch = 'mobilenet_1'
  checkpoint = torch.load(fp_ckpt, map_location=lambda storage, loc: storage)['state_dict']
  model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
  model_dict = model.state_dict()
  # the model was trained on multiple GPUs, so strip the 'module.' prefix
  for k in checkpoint.keys():
    model_dict[k.replace('module.', '')] = checkpoint[k]
  model.load_state_dict(model_dict, strict=False)
  if opt_gpu > -1:
    cudnn.benchmark = True
    model = model.cuda()
  model.eval()

  # forward
  st = time.time()
  fp_tri = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'tri.mat')
  triangles = sio.loadmat(fp_tri)['tri']
  transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

  pts_res = []
  Ps = []  # camera matrix collection
  poses = []  # pose collection, [todo: validate it]
  vertices_lst = []  # store multiple face vertices

  # use landmark as roi
  pts = np.array(landmarks).T
  # roi_box = d3dfa_utils.parse_roi_box_from_landmark(pts)
  roi_box = parse_roi_box_from_bbox(bbox_dim.to_xyxy())
  im_crop = d3dfa_utils.crop_img(im, roi_box)
  im_crop = cv.resize(im_crop, dsize=(STD_SIZE, STD_SIZE), interpolation=cv.INTER_LINEAR)

  # forward
  torch_input = transform(im_crop).unsqueeze(0)
  with torch.no_grad():
    if opt_gpu > -1:
      torch_input = torch_input.cuda()
    param = model(torch_input)
    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

  # 68 pts
  pts68 = d3dfa_utils.predict_68pts(param, roi_box)
  pts_res.append(pts68)
  P, pose = parse_pose(param)
  Ps.append(P)
  poses.append(pose)

  # dense face 3d vertices
  vertices = d3dfa_utils.predict_dense(param, roi_box)
  vertices_lst.append(vertices)
  log.info(f'generated 3d data in: {(time.time() - st):.2f}s')

  def to_fp(fpp, ext, suffix=None):
    """Return a filepath next to `fpp` with a new extension and optional suffix."""
    if suffix:
      return join(fpp.parent, f'{fpp.stem}_{suffix}.{ext}')
    return join(fpp.parent, f'{fpp.stem}.{ext}')

  # save .mat (face3d-compatible layout)
  colors = d3dfa_utils.get_colors(im, vertices)
  vertices_orig = vertices.copy()  # keep a copy; `vertices` is mutated below
  fp_mat_3df = to_fp(fpp_in, 'mat', suffix='face3d')
  sio.savemat(fp_mat_3df, {'vertices': vertices, 'colors': colors, 'triangles': triangles})

  # PAF image
  im_paf = d3dfa_paf_utils.gen_img_paf(img_crop=im_crop, param=param, kernel_size=3)

  # pose image: camera matrix (without scale) and pose (yaw, pitch, roll)
  im_pose = draw_utils.plot_pose_box(im, Ps, pts_res)

  # depth image (cython version; get_depths_image is the python equivalent)
  im_depth = cget_depths_image(im, vertices_lst, triangles - 1)

  # pncc image (cython version)
  pncc_feature = cpncc(im, vertices_lst, triangles - 1)
  im_pncc = pncc_feature[:, :, ::-1]  # swap BGR

  # recompute colors from the untouched vertices for the .obj export below
  colors = d3dfa_utils.get_colors(im, vertices_orig)

  # ---------------------------------------------------------------------------
  # face3d: create 2.5D mesh render

  # TODO find where `vertices` is being changed; keep a copy as a workaround
  vertices = vertices_orig

  # preprocess 3DDFA output for face3d rendering
  vertices = vertices.transpose()
  triangles = triangles.transpose()
  vertices = vertices.astype(np.float64)
  # subtract 1 from triangle vertex indices (1-based in the .mat file)
  triangles = np.array([np.array([t[0] - 1, t[1] - 1, t[2] - 1]).astype(np.int32) for t in triangles])
  vertices -= np.array([abs(np.min(vertices[:, 0])), np.min(abs(vertices[:, 1])), np.min(abs(vertices[:, 2]))])
  vertices -= np.array([np.mean(vertices[:, 0]), np.mean(vertices[:, 1]), np.mean(vertices[:, 2])])
  # colors = np.array([c[::-1] for c in colors])  # BGR --> RGB
  colors = colors / np.max(colors)  # normalize color range

  # set max render size (about 75% of canvas size)
  max_render_size = int(max(opt_render_dim) * .75)
  s = max_render_size / (np.max(vertices[:, 1]) - np.min(vertices[:, 1]))
  # rotation matrix
  R = face3d_mesh.transform.angle2matrix([-180, -20, 0])
  # no translation; center of obj: [0, 0]
  t = [0, 0, 0]
  vertices_trans = face3d_mesh.transform.similarity_transform(vertices, s, R, t)

  # lighting: point lights with positions defined in world space
  light_pos = np.array([[-128, -128, 512]])
  light_clr_amt = np.array([[1, 1, 1]])
  colors_lit = face3d_mesh.light.add_light(vertices_trans, triangles, colors, light_pos, light_clr_amt)

  # world space -> camera space (what the world is in the eye of the observer)
  vertices_cam = face3d_mesh.transform.lookat_camera(
    vertices_trans, eye=[0, 0, 0], at=np.array([0, 0, 1]), up=None)
  # project from 3d camera space onto the 2d image plane
  vertices_proj = face3d_mesh.transform.orthographic_project(vertices_cam)

  # ---------------------------------------------------------------------------
  # render 2D images

  w = h = max(opt_render_dim)
  vertices_im = face3d_mesh.transform.to_image(vertices_proj, h, w)
  im_render = face3d_mesh.render.render_colors(vertices_im, triangles, colors_lit, h, w)
  im_render = (255 * im_render).astype(np.uint8)
  im_pncc = im_pncc.astype(np.uint8)
  im_depth = im_depth.astype(np.uint8)
  im_paf = im_paf.astype(np.uint8)

  # ---------------------------------------------------------------------------
  # save

  if opt_save:
    fpp_out = Path(opt_dir_out) if opt_dir_out is not None else Path(opt_fp_in).parent
    fpp_in = Path(opt_fp_in)
    cv.imwrite(join(fpp_out, f'{fpp_in.stem}_render.png'), im_render)
    cv.imwrite(join(fpp_out, f'{fpp_in.stem}_pose.png'), im_pose)
    cv.imwrite(join(fpp_out, f'{fpp_in.stem}_depth.png'), im_depth)
    cv.imwrite(join(fpp_out, f'{fpp_in.stem}_pncc.png'), im_pncc)
    cv.imwrite(join(fpp_out, f'{fpp_in.stem}_paf.png'), im_paf)
    write_obj_with_colors(join(fpp_out, f'{fpp_in.stem}.obj'), vertices_orig, triangles, colors)
    np.savetxt(join(fpp_out, f'{fpp_in.stem}.txt'), pts68, fmt='%.3f')

  # ---------------------------------------------------------------------------
  # display

  if opt_display:
    cv.imshow('3d', im_render)
    cv.imshow('depth', im_depth)
    cv.imshow('pncc', im_pncc)
    cv.imshow('pose', im_pose)
    cv.imshow('paf', im_paf)
    display_utils.handle_keyboard()