Diffstat (limited to 'megapixels/commands/cv/face_3ddfa.py')
-rw-r--r--   megapixels/commands/cv/face_3ddfa.py   331
1 file changed, 0 insertions, 331 deletions
diff --git a/megapixels/commands/cv/face_3ddfa.py b/megapixels/commands/cv/face_3ddfa.py
deleted file mode 100644
index ffc74180..00000000
--- a/megapixels/commands/cv/face_3ddfa.py
+++ /dev/null
@@ -1,331 +0,0 @@
-import click
-
-from app.settings import types
-from app.utils import click_utils
-from app.settings import app_cfg as cfg
-
-
-@click.command()
-@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
-              help='Image filepath')
-@click.option('-o', '--output', 'opt_fp_out', default=None,
-              help='Image output path')
-@click.option('--size', 'opt_size', type=(int, int), default=(300, 300),
-              help='Output image size')
-@click.option('-g', '--gpu', 'opt_gpu', default=0,
-              help='GPU index')
-@click.option('-f', '--force', 'opt_force', is_flag=True,
-              help='Force overwrite file')
-@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
-              help='Display detections to debug')
-@click.pass_context
-def cli(ctx, opt_fp_in, opt_fp_out, opt_gpu, opt_size, opt_force, opt_display):
-    """Face detector demo"""
-
-    import sys
-    import os
-    from os.path import join
-    from pathlib import Path
-    import time
-
-    from tqdm import tqdm
-    import numpy as np
-    import pandas as pd
-    import cv2 as cv
-    import dlib
-
-    from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
-    from app.utils import plot_utils
-    from app.processors import face_detector, face_age
-    from app.models.data_store import DataStore
-
-    log = logger_utils.Logger.getLogger()
-
-    # ----------------------------------------------------------------------------
-    # load image
-
-    im = cv.imread(opt_fp_in)
-    im_resized = im_utils.resize(im, width=opt_size[0], height=opt_size[1])
-
-    # ----------------------------------------------------------------------------
-    # detect face
-
-    # name the instance 'detector' to avoid shadowing the face_detector module
-    detector = face_detector.DetectorDLIBCNN(gpu=opt_gpu)  # gpu=-1 for CPU
-    bboxes = detector.detect(im_resized, largest=True)
-    # check for detections before indexing into the result list
-    if not bboxes:
-        log.error('no face detected')
-        return
-    bbox = bboxes[0]
-    dim = im_resized.shape[:2][::-1]
-    bbox_dim = bbox.to_dim(dim)
-    log.info(f'face detected: {bbox_dim.to_xyxy()}')
-
-    # ----------------------------------------------------------------------------
-    # age
-
-    age_apparent_predictor = face_age.FaceAgeApparent()
-    age_real_predictor = face_age.FaceAgeReal()
-
-    # time.time() returns seconds, so no division is needed
-    st = time.time()
-    age_real = age_real_predictor.age(im_resized, bbox_dim)
-    log.info(f'age real took: {time.time() - st:.5f}s')
-    st = time.time()
-    age_apparent = age_apparent_predictor.age(im_resized, bbox_dim)
-    log.info(f'age apparent took: {time.time() - st:.5f}s')
-
-    # ----------------------------------------------------------------------------
-    # output
-
-    log.info(f'Face coords: {bbox_dim}')
-    log.info(f'Age (real): {age_real:.2f}')
-    log.info(f'Age (apparent): {age_apparent:.2f}')
-
-    # ----------------------------------------------------------------------------
-    # draw
-
-    # draw real age
-    im_age_real = im_resized.copy()
-    draw_utils.draw_bbox(im_age_real, bbox_dim)
-    txt = f'{age_real:.2f}'
-    draw_utils.draw_text(im_age_real, bbox_dim.pt_tl, txt)
-
-    # draw apparent age
-    im_age_apparent = im_resized.copy()
-    draw_utils.draw_bbox(im_age_apparent, bbox_dim)
-    txt = f'{age_apparent:.2f}'
-    draw_utils.draw_text(im_age_apparent, bbox_dim.pt_tl, txt)
-
-    # ----------------------------------------------------------------------------
-    # save
-
-    if opt_fp_out:
-        fpp_out = Path(opt_fp_out)
-
-        fp_out = join(fpp_out.parent, f'{fpp_out.stem}_real{fpp_out.suffix}')
-        cv.imwrite(fp_out, im_age_real)
-
-        fp_out = join(fpp_out.parent, f'{fpp_out.stem}_apparent{fpp_out.suffix}')
-        cv.imwrite(fp_out, im_age_apparent)
-
-    # ----------------------------------------------------------------------------
-    # display
-
-    if opt_display:
-        cv.imshow('real', im_age_real)
-        cv.imshow('apparent', im_age_apparent)
-        display_utils.handle_keyboard()
-
-
-# ----------------------------------------------------------------------------
-# standalone 3DDFA demo (adapted from cleardusk/3DDFA)
-# git clone https://github.com/cleardusk/3DDFA/ 3rdparty/
-# these imports must be module-level: the imports inside cli() above are
-# function-local and are not visible to main() below
-
-import argparse
-
-import cv2
-import dlib
-import numpy as np
-import scipy.io as sio
-import torch
-import torch.backends.cudnn as cudnn
-import torchvision.transforms as transforms
-
-import mobilenet_v1
-from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
-from utils.inference import get_suffix, parse_roi_box_from_landmark, crop_img, predict_68pts, \
-    dump_to_ply, dump_vertex, draw_landmarks, predict_dense, parse_roi_box_from_bbox, \
-    get_colors, write_obj_with_colors
-from utils.cv_plot import plot_pose_box
-from utils.estimate_pose import parse_pose
-from utils.render import get_depths_image, cget_depths_image, cpncc
-from utils.paf import gen_img_paf
-
-STD_SIZE = 120
-
-
-def main(args):
-    # 1. load pre-trained model
-    checkpoint_fp = 'models/phase1_wpdc_vdc_v2.pth.tar'
-    arch = 'mobilenet_1'
-
-    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
-    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
-    model_dict = model.state_dict()
-    # the checkpoint was trained on multiple GPUs, so the 'module.' prefix must be removed
-    for k in checkpoint.keys():
-        model_dict[k.replace('module.', '')] = checkpoint[k]
-    model.load_state_dict(model_dict, strict=False)
-    if args.mode == 'gpu':
-        cudnn.benchmark = True
-        model = model.cuda()
-    model.eval()
-
-    # 2. load dlib models for face detection and the landmarks used for face cropping
-    if args.dlib_landmark:
-        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
-        face_regressor = dlib.shape_predictor(dlib_landmark_model)
-    if args.dlib_bbox:
-        face_detector = dlib.get_frontal_face_detector()
-
-    # 3. forward
-    tri = sio.loadmat('visualize/tri.mat')['tri']
-    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
-    for img_fp in args.files:
-        img_ori = cv2.imread(img_fp)
-        if args.dlib_bbox:
-            rects = face_detector(img_ori, 1)
-        else:
-            rects = []
-
-        if len(rects) == 0:
-            # fall back to a sidecar .bbox file when no face is detected
-            rects = dlib.rectangles()
-            rect_fp = img_fp + '.bbox'
-            lines = open(rect_fp).read().strip().split('\n')[1:]
-            for line in lines:
-                l, r, t, b = [int(_) for _ in line.split(' ')[1:]]
-                # nb: dlib.rectangle expects (left, top, right, bottom)
-                rect = dlib.rectangle(l, r, t, b)
-                rects.append(rect)
-
-        pts_res = []
-        Ps = []  # camera matrix collection
-        poses = []  # pose collection [todo: validate]
-        vertices_lst = []  # store multiple face vertices
-        ind = 0
-        suffix = get_suffix(img_fp)
-        for rect in rects:
-            # use dlib landmarks to crop the image; if disabled, use only the
-            # face bbox to calculate the roi bbox for cropping
-            if args.dlib_landmark:
-                pts = face_regressor(img_ori, rect).parts()
-                pts = np.array([[pt.x, pt.y] for pt in pts]).T
-                roi_box = parse_roi_box_from_landmark(pts)
-            else:
-                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
-                roi_box = parse_roi_box_from_bbox(bbox)
-
-            img = crop_img(img_ori, roi_box)
-
-            # forward: one step
-            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
-            input = transform(img).unsqueeze(0)
-            with torch.no_grad():
-                if args.mode == 'gpu':
-                    input = input.cuda()
-                param = model(input)
-                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
-
-            # 68 pts
-            pts68 = predict_68pts(param, roi_box)
-
-            # two-step: refine the roi bbox from the predicted landmarks for a more accurate crop
-            if args.bbox_init == 'two':
-                roi_box = parse_roi_box_from_landmark(pts68)
-                img_step2 = crop_img(img_ori, roi_box)
-                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
-                input = transform(img_step2).unsqueeze(0)
-                with torch.no_grad():
-                    if args.mode == 'gpu':
-                        input = input.cuda()
-                    param = model(input)
-                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
-
-                pts68 = predict_68pts(param, roi_box)
-
-            pts_res.append(pts68)
-            P, pose = parse_pose(param)
-            Ps.append(P)
-            poses.append(pose)
-
-            # dense face 3d vertices
-            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
-                vertices = predict_dense(param, roi_box)
-                vertices_lst.append(vertices)
-            if args.dump_ply:
-                dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
-            if args.dump_vertex:
-                dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
-
-                # save .mat for 3d face
-                wfp = '{}_{}_face3d.mat'.format(img_fp.replace(suffix, ''), ind)
-                colors = get_colors(img_ori, vertices)
-                sio.savemat(wfp, {'vertices': vertices, 'colors': colors, 'triangles': tri})
-
-            if args.dump_pts:
-                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
-                np.savetxt(wfp, pts68, fmt='%.3f')
-                print('Save 68 3d landmarks to {}'.format(wfp))
-            if args.dump_roi_box:
-                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
-                np.savetxt(wfp, roi_box, fmt='%.3f')
-                print('Save roi box to {}'.format(wfp))
-            if args.dump_paf:
-                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
-                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
-                paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)
-
-                cv2.imwrite(wfp_paf, paf_feature)
-                cv2.imwrite(wfp_crop, img)
-                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
-            if args.dump_obj:
-                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
-                colors = get_colors(img_ori, vertices)
-                write_obj_with_colors(wfp, vertices, tri, colors)
-                print('Dump obj with sampled texture to {}'.format(wfp))
-            ind += 1
-
-        if args.dump_pose:
-            # P, pose = parse_pose(param)  # camera matrix (without scale) and pose (yaw, pitch, roll)
-            img_pose = plot_pose_box(img_ori, Ps, pts_res)
-            wfp = img_fp.replace(suffix, '_pose.jpg')
-            cv2.imwrite(wfp, img_pose)
-            print('Dump to {}'.format(wfp))
-        if args.dump_depth:
-            wfp = img_fp.replace(suffix, '_depth.png')
-            # depths_img = get_depths_image(img_ori, vertices_lst, tri - 1)  # python version
-            depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)  # cython version
-            cv2.imwrite(wfp, depths_img)
-            print('Dump to {}'.format(wfp))
-        if args.dump_pncc:
-            wfp = img_fp.replace(suffix, '_pncc.png')
-            pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)  # cython version
-            cv2.imwrite(wfp, pncc_feature[:, :, ::-1])  # cv2.imwrite swaps RGB -> BGR
-            print('Dump to {}'.format(wfp))
-        if args.dump_res:
-            draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
-
-
-# nb: this block must come after the definition of main() to avoid a NameError
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
-    parser.add_argument('-f', '--files', nargs='+',
-                        help='image file paths fed into the network, single or multiple images')
-    parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode')
-    parser.add_argument('--show_flg', default='true', type=str2bool, help='whether to show the visualization result')
-    parser.add_argument('--bbox_init', default='one', type=str,
-                        help='one|two: one-step or two-step bbox initialization')
-    parser.add_argument('--dump_res', default='true', type=str2bool, help='whether to write out the visualization image')
-    parser.add_argument('--dump_vertex', default='true', type=str2bool,
-                        help='whether to write out the dense face vertices to mat')
-    parser.add_argument('--dump_ply', default='true', type=str2bool)
-    parser.add_argument('--dump_pts', default='true', type=str2bool)
-    parser.add_argument('--dump_roi_box', default='true', type=str2bool)
-    parser.add_argument('--dump_pose', default='true', type=str2bool)
-    parser.add_argument('--dump_depth', default='true', type=str2bool)
-    parser.add_argument('--dump_pncc', default='true', type=str2bool)
-    parser.add_argument('--dump_paf', default='true', type=str2bool)
-    parser.add_argument('--paf_size', default=3, type=int, help='PAF feature kernel size')
-    parser.add_argument('--dump_obj', default='true', type=str2bool)
-    parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether to use dlib to predict the bbox')
-    parser.add_argument('--dlib_landmark', default='true', type=str2bool,
-                        help='whether to use dlib landmarks to crop the image')
-
-    args = parser.parse_args()
-    main(args)
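
For reference, a minimal sketch of how the 62-dimensional parameter vector regressed by the network above is conventionally unpacked in 3DDFA. This is not part of the deleted file; the slice boundaries follow the "62 = 12(pose) + 40(shape) + 10(expression)" comment in the code, and the layout mirrors the upstream parsing utilities.

import numpy as np

# stand-in for model(input).squeeze().cpu().numpy().flatten()
param = np.zeros(62, dtype=np.float32)

p_ = param[:12].reshape(3, -1)           # 3x4 camera projection matrix
p = p_[:, :3]                            # rotation + scale
offset = p_[:, -1].reshape(3, 1)         # translation
alpha_shp = param[12:52].reshape(-1, 1)  # 40 shape coefficients
alpha_exp = param[52:].reshape(-1, 1)    # 10 expression coefficients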
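
The fallback branch in main() reads a sidecar '<image>.bbox' file when dlib returns no detections. Its format is not documented in the file; judging from the parser, the first line is skipped as a header and each remaining line carries a label followed by four integers. A hypothetical example (header text and label are assumptions), keeping in mind the caveat noted in the code that dlib.rectangle expects (left, top, right, bottom) ordering:

n_faces 1
face_0 110 95 260 245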
