'''
Combines 3D face modeling and rendering:
https://github.com/cleardusk/3DDFA --> 3D landmarks and dense face vertices
https://github.com/YadiraF/face3d --> render the 3D mesh with lighting as a 2.5D image
'''
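# Example invocation (hypothetical script name; the actual entry point depends on
# how this click command is wired into the app):
#   python cli_3dface.py -i data/face.jpg -o data/out --render-size 512 512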
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None, required=True,
help='Image filepath')
@click.option('-o', '--output', 'opt_dir_out', default=None,
help='Directory for output files')
@click.option('--size', 'opt_size',
type=(int, int), default=(300, 300),
help='Output image size')
@click.option('-g', '--gpu', 'opt_gpu', default=0,
help='GPU index')
@click.option('-f', '--force', 'opt_force', is_flag=True,
help='Force overwrite file')
@click.option('--bbox-init/--landmark-init', 'opt_bbox_init', default=True,
  help='Initialize the crop ROI from the bbox (default) or from the 68 landmarks')
@click.option('--render-size', 'opt_render_dim',
  type=(int, int), default=(512, 512),
  help='2.5D render image size')
@click.option('--display/--no-display', 'opt_display', default=True,
  help='Display detections to debug')
@click.option('--save/--no-save', 'opt_save', default=True,
  help='Save output images/files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_dir_out, opt_gpu, opt_bbox_init,
opt_size, opt_render_dim, opt_force, opt_display, opt_save):
"""3D face demo"""
  import sys
  import time
  from os.path import join
  from pathlib import Path

  import numpy as np
  import cv2 as cv
  import torch
  import torchvision.transforms as transforms
  import torch.backends.cudnn as cudnn
  import scipy.io as sio

  from app.utils import logger_utils, display_utils, draw_utils
  from app.processors import face_detector, face_landmarks
sys.path.append(join(Path.cwd().parent, '3rdparty'))
  # git clone https://github.com/cleardusk/3DDFA 3rdparty/d3DDFA
  # the repo is renamed 3DDFA -> d3DDFA because a Python module name can't start with a digit
from d3DDFA import mobilenet_v1
from d3DDFA.utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
from d3DDFA.utils import inference as d3dfa_utils
from d3DDFA.utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts
from d3DDFA.utils.inference import dump_to_ply, dump_vertex, draw_landmarks
from d3DDFA.utils.inference import predict_dense, parse_roi_box_from_bbox, get_colors
from d3DDFA.utils.inference import write_obj_with_colors
from d3DDFA.utils.estimate_pose import parse_pose
from d3DDFA.utils.render import get_depths_image, cget_depths_image, cpncc
from d3DDFA.utils import paf as d3dfa_paf_utils
  # git clone https://github.com/YadiraF/face3d 3rdparty/face3d
  # compile the Cython module first: cd face3d/mesh/cython && python setup.py build_ext -i
from face3d.face3d import mesh as face3d_mesh
log = logger_utils.Logger.getLogger()
# -------------------------------------------------
# load image
  fpp_in = Path(opt_fp_in)
  im = cv.imread(opt_fp_in)
  if im is None:
    log.error(f'could not read image: {opt_fp_in}')
    return
# ----------------------------------------------------------------------------
# detect face
  detector = face_detector.DetectorCVDNN()  # -1 for CPU
  bboxes = detector.detect(im, largest=True)
  if not bboxes:
    log.error('no face detected')
    return
  bbox = bboxes[0]
  dim = im.shape[:2][::-1]  # (w, h)
  bbox_dim = bbox.to_dim(dim)
  log.info(f'face detected: {bbox_dim.to_xyxy()}')
# -------------------------------------------------------------------------
# landmarks
  landmark_predictor = face_landmarks.Dlib2D_68()
  landmarks = landmark_predictor.landmarks(im, bbox_dim)
# -------------------------------------------------------------------------
# 3ddfa
  STD_SIZE = 120  # 3DDFA network input resolution (120x120)
  # load pre-trained model
fp_ckpt = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'phase1_wpdc_vdc_v2.pth.tar')
arch = 'mobilenet_1'
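  # map_location=lambda storage, loc: storage below keeps all checkpoint tensors
  # on the CPU at load time, regardless of the device they were saved from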
checkpoint = torch.load(fp_ckpt, map_location=lambda storage, loc: storage)['state_dict']
  model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)
model_dict = model.state_dict()
  # the model was trained with multiple GPUs, so strip the 'module.' prefix from the checkpoint keys
  for k in checkpoint.keys():
    model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict, strict=False)
if opt_gpu > -1:
cudnn.benchmark = True
model = model.cuda()
model.eval()
  # load the mesh triangulation and build the input transform
  st = time.time()
  fp_tri = join(cfg.DIR_MODELS_PYTORCH, '3ddfa', 'tri.mat')
triangles = sio.loadmat(fp_tri)['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
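  # ToTensorGjz converts the HWC uint8 crop to a CHW float tensor;
  # NormalizeGjz maps pixel values to roughly [-1, 1] via (x - 127.5) / 128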
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
vertices_lst = [] # store multiple face vertices
  # initialize the crop ROI from either the bbox or the 68-point landmarks
  pts = np.array(landmarks).T
  if opt_bbox_init:
    roi_box = parse_roi_box_from_bbox(bbox_dim.to_xyxy())
  else:
    roi_box = parse_roi_box_from_landmark(pts)
im_crop = d3dfa_utils.crop_img(im, roi_box)
im_crop = cv.resize(im_crop, dsize=(STD_SIZE, STD_SIZE), interpolation=cv.INTER_LINEAR)
# forward
torch_input = transform(im_crop).unsqueeze(0)
with torch.no_grad():
if opt_gpu > -1:
torch_input = torch_input.cuda()
param = model(torch_input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
# 68 pts
pts68 = d3dfa_utils.predict_68pts(param, roi_box)
pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)
# dense face 3d vertices
vertices = d3dfa_utils.predict_dense(param, roi_box)
vertices_lst.append(vertices)
log.info(f'generated 3d data in: {(time.time() - st):.2f}s')
# filepath helper function
def to_fp(fpp, ext, suffix=None):
if suffix:
fp = join(fpp.parent, f'{fpp.stem}_{suffix}.{ext}')
else:
fp = join(fpp.parent, f'{fpp.stem}.{ext}')
return fp
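  # e.g. to_fp(Path('/data/face.jpg'), 'mat', suffix='face3d') -> '/data/face_face3d.mat'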
# save .mat
  colors = d3dfa_utils.get_colors(im, vertices)  # note: get_colors clamps vertices to the image bounds in place
  vertices_orig = vertices.copy()
fp_mat_3df = to_fp(fpp_in, 'mat', suffix='face3d')
sio.savemat(fp_mat_3df, {'vertices': vertices, 'colors': colors, 'triangles': triangles})
  # generate PAF image (saved later if --save is set)
  im_paf = d3dfa_paf_utils.gen_img_paf(img_crop=im_crop, param=param, kernel_size=3)
  # generate pose box image
  # P, pose = parse_pose(param)  # camera matrix (without scale) and pose (yaw, pitch, roll; to verify)
  im_pose = draw_utils.plot_pose_box(im, Ps, pts_res)
  # generate depth image
  # im_depth = get_depths_image(im, vertices_lst, triangles - 1)  # python version
  im_depth = cget_depths_image(im, vertices_lst, triangles - 1)  # cython version
  # generate PNCC image
  pncc_feature = cpncc(im, vertices_lst, triangles - 1)  # cython version
  im_pncc = pncc_feature[:, :, ::-1]  # RGB -> BGR for OpenCV
# save .ply
#fp_ply = to_fp(fpp_in, 'ply')
#dump_to_ply(vertices, triangles, fp_ply)
# skip: save .mat (3ddfa default not compatible with face3d utils)
#fp_mat = to_fp(fpp_in, 'mat')
#d3dfa_utils.dump_vertex(vertices, fp_mat)
# save 68 points
  #fp_txt = to_fp(fpp_in, 'txt', suffix='68')
  #np.savetxt(fp_txt, pts68, fmt='%.3f')
# save roi
#fp_txt = to_fp(fpp_in, 'txt', suffix='roi')
  #np.savetxt(fp_txt, roi_box, fmt='%.3f')
# save crop
#fp_crop = to_fp(fpp_in, 'jpg', suffix='crop')
#cv.imwrite(fp_crop, im_crop)
  # recompute per-vertex colors for the .obj export
  colors = d3dfa_utils.get_colors(im, vertices_orig)
  #fp_landmarks = to_fp(fpp_in, 'jpg', suffix='3DDFA')
  #d3dfa_utils.draw_landmarks(im, pts_res, wfp=fp_landmarks, show_flg=False)
# -------------------------------------------------------------------------
# face3d
  # build a lit 3D mesh render of the photographed face
  # restore the saved copy: get_colors() (above) clamps its vertices argument in place,
  # which appears to be where the vertices were being changed
  vertices = vertices_orig
  # preprocess 3DDFA output for face3d rendering
  triangles_3ddfa = triangles  # keep 3DDFA's original 1-based (3, n) layout for the .obj export
  vertices = vertices.transpose()  # (3, n) -> (n, 3)
  triangles = triangles.transpose()
  vertices = vertices.astype(np.float64)
  # shift triangle indices from 1-based (MATLAB-style .mat) to 0-based
  triangles = (triangles - 1).astype(np.int32)
  # center the mesh at the origin
  vertices -= vertices.mean(axis=0)
  # colors = np.array([c[::-1] for c in colors])  # BGR --> RGB
  colors = colors / np.max(colors)  # normalize color range to [0, 1]
  # scale so the rendered face fills about 75% of the canvas height
  max_render_size = int(max(opt_render_dim) * .75)
  s = max_render_size / (np.max(vertices[:,1]) - np.min(vertices[:,1]))
  # rotation from Euler angles in degrees (x, y, z)
  R = face3d_mesh.transform.angle2matrix([-180, -20, 0])
  # no translation; keep the object centered at the origin
  t = [0, 0, 0]
vertices_trans = face3d_mesh.transform.similarity_transform(vertices, s, R, t)
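  # face3d applies the similarity transform per vertex: X' = s * X @ R.T + t
  # (see face3d/mesh/transform.py)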
  # lighting: add point lights; positions are defined in world space
  light_pos = np.array([[-128, -128, 512]])
  light_clr_amt = np.array([[1, 1, 1]])  # light color/intensity (RGB)
colors_lit = face3d_mesh.light.add_light(vertices_trans, triangles, colors, light_pos, light_clr_amt)
  # transform from world space to camera space (the scene as seen by the observer)
  vertices_cam = face3d_mesh.transform.lookat_camera(vertices_trans, eye=[0, 0, 0], at=np.array([0, 0, 1]), up=None)
  # project from 3D camera space onto the 2D image plane (orthographic here; perspective is also available)
vertices_proj = face3d_mesh.transform.orthographic_project(vertices_cam)
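  # in face3d, the orthographic projection returns the coordinates unchanged;
  # to_image() below maps x,y into pixel coordinates and flips the y axis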
# -------------------------------------------------------------------------
# render 2D images
w = h = max(opt_render_dim)
vertices_im = face3d_mesh.transform.to_image(vertices_proj, h, w)
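  # render_colors rasterizes the colored triangles with a depth buffer
  # into an h x w x 3 float image with values in [0, 1]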
im_render = face3d_mesh.render.render_colors(vertices_im, triangles, colors_lit, h, w)
  im_render = (255 * np.clip(im_render, 0, 1)).astype(np.uint8)  # clip first; lighting can push values past 1.0
im_pncc = im_pncc.astype(np.uint8)
im_depth = im_depth.astype(np.uint8)
im_paf = im_paf.astype(np.uint8)
# ----------------------------------------------------------------------------
# save
  if opt_save:
    fpp_out = Path(opt_dir_out) if opt_dir_out is not None else fpp_in.parent
    fpp_out.mkdir(parents=True, exist_ok=True)
    fp_out = join(fpp_out, f'{fpp_in.stem}_render.png')
cv.imwrite(fp_out, im_render)
fp_out = join(fpp_out, f'{fpp_in.stem}_pose.png')
cv.imwrite(fp_out, im_pose)
fp_out = join(fpp_out, f'{fpp_in.stem}_depth.png')
cv.imwrite(fp_out, im_depth)
fp_out = join(fpp_out, f'{fpp_in.stem}_pncc.png')
cv.imwrite(fp_out, im_pncc)
fp_out = join(fpp_out, f'{fpp_in.stem}_paf.png')
cv.imwrite(fp_out, im_paf)
    fp_out = join(fpp_out, f'{fpp_in.stem}.obj')
    # use 3DDFA's original triangle layout, not the face3d-preprocessed (transposed, 0-based) one
    write_obj_with_colors(fp_out, vertices_orig, triangles_3ddfa, colors)
# ----------------------------------------------------------------------------
# display
if opt_display:
# show all images here
cv.imshow('3d', im_render)
cv.imshow('depth', im_depth)
cv.imshow('pncc', im_pncc)
cv.imshow('pose', im_pose)
cv.imshow('paf', im_paf)
display_utils.handle_keyboard()