megapixels/app/processors/face_pose.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

import os
from os.path import join
from pathlib import Path
import math

import cv2 as cv
import numpy as np
import imutils

from app.utils import im_utils, logger_utils
from app.models.bbox import BBox
from app.settings import app_cfg as cfg
from app.settings import types


class FacePoseDLIB:


  dnn_size = (400, 400)
  pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}

  def __init__(self):
    pass


  def pose(self, landmarks, dim):
    '''Returns face pose information
    :param landmarks: (list) of 68 (int, int) xy tuples
    :param dim: (tuple|list) of image (width, height)
    :returns (dict) of pose attributes
    '''

    # computes pose using 6 / 68 points from dlib face landmarks
    # based on learnopencv.com and 
    # https://github.com/jerryhouuu/Face-Yaw-Roll-Pitch-from-Pose-Estimation-using-OpenCV/
    # NB: not as accurate as MTCNN, see @jerryhouuu for ideas
    
    pose_points_idx = (30, 8, 36, 45, 48, 54)
    axis = np.float32([[500,0,0], [0,500,0], [0,0,500]])
    
    # 3D model points.
    model_points = np.array([
        (0.0, 0.0, 0.0),             # Nose tip
        (0.0, -330.0, -65.0),        # Chin
        (-225.0, 170.0, -135.0),     # Left eye left corner
        (225.0, 170.0, -135.0),      # Right eye right corne
        (-150.0, -150.0, -125.0),    # Left Mouth corner
        (150.0, -150.0, -125.0)      # Right mouth corner
    ])
    
    # Assuming no lens distortion
    dist_coeffs = np.zeros((4,1))

    # find 6 pose points
    pose_points = []
    for j, idx in enumerate(pose_points_idx):
      pt = landmarks[idx]
      pose_points.append((pt[0], pt[1]))
    pose_points = np.array(pose_points, dtype='double')  # convert to double
    
    # create camera matrix
    focal_length = dim[0]
    center = (dim[0]/2, dim[1]/2)
    cam_mat = np.array(
        [[focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 1, 1]], dtype = "double")
    
    # solve PnP for rotation and translation
    (success, rot_vec, tran_vec) = cv.solvePnP(model_points, pose_points, 
                                               cam_mat, dist_coeffs, 
                                               flags=cv.SOLVEPNP_ITERATIVE)

    result = {}

    # project points
    #if project_points:
    pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs)
    pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs)
    #result['points_model'] = pts_model
    #result['points_image'] = pts_im
    result['points'] = {
      'pitch': pts_im[0],
      'roll': pts_im[2],
      'yaw': pts_im[1]
      }
    result['point_nose'] = tuple(landmarks[pose_points_idx[0]])

    rvec_matrix = cv.Rodrigues(rot_vec)[0]
    
    # convert to degrees
    proj_matrix = np.hstack((rvec_matrix, tran_vec))
    eulerAngles = cv.decomposeProjectionMatrix(proj_matrix)[6] 
    pitch, yaw, roll = [math.radians(x) for x in eulerAngles]
    pitch = math.degrees(math.asin(math.sin(pitch)))
    roll = -math.degrees(math.asin(math.sin(roll)))
    yaw = math.degrees(math.asin(math.sin(yaw)))
    result['pitch'] = pitch
    result['roll'] = roll
    result['yaw'] = yaw

    return result