4 files changed, 296 insertions, 18 deletions
diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py
index 747e057b..593e9feb 100644
--- a/megapixels/app/processors/face_detector.py
+++ b/megapixels/app/processors/face_detector.py
@@ -4,12 +4,51 @@ from pathlib import Path
 
 import cv2 as cv
 import numpy as np
-import dlib
-# import imutils
+import imutils
+import operator
 
 from app.utils import im_utils, logger_utils
 from app.models.bbox import BBox
 from app.settings import app_cfg as cfg
+from app.settings import types
+
+
+class DetectorMTCNN:
+  '''Face detector backed by MTCNN
+  https://github.com/ipazc/mtcnn (pip install mtcnn)'''
+
+  # fallback network input size, used when detect() is called with opt_size=None
+  dnn_size = (300, 300)
+
+  def __init__(self, size=(400,400)):
+    '''Creates the MTCNN detector; `size` is accepted but not used'''
+    # imported here, so loading this module does not require the mtcnn package
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+
+  def detect(self, im, opt_size=(400,400), opt_conf_thresh=None, opt_pyramids=None, opt_largest=False):
+    '''Detects faces using MTCNN
+    :param im: (numpy.ndarray) image
+    :param opt_size: (tuple) width, height given to im_utils.resize; None selects self.dnn_size
+    :param opt_conf_thresh: not used by this detector
+    :param opt_pyramids: not used by this detector
+    :param opt_largest: (bool) return only the BBox with the largest area
+    :returns list of BBox
+    '''
+    net_size = self.dnn_size if opt_size is None else opt_size
+    im_resized = im_utils.resize(im, width=net_size[0], height=net_size[1])
+    # dim is (width, height) of the resized image
+    dim = im_resized.shape[:2][::-1]
+
+    # only each detection's 'box' is consumed; its 'keypoints' are ignored here
+    detections = self.detector.detect_faces(im_resized)
+    bboxes = [BBox.from_xywh_dim(*det['box'], dim) for det in detections]
+
+    if opt_largest and len(bboxes) > 1:
+      # keep only the biggest face; on equal areas the earliest detection wins
+      bboxes = [max(bboxes, key=operator.attrgetter('area'))]
+
+    return bboxes
 
 
 class DetectorHaar:
@@ -21,16 +60,18 @@ class DetectorHaar:
     self.log = logger_utils.Logger.getLogger()
 
   def detect(self, im, scale_factor=1.05, overlaps=5):
-    return
+    pass
     
 
 class DetectorDLIBCNN:
+  
 
   dnn_size = (300, 300)
   pyramids = 0
   conf_thresh = 0.85
 
   def __init__(self, opt_gpu=0):
+    import dlib
     self.log = logger_utils.Logger.getLogger()
     cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
     os.environ['CUDA_VISIBLE_DEVICES'] = str(opt_gpu)
@@ -38,8 +79,8 @@ class DetectorDLIBCNN:
     self.detector = dlib.cnn_face_detection_model_v1(cfg.DIR_MODELS_DLIB_CNN)
     os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset
 
-  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None):
-    rois = []
+  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None, opt_largest=False):
+    bboxes = []
     conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh
     pyramids = self.pyramids if opt_pyramids is None else opt_pyramids
     dnn_size = self.dnn_size if opt_size is None else opt_size
@@ -48,24 +89,34 @@ class DetectorDLIBCNN:
     dim = im.shape[:2][::-1]
     im = im_utils.bgr2rgb(im)  # convert to RGB for dlib
     # run detector
-    mmod_rects = self.detector(im, 1)
+    mmod_rects = self.detector(im, pyramids)  # resolved value; opt_pyramids may be None
     # sort results
     for mmod_rect in mmod_rects:
+      self.log.debug('conf: {}, this: {}'.format(conf_thresh, mmod_rect.confidence))
       if mmod_rect.confidence > conf_thresh:
         bbox = BBox.from_dlib_dim(mmod_rect.rect, dim)
-        rois.append(bbox)
-    return rois
+        bboxes.append(bbox)
+
+    if opt_largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
 
 
 class DetectorDLIBHOG:
 
   size = (320, 240)
   pyramids = 0
+  conf_thresh = 0.85
 
   def __init__(self):
+    import dlib
+    self.log = logger_utils.Logger.getLogger()
     self.detector = dlib.get_frontal_face_detector()
 
-  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=0):
+  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=0, opt_largest=False):
     conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh
     dnn_size = self.size if opt_size is None else opt_size
     pyramids = self.pyramids if opt_pyramids is None else opt_pyramids
@@ -75,13 +126,20 @@ class DetectorDLIBHOG:
     im = im_utils.bgr2rgb(im)  # ?
     hog_results = self.detector.run(im, pyramids)
     
-    rois = []
+    bboxes = []
     if len(hog_results[0]) > 0:
+      self.log.debug(hog_results)
       for rect, score, direction in zip(*hog_results):
-        if score > opt_conf_thresh:
+        if score > conf_thresh:
           bbox = BBox.from_dlib_dim(rect, dim)
-          rois.append(bbox)
-    return rois
+          bboxes.append(bbox)
+    
+    if opt_largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
 
 class DetectorCVDNN:
 
@@ -92,13 +150,14 @@ class DetectorCVDNN:
   conf_thresh = 0.85
   
   def __init__(self):
+    # OpenCV's dnn module reads the Caffe model below; dlib is not needed here
     fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.prototxt')
     fp_model = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.caffemodel')
     self.net = cv.dnn.readNet(fp_prototxt, fp_model)
     self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
     self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
 
-  def detect(self, im, opt_size=None, opt_conf_thresh=None):
+  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_largest=False, opt_pyramids=None):
     """Detects faces and returns (list) of (BBox)"""
     conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh
     dnn_size = self.size if opt_size is None else opt_size
@@ -107,10 +166,16 @@ class DetectorCVDNN:
     self.net.setInput(blob)
     net_outputs = self.net.forward()
 
-    rois = []
+    bboxes = []
     for i in range(0, net_outputs.shape[2]):
       conf = net_outputs[0, 0, i, 2]
-      if conf > opt_conf_thresh:
+      if conf > conf_thresh:
         rect_norm = net_outputs[0, 0, i, 3:7]
-        rois.append(BBox(*rect_norm))
-    return rois
-\ No newline at end of file
+        bboxes.append(BBox(*rect_norm))
+    
+    if opt_largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
+\ No newline at end of file
diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py
new file mode 100644
index 00000000..dfcb9ee8
--- /dev/null
+++ b/megapixels/app/processors/face_landmarks.py
@@ -0,0 +1,60 @@
+import os
+from os.path import join
+from pathlib import Path
+
+import cv2 as cv
+import numpy as np
+import imutils
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+from app.models.bbox import BBox
+
+class LandmarksDLIB:
+  '''Face landmark predictor using the dlib shape predictor at cfg.DIR_MODELS_DLIB_68PT'''
+
+  def __init__(self):
+    # dlib is imported here, so loading this module does not require dlib
+    import dlib
+    self.log = logger_utils.Logger.getLogger()
+    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_68PT)
+
+  def landmarks(self, im, bbox):
+    '''Returns a list of [x, y] points predicted for the face at bbox
+    :param im: (numpy.ndarray) BGR image; :param bbox: (BBox) face region'''
+    rect = bbox.to_dlib()
+    im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
+    return [[pt.x, pt.y] for pt in self.predictor(im_gray, rect).parts()]
+
+
+class LandmarksMTCNN:
+  '''MTCNN wrapper (https://github.com/ipazc/mtcnn, pip install mtcnn)
+  NOTE(review): despite the class name, detect() returns face boxes only;
+  the 'keypoints' reported by MTCNN are not returned
+  '''
+
+  dnn_size = (400, 400)
+
+  def __init__(self, size=(400,400)):
+    '''Creates the MTCNN detector; `size` is accepted but not used'''
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+
+  def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None):
+    '''Detects face using MTCNN and returns (list) of BBox
+    :param im: (numpy.ndarray) image
+    :param opt_size: (tuple) width, height given to im_utils.resize; None selects self.dnn_size
+    :param opt_conf_thresh: not used
+    :param opt_pyramids: not used
+    :returns list of BBox
+    '''
+    dnn_size = self.dnn_size if opt_size is None else opt_size
+    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
+    dim = im.shape[:2][::-1]
+
+    # run MTCNN
+    dets = self.detector.detect_faces(im)
+
+    # only det['box'] is consumed; det['keypoints'] is intentionally not read
+    return [BBox.from_xywh_dim(*det['box'], dim) for det in dets]
+\ No newline at end of file
diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py
new file mode 100644
index 00000000..67ac685d
--- /dev/null
+++ b/megapixels/app/processors/face_pose.py
@@ -0,0 +1,110 @@
+import os
+from os.path import join
+from pathlib import Path
+import math
+
+import cv2 as cv
+import numpy as np
+import imutils
+
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+
+
+
+class FacePoseDLIB:
+  '''Head pose estimation from face landmarks'''
+
+  # not referenced inside this class
+  dnn_size = (400, 400)
+
+  def __init__(self):
+    '''Nothing to initialize'''
+
+  def pose(self, landmarks, dim): 
+    '''Returns the result of compute_pose_degrees(landmarks, dim),
+    a dict with 'pitch', 'roll' and 'yaw' keys'''
+    return compute_pose_degrees(landmarks, dim)
+
+
+# -----------------------------------------------------------
+# utilities
+# -----------------------------------------------------------
+
+def compute_pose_degrees(landmarks, dim):
+  '''Estimates head pose by solving PnP on 6 of the 68 dlib face landmarks
+  based on learnopencv.com and 
+  https://github.com/jerryhouuu/Face-Yaw-Roll-Pitch-from-Pose-Estimation-using-OpenCV/
+  NB: not as accurate as MTCNN, see @jerryhouuu for ideas
+  :param landmarks: sequence of (x, y) points, indexed as in the dlib 68-point model
+  :param dim: (width, height) of the image the landmarks belong to
+  :returns dict with 'pitch', 'roll' and 'yaw' in degrees
+  '''
+  # landmark indices, in the same order as the rows of model_points
+  pose_points_idx = (30, 8, 36, 45, 48, 54)
+
+  # 3D model points.
+  model_points = np.array([
+      (0.0, 0.0, 0.0),             # Nose tip
+      (0.0, -330.0, -65.0),        # Chin
+      (-225.0, 170.0, -135.0),     # Left eye left corner
+      (225.0, 170.0, -135.0),      # Right eye right corner
+      (-150.0, -150.0, -125.0),    # Left Mouth corner
+      (150.0, -150.0, -125.0)      # Right mouth corner
+  ])
+
+  # Assuming no lens distortion
+  dist_coeffs = np.zeros((4,1))
+
+  # find 6 pose points
+  pose_points = np.array(
+      [(landmarks[idx][0], landmarks[idx][1]) for idx in pose_points_idx],
+      dtype='double')  # convert to double
+
+  # create camera matrix: focal length approximated by the image width,
+  # principal point at the image center
+  focal_length = dim[0]
+  center = (dim[0]/2, dim[1]/2)
+  # the bottom row of an intrinsic camera matrix is [0, 0, 1]
+  cam_mat = np.array(
+      [[focal_length, 0, center[0]],
+      [0, focal_length, center[1]],
+      [0, 0, 1]], dtype = "double")
+
+  # solve PnP for rotation and translation
+  (success, rot_vec, tran_vec) = cv.solvePnP(model_points, pose_points, 
+                                             cam_mat, dist_coeffs, 
+                                             flags=cv.SOLVEPNP_ITERATIVE)
+
+  # rotation vector -> rotation matrix
+  rvec_matrix = cv.Rodrigues(rot_vec)[0]
+
+  # the decomposition's last output holds the Euler angles in degrees
+  proj_matrix = np.hstack((rvec_matrix, tran_vec))
+  euler_angles = cv.decomposeProjectionMatrix(proj_matrix)[6]
+  # flatten so every angle is a plain float before math.radians
+  pitch, yaw, roll = (math.radians(float(x)) for x in np.ravel(euler_angles))
+
+  # asin(sin(a)) folds each angle into [-90, 90] degrees; roll is sign-flipped
+  pitch = math.degrees(math.asin(math.sin(pitch)))
+  roll = -math.degrees(math.asin(math.sin(roll)))
+  yaw = math.degrees(math.asin(math.sin(yaw)))
+
+  return {'pitch': pitch, 'roll': roll, 'yaw': yaw}
+
+
+def draw_pose(im, pts_im, pts_model, pt_nose):
+  '''Draws three lines from pt_nose to pts_im[1], [0], [2] on im and returns im; pts_model is unused'''
+  for idx, color in ((1, (0,255,0)), (0, (255,0,0)), (2, (0,0,255))):  # GREEN, BLUE, RED in BGR
+    cv.line(im, pt_nose, tuple(int(round(v)) for v in pts_im[idx].ravel()), color, 3)  # integer pixel coords
+  return im
+
+
+def draw_degrees(im, degrees, color=(0,255,0)):
+  '''Writes one "key: value" text line per item of degrees onto im, from (10, 30) downwards in 25 px steps'''
+  for row, (name, value) in enumerate(degrees.items()):
+    label = '{}: {:.2f}'.format(name, value)
+    text_origin = (10, 30 + (25 * row))
+    cv.putText(im, label, text_origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=2, lineType=2)
+\ No newline at end of file
diff --git a/megapixels/app/processors/face_recognition.py b/megapixels/app/processors/face_recognition.py
new file mode 100644
index 00000000..9c3a301d
--- /dev/null
+++ b/megapixels/app/processors/face_recognition.py
@@ -0,0 +1,43 @@
+import os
+from os.path import join
+from pathlib import Path
+
+import cv2 as cv
+import numpy as np
+import dlib
+import imutils
+
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+    
+class RecognitionDLIB:
+  '''128d face vectors via dlib, see github.com/davisking/dlib python_examples/face_recognition.py'''
+
+  def __init__(self, opt_gpu=0):
+    '''Loads the 5-point shape predictor and the face recognition model with GPU opt_gpu visible'''
+    self.log = logger_utils.Logger.getLogger()
+    cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt_gpu)
+    try:  # always load the models: vec() needs them, also when opt_gpu is 0
+      self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT)
+      self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET)
+    finally:
+      os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset GPU env
+
+  def vec(self, im, bbox, width=100, 
+    jitters=cfg.DLIB_FACEREC_JITTERS, padding=cfg.DLIB_FACEREC_PADDING):
+    '''Converts image and bbox into 128d vector
+    The image is scaled first so the face is about `width` pixels wide'''
+    rect = bbox.to_dlib()
+    scale = width / max(rect.width(), 1)  # guard against an empty rectangle
+    im = cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4)
+    # the face rectangle has to follow the image into the resized coordinate space
+    rect = dlib.rectangle(*(int(round(v * scale)) for v in (rect.left(), rect.top(), rect.right(), rect.bottom())))
+    face_shape = self.predictor(im, rect)
+    return self.facerec.compute_face_descriptor(im, face_shape, jitters, padding)
+
+  def similarity(self, query_enc, known_enc):
+    '''Returns the Euclidean norm of (query_enc - known_enc) along axis 1; smaller means closer'''
+    return np.linalg.norm(query_enc - known_enc, axis=1)