Diffstat (limited to 'megapixels/app/processors')
 -rw-r--r--  megapixels/app/processors/face_detector.py     |  27
 -rw-r--r--  megapixels/app/processors/face_landmarks.py    | 194
 -rw-r--r--  megapixels/app/processors/face_landmarks_2d.py |  87
 -rw-r--r--  megapixels/app/processors/face_landmarks_3d.py |  38
 -rw-r--r--  megapixels/app/processors/face_pose.py         |  15
 5 files changed, 213 insertions(+), 148 deletions(-)
diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py
index a805a474..6bf27576 100644
--- a/megapixels/app/processors/face_detector.py
+++ b/megapixels/app/processors/face_detector.py
@@ -65,8 +65,6 @@ class DetectorHaar:
 
 
 class DetectorDLIBCNN:
-
-  dnn_size = (300, 300)
   pyramids = 0
   conf_thresh = 0.85
 
@@ -79,13 +77,10 @@ class DetectorDLIBCNN:
     self.detector = dlib.cnn_face_detection_model_v1(cfg.DIR_MODELS_DLIB_CNN)
     os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset
 
-  def detect(self, im, size=None, conf_thresh=None, pyramids=None, largest=False, zone=None):
+  def detect(self, im, conf_thresh=None, pyramids=None, largest=False, zone=None):
     bboxes = []
     conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh
     pyramids = self.pyramids if pyramids is None else pyramids
-    dnn_size = self.dnn_size if size is None else size
-    # resize image
-    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
     dim = im.shape[:2][::-1]
     im = im_utils.bgr2rgb(im)  # convert to RGB for dlib
     # run detector
@@ -110,7 +105,6 @@ class DetectorDLIBCNN:
 
 
 class DetectorDLIBHOG:
-  size = (320, 240)
   pyramids = 0
   conf_thresh = 0.85
 
@@ -119,12 +113,9 @@ class DetectorDLIBHOG:
     self.log = logger_utils.Logger.getLogger()
     self.detector = dlib.get_frontal_face_detector()
 
-  def detect(self, im, size=None, conf_thresh=None, pyramids=0, largest=False, zone=False):
+  def detect(self, im, conf_thresh=None, pyramids=0, largest=False, zone=False):
     conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh
-    dnn_size = self.size if size is None else size
     pyramids = self.pyramids if pyramids is None else pyramids
-
-    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
     dim = im.shape[:2][::-1]
     im = im_utils.bgr2rgb(im)  # ?
     hog_results = self.detector.run(im, pyramids)
@@ -153,23 +144,23 @@ class DetectorCVDNN:
   dnn_scale = 1.0  # fixed
   dnn_mean = (104.0, 177.0, 123.0)  # fixed
   dnn_crop = False  # crop or force resize
-  size = (300, 300)
-  conf_thresh = 0.85
+  blob_size = (300, 300)
+  conf_thresh = 0.95
 
   def __init__(self):
-    import dlib
+    self.log = logger_utils.Logger.getLogger()
     fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.prototxt')
     fp_model = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.caffemodel')
     self.net = cv.dnn.readNet(fp_prototxt, fp_model)
     self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
     self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
 
-  def detect(self, im, size=None, conf_thresh=None, largest=False, pyramids=None, zone=False):
+  def detect(self, im, conf_thresh=None, largest=False, pyramids=None, zone=False):
     """Detects faces and returns (list) of (BBox)"""
     conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh
-    dnn_size = self.size if size is None else size
-    im = cv.resize(im, dnn_size)
-    blob = cv.dnn.blobFromImage(im, self.dnn_scale, dnn_size, self.dnn_mean)
+    im = cv.resize(im, self.blob_size)
+    dim = im.shape[:2][::-1]
+    blob = cv.dnn.blobFromImage(im, self.dnn_scale, dim, self.dnn_mean)
     self.net.setInput(blob)
     net_outputs = self.net.forward()
 
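The detector change above removes the per-call size argument: resizing now happens inside each class, and DetectorCVDNN uses its fixed blob_size with a default conf_thresh of 0.95. A minimal usage sketch, not part of the patch, assuming the class is importable as app.processors.face_detector and that detect() returns the list of BBox described in its docstring:

import cv2 as cv

from app.processors.face_detector import DetectorCVDNN  # assumed import path

detector = DetectorCVDNN()
im = cv.imread('face.jpg')     # hypothetical BGR input image, any resolution
bboxes = detector.detect(im)   # no `size` kwarg after this patch; conf_thresh defaults to 0.95
for bbox in bboxes:
  print(bbox)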
diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py
new file mode 100644
index 00000000..8086ba1e
--- /dev/null
+++ b/megapixels/app/processors/face_landmarks.py
@@ -0,0 +1,194 @@
+from os.path import join
+from pathlib import Path
+
+import cv2 as cv
+import numpy as np
+import imutils
+
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+from app.models.bbox import BBox
+
+
+# ----------------------------------------------------------------------
+#
+# 2D landmarks: 5pt and 68pt
+#
+# ----------------------------------------------------------------------
+
+class Landmarks2D:
+
+  # Abstract class
+
+  def __init__(self):
+    self.log = logger_utils.Logger.getLogger()
+
+  def landmarks(self, im, bbox):
+    # override
+    self.log.warn('Define landmarks() function')
+    pass
+
+  def flatten(self, points):
+    '''Converts list of point-tupes into a flattened list for CSV
+    :param points: (list) of x,y points
+    :returns dict item for each point (eg {'x1':100, 'y1':200})
+    '''
+    points_formatted = {}
+    for idx, pt in enumerate(points, 1):
+      for j, d in enumerate('xy'):
+        points_formatted[f'{d}{idx}'] = pt[j]
+    return points_formatted
+
+  def normalize(self, points, dim):
+    return [np.array(p)/dim for p in points]  # divides each point by w,h dim
+
+
+
+import face_alignment
+
+class FaceAlignment2D_68(Landmarks2D):
+
+  # https://github.com/1adrianb/face-alignment
+  # Estimates 2D facial landmarks
+
+  def __init__(self, gpu=0, flip_input=False):
+    t = face_alignment.LandmarksType._2D
+    device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
+    self.fa = face_alignment.FaceAlignment(t, device=device, flip_input=flip_input)
+    super().__init__()
+    self.log.debug(f'{device}')
+    self.log.debug(f'{t}')
+
+  def landmarks(self, im):
+    '''Calculates the 2D facial landmarks
+    :param im: (numpy.ndarray) BGR image
+    :returns (list) of 68 (int) (tuples) as (x,y)
+    '''
+    # predict landmarks
+    points = self.fa.get_landmarks(im)  # returns array of arrays of 68 2D pts/face
+    # convert to data type
+    points = [list(map(int, p)) for p in points[0]]
+    return points
+
+
+class Dlib2D(Landmarks2D):
+
+  def __init__(self, model):
+    super().__init__()
+    # init dlib
+    import dlib
+    self.predictor = dlib.shape_predictor(model)
+    self.log.info(f'loaded predictor model: {model}')
+
+  def landmarks(self, im, bbox):
+    # Draw high-confidence faces
+    dim_wh = im.shape[:2][::-1]
+    bbox = bbox.to_dlib()
+    im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
+    points = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()]
+    return points
+
+
+class Dlib2D_68(Dlib2D):
+
+  def __init__(self):
+    # Get 68-point landmarks using DLIB
+    super().__init__(cfg.DIR_MODELS_DLIB_68PT)
+
+
+class Dlib2D_5(Dlib2D):
+
+  def __init__(self):
+    # Get 5-point landmarks using DLIB
+    super().__init__(cfg.DIR_MODELS_DLIB_5PT)
+
+
+class MTCNN2D_5(Landmarks2D):
+
+  # Get 5-point landmarks using MTCNN
+  # https://github.com/ipazc/mtcnn
+  # pip install mtcnn
+
+  def __init__(self):
+    super().__init__()
+    self.log.warn('NB: MTCNN runs both face detector and landmark predictor together.')
+    self.log.warn('    this will use face with most similar ROI')
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+
+  def landmarks(self, im, bbox):
+    '''Detects face using MTCNN and returns (list) of BBox
+    :param im: (numpy.ndarray) image
+    :returns list of BBox
+    '''
+    results = []
+    dim_wh = im.shape[:2][::-1]  # (w, h)
+
+    # run MTCNN to get bbox and landmarks
+    dets = self.detector.detect_faces(im)
+    keypoints = []
+    bboxes = []
+    # iterate detections and convert to BBox
+    for det in dets:
+      #rect = det['box']
+      points = det['keypoints']
+      # convert to normalized for contain-comparison
+      points_norm = [np.array(pt)/dim_wh for pname, pt in points.items()]
+      contains = False not in [bbox.contains(pn) for pn in points_norm]
+      if contains:
+        results.append(points)  # append original points
+
+    return results
+
+
+# ----------------------------------------------------------------------
+#
+# 3D landmarks
+#
+# ----------------------------------------------------------------------
+
+class Landmarks3D:
+
+  def __init__(self):
+    self.log = logger_utils.Logger.getLogger()
+
+  def landmarks(self, im, bbox):
+    pass
+
+  def flatten(self, points):
+    '''Converts list of point-tupes into a flattened list for CSV
+    :param points: (list) of x,y points
+    :returns dict item for each point (eg {'x1':100, 'y1':200})
+    '''
+    points_formatted = {}
+    for idx, pt in enumerate(points, 1):
+      for j, d in enumerate('xyz'):
+        points_formatted[f'{d}{idx}'] = pt[j]
+    return points_formatted
+
+  def normalize(self, points, dim):
+    return [np.array(p)/dim for p in points]  # divides each point by w,h dim
+
+
+class FaceAlignment3D_68(Landmarks3D):
+
+  # Estimates 3D facial landmarks
+  import face_alignment
+
+  def __init__(self, gpu=0, flip_input=False):
+    super().__init__()
+    device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
+    self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=flip_input)
+
+  def landmarks(self, im, as_type=str):
+    '''Calculates the 3D facial landmarks
+    :param im: (numpy.ndarray) BGR image
+    :returns (list) of 68 (int) (tuples) as (x,y, z)
+    '''
+    # predict landmarks
+    points = self.fa.get_landmarks(im)  # returns array of arrays of 68 3D pts/face
+    # convert to data type
+    points = [list(map(int, p)) for p in points[0]]
+    return points
\ No newline at end of file
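The new face_landmarks.py above splits landmark estimation into a Landmarks2D/Landmarks3D hierarchy. A sketch of how the 2D path might be wired to a detector — the import paths are assumptions, and it presumes the BBox returned by the detector can be passed straight to the predictor, as the method signatures suggest:

import cv2 as cv

from app.processors.face_detector import DetectorCVDNN  # assumed import path
from app.processors.face_landmarks import Dlib2D_68     # assumed import path

im = cv.imread('face.jpg')  # hypothetical input image
bboxes = DetectorCVDNN().detect(im)
if bboxes:
  predictor = Dlib2D_68()                       # loads cfg.DIR_MODELS_DLIB_68PT
  points = predictor.landmarks(im, bboxes[0])   # 68 (x, y) points
  row = predictor.flatten(points)               # {'x1': ..., 'y1': ..., ...} for CSV rows
  points_norm = predictor.normalize(points, im.shape[:2][::-1])  # divide by (w, h)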
diff --git a/megapixels/app/processors/face_landmarks_2d.py b/megapixels/app/processors/face_landmarks_2d.py
deleted file mode 100644
index e8ce93c1..00000000
--- a/megapixels/app/processors/face_landmarks_2d.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import os
-from os.path import join
-from pathlib import Path
-
-import cv2 as cv
-import numpy as np
-import imutils
-from app.utils import im_utils, logger_utils
-from app.models.bbox import BBox
-from app.settings import app_cfg as cfg
-from app.settings import types
-from app.models.bbox import BBox
-
-class LandmarksFaceAlignment:
-
-  # Estimates 2D facial landmarks
-  import face_alignment
-
-  def __init__(self, gpu=0):
-    self.log = logger_utils.Logger.getLogger()
-    device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
-    self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True)
-
-  def landmarks(self, im, as_type=str):
-    '''Calculates the 3D facial landmarks
-    :param im: (numpy.ndarray) image
-    :param as_type: (str) or (list) type to return data
-    '''
-    preds = self.fa.get_landmarks(im)
-    # convert to comma separated ints
-    # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4"
-    # storing a list object in Pandas seems to result in 30% larger CSV files
-    # TODO optimize this
-    preds_int = [list(map(int, x)) for x in preds[0]]  # list of ints
-    if as_type is str:
-      return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int])
-    else:
-      return preds_int
-
-
-class LandmarksDLIB:
-
-  def __init__(self):
-    # init dlib
-    import dlib
-    self.log = logger_utils.Logger.getLogger()
-    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_68PT)
-
-  def landmarks(self, im, bbox):
-    # Draw high-confidence faces
-    dim = im.shape[:2][::-1]
-    bbox = bbox.to_dlib()
-    im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
-    landmarks = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()]
-    return landmarks
-
-
-class LandmarksMTCNN:
-
-  # https://github.com/ipazc/mtcnn
-  # pip install mtcnn
-
-  dnn_size = (400, 400)
-
-  def __init__(self, size=(400,400)):
-    from mtcnn.mtcnn import MTCNN
-    self.detector = MTCNN()
-
-  def landmarks(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None):
-    '''Detects face using MTCNN and returns (list) of BBox
-    :param im: (numpy.ndarray) image
-    :returns list of BBox
-    '''
-    rois = []
-    dnn_size = self.dnn_size if opt_size is None else opt_size
-    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
-    dim = im.shape[:2][::-1]
-
-    # run MTCNN
-    dets = self.detector.detect_faces(im)
-
-    for det in dets:
-      rect = det['box']
-      keypoints = det['keypoints']  # not using here. see 'face_landmarks.py'
-      bbox = BBox.from_xywh_dim(*rect, dim)
-      rois.append(bbox)
-    return rois
\ No newline at end of file
diff --git a/megapixels/app/processors/face_landmarks_3d.py b/megapixels/app/processors/face_landmarks_3d.py
index 3663364c..470d263c 100644
--- a/megapixels/app/processors/face_landmarks_3d.py
+++ b/megapixels/app/processors/face_landmarks_3d.py
@@ -12,43 +12,24 @@ from app.models.bbox import BBox
 from app.settings import app_cfg as cfg
 from app.settings import types
 
+class Landmarks3D:
 
-class FaceLandmarks2D:
-
-  # Estimates 2D facial landmarks
-  import face_alignment
-
-  def __init__(self, gpu=0):
+  def __init__(self):
     self.log = logger_utils.Logger.getLogger()
-    device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
-    self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True)
-
-  def landmarks(self, im, as_type=str):
-    '''Calculates the 3D facial landmarks
-    :param im: (numpy.ndarray) image
-    :param as_type: (str) or (list) type to return data
-    '''
-    preds = self.fa.get_landmarks(im)
-    # convert to comma separated ints
-    # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4"
-    # storing a list object in Pandas seems to result in 30% larger CSV files
-    # TODO optimize this
-    preds_int = [list(map(int, x)) for x in preds[0]]  # list of ints
-    if as_type is str:
-      return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int])
-    else
-      return preds_int
+  def landmarks(self, im, bbox):
+    pass
 
 
-class FaceLandmarks3D:
+
+class FaceAlignment3D(Landmarks3D):
 
   # Estimates 3D facial landmarks
   import face_alignment
 
-  def __init__(self, gpu=0):
-    self.log = logger_utils.Logger.getLogger()
+  def __init__(self, gpu=0, flip_input=False):
+    super().__init__()
     device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
-    self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=False)
+    self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=flip_input)
 
   def landmarks(self, im, as_type=str):
     '''Calculates the 3D facial landmarks
@@ -66,6 +47,7 @@ class FaceLandmarks3D:
     else
       return preds_int
 
+
 
   def draw(self, im):
     '''draws landmarks in 3d scene'''
diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py
index 96281637..8bc95f8d 100644
--- a/megapixels/app/processors/face_pose.py
+++ b/megapixels/app/processors/face_pose.py
@@ -95,18 +95,3 @@ class FacePoseDLIB:
     result['yaw'] = yaw
 
     return result
-
-
-  def draw_pose(self, im, pt_nose, image_pts):
-    cv.line(im, pt_nose, tuple(image_pts['pitch'].ravel()), self.pose_types['pitch'], 3)
-    cv.line(im, pt_nose, tuple(image_pts['yaw'].ravel()), self.pose_types['yaw'], 3)
-    cv.line(im, pt_nose, tuple(image_pts['roll'].ravel()), self.pose_types['roll'], 3)
-
-
-  def draw_degrees(self, im, pose_data, color=(0,255,0)):
-    for i, pose_type in enumerate(self.pose_types.items()):
-      k, clr = pose_type
-      v = pose_data[k]
-      t = '{}: {:.2f}'.format(k, v)
-      origin = (10, 30 + (25 * i))
-      cv.putText(im, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
\ No newline at end of file
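For the 3D path, both the new face_landmarks.FaceAlignment3D_68 and the refactored face_landmarks_3d.FaceAlignment3D take the same constructor arguments. A sketch, assuming the face-alignment package is installed and the import path below matches the project layout:

import cv2 as cv

from app.processors.face_landmarks import FaceAlignment3D_68  # assumed import path

fa3d = FaceAlignment3D_68(gpu=-1)  # gpu=-1 selects 'cpu'; gpu=0 selects 'cuda:0'
im = cv.imread('face.jpg')         # hypothetical input image
points = fa3d.landmarks(im)        # 68 (x, y, z) integer triplets
row = fa3d.flatten(points)         # {'x1': ..., 'y1': ..., 'z1': ..., ...} for CSV rows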
