From bff4e1c50349b0ba7d8e5fab6ce697c0b856f13f Mon Sep 17 00:00:00 2001 From: adamhrv Date: Fri, 4 Jan 2019 13:21:59 +0100 Subject: adding 3D landmarks... --- megapixels/app/processors/face_landmarks.py | 60 ------------------ megapixels/app/processors/face_landmarks_2d.py | 87 ++++++++++++++++++++++++++ megapixels/app/processors/face_landmarks_3d.py | 51 +++++++++++++-- megapixels/app/settings/types.py | 4 +- 4 files changed, 134 insertions(+), 68 deletions(-) delete mode 100644 megapixels/app/processors/face_landmarks.py create mode 100644 megapixels/app/processors/face_landmarks_2d.py (limited to 'megapixels/app') diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py deleted file mode 100644 index dfcb9ee8..00000000 --- a/megapixels/app/processors/face_landmarks.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -from os.path import join -from pathlib import Path - -import cv2 as cv -import numpy as np -import imutils -from app.utils import im_utils, logger_utils -from app.models.bbox import BBox -from app.settings import app_cfg as cfg -from app.settings import types -from app.models.bbox import BBox - -class LandmarksDLIB: - - def __init__(self): - # init dlib - import dlib - self.log = logger_utils.Logger.getLogger() - self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_68PT) - - def landmarks(self, im, bbox): - # Draw high-confidence faces - dim = im.shape[:2][::-1] - bbox = bbox.to_dlib() - im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY) - landmarks = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()] - return landmarks - - -class LandmarksMTCNN: - - # https://github.com/ipazc/mtcnn - # pip install mtcnn - - dnn_size = (400, 400) - - def __init__(self, size=(400,400)): - from mtcnn.mtcnn import MTCNN - self.detector = MTCNN() - - def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None): - '''Detects face using MTCNN and returns (list) of BBox - :param im: (numpy.ndarray) image - :returns list of BBox - ''' - rois = [] - dnn_size = self.dnn_size if opt_size is None else opt_size - im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1]) - dim = im.shape[:2][::-1] - - # run MTCNN - dets = self.detector.detect_faces(im) - - for det in dets: - rect = det['box'] - keypoints = det['keypoints'] # not using here. see 'face_landmarks.py' - bbox = BBox.from_xywh_dim(*rect, dim) - rois.append(bbox) - return rois \ No newline at end of file diff --git a/megapixels/app/processors/face_landmarks_2d.py b/megapixels/app/processors/face_landmarks_2d.py new file mode 100644 index 00000000..e8ce93c1 --- /dev/null +++ b/megapixels/app/processors/face_landmarks_2d.py @@ -0,0 +1,87 @@ +import os +from os.path import join +from pathlib import Path + +import cv2 as cv +import numpy as np +import imutils +from app.utils import im_utils, logger_utils +from app.models.bbox import BBox +from app.settings import app_cfg as cfg +from app.settings import types +from app.models.bbox import BBox + +class LandmarksFaceAlignment: + + # Estimates 2D facial landmarks + import face_alignment + + def __init__(self, gpu=0): + self.log = logger_utils.Logger.getLogger() + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True) + + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' + preds = self.fa.get_landmarks(im) + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else: + return preds_int + + +class LandmarksDLIB: + + def __init__(self): + # init dlib + import dlib + self.log = logger_utils.Logger.getLogger() + self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_68PT) + + def landmarks(self, im, bbox): + # Draw high-confidence faces + dim = im.shape[:2][::-1] + bbox = bbox.to_dlib() + im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY) + landmarks = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()] + return landmarks + + +class LandmarksMTCNN: + + # https://github.com/ipazc/mtcnn + # pip install mtcnn + + dnn_size = (400, 400) + + def __init__(self, size=(400,400)): + from mtcnn.mtcnn import MTCNN + self.detector = MTCNN() + + def landmarks(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None): + '''Detects face using MTCNN and returns (list) of BBox + :param im: (numpy.ndarray) image + :returns list of BBox + ''' + rois = [] + dnn_size = self.dnn_size if opt_size is None else opt_size + im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1]) + dim = im.shape[:2][::-1] + + # run MTCNN + dets = self.detector.detect_faces(im) + + for det in dets: + rect = det['box'] + keypoints = det['keypoints'] # not using here. see 'face_landmarks.py' + bbox = BBox.from_xywh_dim(*rect, dim) + rois.append(bbox) + return rois \ No newline at end of file diff --git a/megapixels/app/processors/face_landmarks_3d.py b/megapixels/app/processors/face_landmarks_3d.py index 28aff592..3663364c 100644 --- a/megapixels/app/processors/face_landmarks_3d.py +++ b/megapixels/app/processors/face_landmarks_3d.py @@ -13,24 +13,63 @@ from app.settings import app_cfg as cfg from app.settings import types +class FaceLandmarks2D: + + # Estimates 2D facial landmarks + import face_alignment + + def __init__(self, gpu=0): + self.log = logger_utils.Logger.getLogger() + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True) + + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' + preds = self.fa.get_landmarks(im) + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else + return preds_int + class FaceLandmarks3D: # Estimates 3D facial landmarks import face_alignment - from skimage import io - def __init__(self): + def __init__(self, gpu=0): self.log = logger_utils.Logger.getLogger() - self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False) + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=False) - def landmarks(self, im): + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' preds = self.fa.get_landmarks(im) - return preds + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else + return preds_int def draw(self, im): '''draws landmarks in 3d scene''' + # TODO ''' import face_alignment import numpy as np @@ -74,4 +113,4 @@ class FaceLandmarks3D: ax.set_xlim(ax.get_xlim()[::-1]) plt.show() ''' - return False \ No newline at end of file + return im \ No newline at end of file diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py index 0805c5bd..c2e2caf7 100644 --- a/megapixels/app/settings/types.py +++ b/megapixels/app/settings/types.py @@ -45,8 +45,8 @@ class LogLevel(Enum): # -------------------------------------------------------------------- class Metadata(Enum): - IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, FACE_ROI, FACE_LANDMARKS_68, \ - FACE_LANDMARKS_3D = range(7) + IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, FACE_ROI, FACE_LANDMARKS_2D_68, \ + FACE_LANDMARKS_3D_68 = range(7) class Dataset(Enum): LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES = range(5) -- cgit v1.2.3-70-g09d2