| author | adamhrv <adam@ahprojects.com> | 2019-01-16 13:30:16 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2019-01-16 13:30:16 +0100 |
| commit | 65cb506ca182272e2701136097fd00c55dc6bd69 (patch) | |
| tree | cc5be8e61a8d5173745be1d331b210e967e146b5 | /megapixels/app/processors |
| parent | fceeb3b7adbc8d522e9fe1c40e12e9a529199068 (diff) | |
change bbox to norm, refine face extractor
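The pattern this commit applies across the processors: callers now pass a BBox in normalized [0,1] coordinates, and each processor converts to pixel coordinates internally via `expand(padding).to_dim(dim).to_xyxy()`. The repo's `app.models.bbox.BBox` class is not part of this diff, so the stand-in below is only a minimal sketch of the semantics inferred from its usage here; details such as whether `expand` clamps to [0,1] are assumptions.

```python
class BBox:
    """Illustrative stand-in for app.models.bbox.BBox (normalized x1,y1,x2,y2 in [0,1])."""

    def __init__(self, x1, y1, x2, y2):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2

    def expand(self, per):
        # grow the box by a percentage of its width/height (clamping is an assumption)
        dx = per * (self.x2 - self.x1)
        dy = per * (self.y2 - self.y1)
        return BBox(max(0, self.x1 - dx), max(0, self.y1 - dy),
                    min(1, self.x2 + dx), min(1, self.y2 + dy))

    def to_dim(self, dim):
        # scale normalized coordinates to pixels for a (width, height) image
        w, h = dim
        return BBox(self.x1 * w, self.y1 * h, self.x2 * w, self.y2 * h)

    def to_xyxy(self):
        return tuple(map(int, (self.x1, self.y1, self.x2, self.y2)))


# usage mirroring the processors below
bbox_norm = BBox(0.25, 0.2, 0.75, 0.8)
dim = (640, 480)  # im.shape[:2][::-1]
x1, y1, x2, y2 = bbox_norm.expand(0.4).to_dim(dim).to_xyxy()
# face_crop = im[y1:y2, x1:x2]
```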
Diffstat (limited to 'megapixels/app/processors')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | megapixels/app/processors/face_age_gender.py | 20 |
| -rw-r--r-- | megapixels/app/processors/face_beauty.py | 15 |
| -rw-r--r-- | megapixels/app/processors/face_detector.py | 51 |
| -rw-r--r-- | megapixels/app/processors/face_extractor.py | 42 |
| -rw-r--r-- | megapixels/app/processors/face_landmarks.py | 31 |
| -rw-r--r-- | megapixels/app/processors/face_pose.py | 23 |
| -rw-r--r-- | megapixels/app/processors/face_recognition.py | 68 |
7 files changed, 137 insertions, 113 deletions
```diff
diff --git a/megapixels/app/processors/face_age_gender.py b/megapixels/app/processors/face_age_gender.py
index 95efa8fc..66c51fa8 100644
--- a/megapixels/app/processors/face_age_gender.py
+++ b/megapixels/app/processors/face_age_gender.py
@@ -32,19 +32,21 @@ class _FaceAgeGender:
   '''
 
   dnn_size = (224,224)
-  dnn_mean = (104.0, 177.0, 123.0)
+  dnn_mean = (104.0, 177.0, 123.0)  # ?
+  # authors used imagenet mean
+  #dnn_mean = [103.939, 116.779, 123.68]
   ages = np.arange(0, 101).reshape(101, 1)
+  padding = 0.4
 
   def __init__(self, fp_prototxt, fp_model):
     self.log = logger_utils.Logger.getLogger()
     self.net = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
 
-  def _preprocess(self, im, bbox_dim):
+  def _preprocess(self, im, bbox_norm):
     # isolate face ROI, expand bbox by 40% according to authors
     # https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/
     dim = im.shape[:2][::-1]
-    bbox_dim_exp = bbox_dim.expand_dim( int(0.4*bbox_dim.width), dim)
-    roi = bbox_dim_exp.to_xyxy()
+    roi = bbox_norm.expand(self.padding).to_dim(dim).to_xyxy()
     im_face_crop = im[roi[1]:roi[3], roi[0]:roi[2]]  # isolate face roi
 
     # resize for blob
@@ -52,6 +54,7 @@ class _FaceAgeGender:
     blob = cv.dnn.blobFromImage(im_resized, 1.0, self.dnn_size, self.dnn_mean)
     return blob
 
+
 class FaceGender(_FaceAgeGender):
 
   # use "apparent" age models
@@ -61,17 +64,18 @@ class FaceGender(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts gender from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (dict) with scores for male and female
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     return {'f': preds[0], 'm': preds[1]}
 
+
 class FaceAgeApparent(_FaceAgeGender):
 
   # use "apparent" age models
@@ -81,13 +85,13 @@ class FaceAgeApparent(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts apparent age from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (float) predicted age
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     age = preds.dot(self.ages).flatten()[0]
```
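In `FaceAgeApparent.predict`, the network outputs a distribution over 101 age bins and `preds.dot(self.ages)` collapses it into its expected value. A self-contained illustration of that step; the distribution values here are made up:

```python
import numpy as np

ages = np.arange(0, 101).reshape(101, 1)  # age bins 0..100

# hypothetical softmax output from the age network (sums to 1)
preds = np.zeros(101)
preds[[28, 30, 32]] = [0.2, 0.5, 0.3]

# expected value over the age distribution: sum_i p_i * i
age = preds.dot(ages).flatten()[0]
print(age)  # ≈ 30.2
```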
```diff
diff --git a/megapixels/app/processors/face_beauty.py b/megapixels/app/processors/face_beauty.py
index a01c6834..e2d54c98 100644
--- a/megapixels/app/processors/face_beauty.py
+++ b/megapixels/app/processors/face_beauty.py
@@ -1,3 +1,7 @@
+"""
+https://github.com/ustcqidi/BeautyPredict
+"""
+
 import sys
 import os
 from os.path import join
@@ -45,18 +49,15 @@ class FaceBeauty:
 
     self.model.load_weights(fp_model)
 
-  def beauty(self, im, bbox_dim):
+  def beauty(self, im, bbox_norm):
     '''Predicts facial "beauty" score based on SCUT-FBP attractiveness labels
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned BBox
     :returns (float) 0.0-1.0 with 1 being most attractive
     '''
-
-    face = bbox_dim.to_xyxy()
-    self.log.debug(f'face: {face}')
-
-    cropped_im = im[face[1]:face[3], face[0]:face[2]]
-
+    dim = im.shape[:2][::-1]
+    roi = bbox_norm.to_dim(dim).to_xyxy()
+    cropped_im = im[roi[1]:roi[3], roi[0]:roi[2]]
     im_resized = cv.resize(cropped_im, (224, 224))  # force size
     im_norm = np.array([(im_resized - 127.5) / 127.5])  # subtract mean
 
```
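The `(im_resized - 127.5) / 127.5` expression in `beauty` maps uint8 pixel values from [0, 255] onto [-1, 1] before inference. A quick standalone check with stand-in pixel values:

```python
import numpy as np

im_resized = np.array([[0.0, 127.5, 255.0]])  # stand-in pixel values
im_norm = (im_resized - 127.5) / 127.5        # [0,255] -> [-1,1]
print(im_norm)                                # [[-1.  0.  1.]]
```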
```diff
diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py
index 0e194f7d..fbf91071 100644
--- a/megapixels/app/processors/face_detector.py
+++ b/megapixels/app/processors/face_detector.py
@@ -14,8 +14,57 @@ from app.settings import app_cfg as cfg
 from app.settings import types
 
-class DetectorMTCNN:
+class DetectorMTCNN_CVDNN:
+
+  # https://github.com/CongWeilin/mtcnn-caffe
+
+  def __init__(self):
+    pass
+
+
+class DetectorMTCNN_PT:
+
+  # https://github.com/TropComplique/mtcnn-pytorch/
+  # pip install mtcnn
+
+  dnn_size = (300, 300)
+
+  def __init__(self, size=(400,400), gpu=0):
+    self.log = logger_utils.Logger.getLogger()
+    device_cur = os.getenv('CUDA_VISIBLE_DEVICES', '')
+    self.log.info(f'Change CUDA_VISIBLE_DEVICES from "{device_cur}" to "{gpu}"')
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_cur  # reset
+
+  def detect(self, im, size=(400,400), conf_thresh=None, pyramids=None, largest=False, zone=None):
+    '''Detects face using MTCNN and returns (list) of BBox
+    :param im: (numpy.ndarray) image
+    :returns list of BBox
+    '''
+    bboxes = []
+    dnn_size = self.dnn_size if size is None else size
+
+    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
+    dim = im.shape[:2][::-1]
+    dets = self.detector.detect_faces(im)
+    for det in dets:
+      rect = det['box']
+      #keypoints = det['keypoints']  # not using here. see 'face_landmarks.py'
+      bbox = BBox.from_xywh_dim(*rect, dim)
+      bboxes.append(bbox)
+
+    if largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
+
+
+class DetectorMTCNN_TF:
+
+  # using TF for inference can cause GPU issues with other frameworks
   # https://github.com/ipazc/mtcnn
   # pip install mtcnn
```
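`DetectorMTCNN_PT.__init__` pins the detector to one GPU by swapping `CUDA_VISIBLE_DEVICES` around the import, since CUDA frameworks typically read the variable only once at initialization. A minimal sketch of the same pattern, mirroring the constructor above; note that restoring the variable afterwards does not move an already-initialized backend off its GPU:

```python
import os

def load_detector_on_gpu(gpu=0):
    # CUDA backends read CUDA_VISIBLE_DEVICES at initialization time,
    # so it must be set before the import/construction that touches CUDA
    device_cur = os.getenv('CUDA_VISIBLE_DEVICES', '')
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    try:
        from mtcnn.mtcnn import MTCNN  # deferred import, sees the env var
        detector = MTCNN()
    finally:
        os.environ['CUDA_VISIBLE_DEVICES'] = device_cur  # reset for other code
    return detector
```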
```diff
diff --git a/megapixels/app/processors/face_extractor.py b/megapixels/app/processors/face_extractor.py
index 2666e090..f618cd36 100644
--- a/megapixels/app/processors/face_extractor.py
+++ b/megapixels/app/processors/face_extractor.py
@@ -44,6 +44,9 @@ class Extractor:
       vec_flat[f'd{idx}'] = val
     return vec_flat
 
+  def to_str(self, vec):
+    return ','.join([str(x) for x in vec])
+
   def unflatten_df(self, df):
     # convert from
     return [df[f'd{i}'] for i in range(1,257)]
@@ -64,25 +67,54 @@ class ExtractorVGG(Extractor):
     self.dnn = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
     self.feat_layer = self.dnn.getLayerNames()[-2]
 
-  def extract(self, im, bbox_norm, padding=0.3):
+  def extract_jitter(self, im, bbox_norm):
+    '''(experimental) Extracts feature vector for face crop
+    :param im:
+    :param bbox_norm: (BBox) normalized
+    :param padding: (float) percent to extend ROI
+    :param jitters: not used here
+    :returns (list) of (float)'''
+    dim = im.shape[:2][::-1]
+    num_jitters = cfg.DEFAULT_NUM_JITTERS
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
+    pad_adj = .00875 * padding  # percentage of padding to vary
+    paddings = np.linspace(padding - pad_adj, padding + pad_adj, num=num_jitters)
+    jitter_amt = cfg.DEFAULT_JITTER_AMT
+    vecs = []
+    for i in range(num_jitters):
+      bbox_norm_jit = bbox_norm.jitter(jitter_amt)  # jitters w, h, center
+      bbox_ext = bbox_norm_jit.expand(paddings[i])
+      #bbox_ext = bbox_norm.expand(paddings[i])
+      x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
+      im_crop = im[y1:y2, x1:x2]
+      # According to VGG, model trained using Bilinear interpolation (INTER_LINEAR)
+      im_crop = cv.resize(im_crop, self.dnn_dim, interpolation=cv.INTER_LINEAR)
+      blob = cv.dnn.blobFromImage(im_crop, 1.0, self.dnn_dim, self.dnn_mean)
+      self.dnn.setInput(blob)
+      vec = np.array(self.dnn.forward(self.feat_layer)[0])
+      vec_norm = vec/np.linalg.norm(vec)  # normalize
+      vecs.append(vec_norm)
+    vec_norm = np.mean(np.array(vecs), axis=0)
+    return vec_norm
+
+  def extract(self, im, bbox_norm):
     '''Extracts feature vector for face crop
     :param im:
     :param bbox_norm: (BBox) normalized
     :param padding: (float) percent to extend ROI
     :param jitters: not used here
     :returns (list) of (float)'''
-
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
     bbox_ext = bbox_norm.expand(padding)
     dim = im.shape[:2][::-1]
-    bbox_ext_dim = bbox_ext.to_dim(dim)
-    x1,y1,x2,y2 = bbox_ext_dim.to_xyxy()
+    x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
     im = im[y1:y2, x1:x2]
     # According to VGG, model trained using Bilinear interpolation (INTER_LINEAR)
     im = cv.resize(im, self.dnn_dim, interpolation=cv.INTER_LINEAR)
     blob = cv.dnn.blobFromImage(im, 1.0, self.dnn_dim, self.dnn_mean)
     self.dnn.setInput(blob)
     vec = np.array(self.dnn.forward(self.feat_layer)[0])
-    vec_norm = np.array(vec)/np.linalg.norm(vec)  # normalize
+    vec_norm = vec/np.linalg.norm(vec)  # normalize
     return vec_norm
```
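The new `extract_jitter` averages unit-normalized embeddings over several slightly perturbed crops (padding varied by under 1%, box jittered) to reduce sensitivity to detector box placement. The averaging step in isolation, with random vectors standing in for `self.dnn.forward(...)` outputs:

```python
import numpy as np

rng = np.random.default_rng(0)
num_jitters = 5

vecs = []
for _ in range(num_jitters):
    vec = rng.normal(size=256)              # stand-in for one network embedding
    vecs.append(vec / np.linalg.norm(vec))  # unit-normalize each embedding

mean_vec = np.mean(np.array(vecs), axis=0)
print(np.linalg.norm(mean_vec))  # generally < 1.0
```

Note that the mean of unit vectors is generally shorter than unit length, and `extract_jitter` returns it without re-normalizing; cosine comparisons tolerate this, but fixed Euclidean-distance thresholds may not.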
```diff
diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py
index 171fc666..231e378f 100644
--- a/megapixels/app/processors/face_landmarks.py
+++ b/megapixels/app/processors/face_landmarks.py
@@ -30,6 +30,9 @@ class Landmarks2D:
     self.log.warn('Define landmarks() function')
     pass
 
+  def to_str(self, vec):
+    return ','.join([','.join(list(map(str,[x,y]))) for x,y in vec])
+
   def flatten(self, points):
     '''Converts list of point-tuples into a flattened list for CSV
     :param points: (list) of x,y points
@@ -69,9 +72,9 @@ class FaceAlignment2D_68(Landmarks2D):
     # predict landmarks
     points = self.fa.get_landmarks(im)  # returns array of arrays of 68 2D pts/face
     # convert to data type
-    points = [list(map(int, p)) for p in points[0]]
-    return points
-
+    w,h = im.shape[:2][::-1]
+    points = [(x/w, y/h) for x,y in points[0]]
+    return points  # normalized
 
 class Dlib2D(Landmarks2D):
@@ -82,15 +85,16 @@ class Dlib2D(Landmarks2D):
     self.predictor = dlib.shape_predictor(model)
     self.log.info(f'loaded predictor model: {model}')
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Generates 68-pt landmarks using dlib predictor
     :param im: (numpy.ndarray) BGR image
     :param bbox: (app.models.BBox) dimensioned
-    :returns (list) of (int, int) for x,y values
+    :returns (list) of (float, float) for normalized x,y values
     '''
-    bbox = bbox.to_dlib()
+    dim = im.shape[:2][::-1]
+    roi_dlib = bbox_norm.to_dim(dim).to_dlib()
     im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
-    points = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()]
+    points = [[p.x/dim[0], p.y/dim[1]] for p in self.predictor(im_gray, roi_dlib).parts()]
     return points
@@ -121,13 +125,13 @@ class MTCNN2D_5(Landmarks2D):
     from mtcnn.mtcnn import MTCNN
     self.detector = MTCNN()
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Detects face using MTCNN and returns (list) of BBox
     :param im: (numpy.ndarray) image
     :returns list of BBox
     '''
     results = []
-    dim_wh = im.shape[:2][::-1]  # (w, h)
+    dim = im.shape[:2][::-1]  # (w, h)
 
     # run MTCNN to get bbox and landmarks
     dets = self.detector.detect_faces(im)
@@ -138,7 +142,7 @@ class MTCNN2D_5(Landmarks2D):
       #rect = det['box']
       points = det['keypoints']
       # convert to normalized for contain-comparison
-      points_norm = [np.array(pt)/dim_wh for pname, pt in points.items()]
+      points_norm = [np.array(pt)/dim for pname, pt in points.items()]
       contains = False not in [bbox.contains(pn) for pn in points_norm]
       if contains:
         results.append(points)  # append original points
@@ -185,14 +189,17 @@ class FaceAlignment3D_68(Landmarks3D):
     device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
     self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=flip_input)
 
-  def landmarks(self, im, rect):
+  def landmarks(self, im, bbox_norm):
     '''Calculates the 3D facial landmarks
     :param im: (numpy.ndarray) BGR image
-    :param rect: (list) of face (x1, y1, x2, y2)
+    :param bbox_norm: (BBox) of face roi
     :returns (list) of 68 (int) (tuples) as (x,y, z)
     '''
     # predict landmarks
+    dim = im.shape[:2][::-1]
+    rect = bbox_norm.to_dim(dim).to_xyxy()
     points = self.fa.get_landmarks(im, [rect])  # returns array of arrays of 68 3D pts/face
     # convert to data type
+    # TODO normalize this, but how to norm 3D?
     points = [list(map(int, p)) for p in points[0]]
     return points
\ No newline at end of file
```
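The 2D landmark classes now return coordinates normalized by image (width, height), and consumers such as `face_pose.py` scale them back to pixels. A round-trip sketch of that convention; the helper names are illustrative, not from the repo:

```python
def normalize_points(points, dim):
    """Map pixel landmarks to [0,1] by image (width, height)."""
    w, h = dim
    return [(x / w, y / h) for x, y in points]

def denormalize_points(points_norm, dim):
    """Map normalized landmarks back to integer pixel coordinates."""
    w, h = dim
    return [(int(x * w), int(y * h)) for x, y in points_norm]

pts = [(120, 80), (200, 150)]
norm = normalize_points(pts, (640, 480))
assert denormalize_points(norm, (640, 480)) == pts
```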
```diff
diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py
index 5ac510ec..49a39a53 100644
--- a/megapixels/app/processors/face_pose.py
+++ b/megapixels/app/processors/face_pose.py
@@ -21,10 +21,10 @@ class FacePoseDLIB:
   pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
 
   def __init__(self):
-    pass
+    self.log = logger_utils.Logger.getLogger()
 
-  def pose(self, landmarks, dim):
+  def pose(self, landmarks_norm, dim):
     '''Returns face pose information
     :param landmarks: (list) of 68 (int, int) xy tuples
     :param dim: (tuple|list) of image (width, height)
@@ -55,9 +55,10 @@ class FacePoseDLIB:
     # find 6 pose points
     pose_points = []
     for j, idx in enumerate(pose_points_idx):
-      pt = landmarks[idx]
-      pose_points.append((pt[0], pt[1]))
-    pose_points = np.array(pose_points, dtype='double')  # convert to double
+      x,y = landmarks_norm[idx]
+      pt = (int(x*dim[0]), int(y*dim[1]))
+      pose_points.append(pt)
+    pose_points = np.array(pose_points, dtype='double')  # convert to double, real dimensions
 
     # create camera matrix
     focal_length = dim[0]
@@ -75,18 +76,16 @@ class FacePoseDLIB:
     result = {}
 
     # project points
-    #if project_points:
     pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs)
     pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs)
-    #result['points_model'] = pts_model
-    #result['points_image'] = pts_im
+
     result['points'] = {
-      'pitch': pts_im[0],
-      'roll': pts_im[2],
-      'yaw': pts_im[1]
+      'pitch': list(map(int,pts_im[0][0])),
+      'roll': list(map(int,pts_im[2][0])),
+      'yaw': list(map(int,pts_im[1][0]))
     }
-    result['point_nose'] = tuple(landmarks[pose_points_idx[0]])
+    result['point_nose'] = tuple(map(int,pose_points[0]))
 
     rvec_matrix = cv.Rodrigues(rot_vec)[0]
     # convert to degrees
```

```diff
diff --git a/megapixels/app/processors/face_recognition.py b/megapixels/app/processors/face_recognition.py
deleted file mode 100644
index 76f00aa1..00000000
--- a/megapixels/app/processors/face_recognition.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-from os.path import join
-from pathlib import Path
-
-import cv2 as cv
-import numpy as np
-import dlib
-import imutils
-
-from app.utils import im_utils, logger_utils
-from app.models.bbox import BBox
-from app.settings import app_cfg as cfg
-from app.settings import types
-
-class RecognitionDLIB:
-
-  # https://github.com/davisking/dlib/blob/master/python_examples/face_recognition.py
-  # facerec.compute_face_descriptor(img, shape, 100, 0.25)
-
-  def __init__(self, gpu=0):
-    self.log = logger_utils.Logger.getLogger()
-
-    if gpu > -1:
-      cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
-      os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
-
-    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT)
-    self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET)
-
-    if gpu > -1:
-      os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset GPU env
-
-
-  def vec(self, im, bbox, width=100,
-    jitters=cfg.DLIB_FACEREC_JITTERS, padding=cfg.DLIB_FACEREC_PADDING):
-    '''Converts image and bbox into 128d vector
-    :param im: (numpy.ndarray) BGR image
-    :param bbox: (BBox)
-    '''
-    # scale the image so the face is always 100x100 pixels
-
-    #self.log.debug('compute scale')
-    scale = width / bbox.width
-    #im = cv.resize(im, (scale, scale), cv.INTER_LANCZOS4)
-    #self.log.debug('resize')
-    cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4)
-    #self.log.debug('to dlib')
-    bbox_dlib = bbox.to_dlib()
-    #self.log.debug('precitor')
-    face_shape = self.predictor(im, bbox_dlib)
-    # vec = self.facerec.compute_face_descriptor(im, face_shape, jitters, padding)
-    #self.log.debug('vec')
-    vec = self.facerec.compute_face_descriptor(im, face_shape, jitters)
-    #vec = self.facerec.compute_face_descriptor(im, face_shape)
-    return vec
-
-  def flatten(self, vec):
-    '''Converts 128D vector into a flattened list for CSV
-    :param points: (list) a feature vector as list of floats
-    :returns dict item for each point (eg {'d1':0.28442156, 'd1': 0.1868632})
-    '''
-    vec_flat = {}
-    for idx, val in enumerate(vec, 1):
-      vec_flat[f'd{idx}'] = val
-    return vec_flat
-
-  def similarity(self, query_enc, known_enc):
-    return np.linalg.norm(query_enc - known_enc, axis=1)
```
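The deleted `RecognitionDLIB.similarity` compared a query embedding against known embeddings by Euclidean distance, the usual metric for dlib's 128-D face descriptors (distances under roughly 0.6 are conventionally treated as the same person). A standalone equivalent with placeholder vectors in place of real descriptors:

```python
import numpy as np

def similarity(query_enc, known_encs):
    # Euclidean distance from one query vector to each known vector
    return np.linalg.norm(known_encs - query_enc, axis=1)

rng = np.random.default_rng(1)
query = rng.normal(size=128)        # stand-in for a dlib 128-D descriptor
known = rng.normal(size=(10, 128))  # stand-in gallery of known faces

dists = similarity(query, known)
best = int(np.argmin(dists))        # closest match; dlib convention: < 0.6 ≈ same person
print(best, dists[best])
```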
