Diffstat (limited to 'megapixels/app')
 megapixels/app/models/bbox.py                 | 17
 megapixels/app/models/dataset.py              | 25
 megapixels/app/processors/face_age_gender.py  | 20
 megapixels/app/processors/face_beauty.py      | 15
 megapixels/app/processors/face_detector.py    | 51
 megapixels/app/processors/face_extractor.py   | 42
 megapixels/app/processors/face_landmarks.py   | 31
 megapixels/app/processors/face_pose.py        | 23
 megapixels/app/processors/face_recognition.py | 68
 megapixels/app/settings/app_cfg.py            |  7
 megapixels/app/settings/types.py              |  6
 megapixels/app/utils/display_utils.py         |  9
 megapixels/app/utils/draw_utils.py            | 56
13 files changed, 220 insertions(+), 150 deletions(-)
diff --git a/megapixels/app/models/bbox.py b/megapixels/app/models/bbox.py
index f1216698..f65f7373 100644
--- a/megapixels/app/models/bbox.py
+++ b/megapixels/app/models/bbox.py
@@ -1,4 +1,5 @@
 import math
+import random
 
 from dlib import rectangle as dlib_rectangle
 import numpy as np
@@ -127,9 +128,23 @@ class BBox:
     d = int(math.sqrt(math.pow(dcx, 2) + math.pow(dcy, 2)))
     return d
 
+  # -----------------------------------------------------------------
   # Modify
 
+  def jitter(self, amt):
+    '''Jitters BBox in x,y,w,h values. Used for face feature extraction
+    :param amt: (float) percentage of BBox for maximum translation
+    :returns (BBox)
+    '''
+    w = self._width + (self._width * random.uniform(-amt, amt))
+    h = self._height + (self._height * random.uniform(-amt, amt))
+    cx = self._cx + (self._cx * random.uniform(-amt, amt))
+    cy = self._cy + (self._cy * random.uniform(-amt, amt))
+    x1, y1 = np.clip((cx - w/2, cy - h/2), 0.0, 1.0)
+    x2, y2 = np.clip((cx + w/2, cy + h/2), 0.0, 1.0)
+    return BBox(x1, y1, x2, y2)
+
   def expand(self, per):
     """Expands BBox by percentage
     :param per: (float) percentage to expand 0.0 - 1.0
@@ -186,7 +201,7 @@ class BBox:
       # print(adj)
       r = np.add(np.array(r), adj)
 
-    return BBox(*r)
+    return BBox(*r)  # updates all BBox values
 
   # -----------------------------------------------------------------
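The new jitter() above perturbs the width, height, and center of a normalized box; note the translation is proportional to the center coordinates themselves, not to the box size, so boxes near the top-left corner move less. A minimal sketch of how it might be called (constructor and to_xyxy() signatures assumed from this diff; 0.015 matches the DEFAULT_JITTER_AMT added in app_cfg.py further down):

    from app.models.bbox import BBox

    bbox = BBox(0.25, 0.30, 0.75, 0.80)  # normalized x1, y1, x2, y2
    for _ in range(4):
        # each call returns a new, slightly perturbed, still-normalized BBox
        print(bbox.jitter(0.015).to_xyxy())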
diff --git a/megapixels/app/models/dataset.py b/megapixels/app/models/dataset.py
index eb0109a7..bbef9ff5 100644
--- a/megapixels/app/models/dataset.py
+++ b/megapixels/app/models/dataset.py
@@ -44,6 +44,9 @@ class Dataset:
       self.log.info(f'build face vector dict: {len(self._face_vectors)}')
       # remove the face vector column, it can be several GB of memory
       self._metadata[metadata_type].drop('vec', axis=1, inplace=True)
+      #n_dims = len(self._metadata[metadata_type].keys()) - 2
+      #drop_keys = [f'd{i}' for i in range(1,n_dims+1)]
+      #self._metadata[metadata_type].drop(drop_keys, axis=1, inplace=True)
     else:
       self.log.error(f'File not found: {fp_csv}. Exiting.')
       sys.exit()
@@ -53,7 +56,7 @@ class Dataset:
     fp_csv = self.data_store.metadata(metadata_type)
     self.log.info(f'loading: {fp_csv}')
     if Path(fp_csv).is_file():
-      self._metadata[metadata_type] = pd.read_csv(fp_csv).set_index('index')
+      self._metadata[metadata_type] = pd.read_csv(fp_csv, dtype={'fn':str}).set_index('index')
     else:
       self.log.error(f'File not found: {fp_csv}. Exiting.')
       sys.exit()
@@ -142,33 +145,37 @@ class Dataset:
     # find most similar feature vectors indexes
     #match_idxs = self.similar(query_vec, n_results, threshold)
     sim_scores = np.linalg.norm(np.array([query_vec]) - np.array(self._face_vectors), axis=1)
-    match_idxs = np.argpartition(sim_scores, n_results)[:n_results]
+    match_idxs = np.argpartition(sim_scores, range(n_results))[:n_results]
+
+    df_vector = self._metadata[types.Metadata.FACE_VECTOR]
+    df_record = self._metadata[types.Metadata.FILE_RECORD]
+
     for match_idx in match_idxs:
       # get the corresponding face vector row
       roi_index = self._face_vector_roi_idxs[match_idx]
-      df_record = self._metadata[types.Metadata.FILE_RECORD]
-      ds_record = df_record.iloc[roi_index]
+      record_idx = df_vector.iloc[roi_index].record_index
+      ds_record = df_record.iloc[record_idx]
       self.log.debug(f'find match index: {match_idx}, --> roi_index: {roi_index}')
       fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
       s3_url = self.data_store_s3.face(ds_record.uuid)
       image_record = ImageRecord(ds_record, fp_im, s3_url)
-      #roi_index = self._face_vector_roi_idxs[match_idx]
-      #image_record = self.roi_idx_to_record(roi_index)
       image_records.append(image_record)
     return image_records
 
   # ----------------------------------------------------------------------
   # utilities
 
-  def df_vecs_to_dict(self, df):
+  def df_vecs_to_dict(self, df_vec):
     # convert the DataFrame CSV to float list of vecs
-    return [list(map(float,x.vec.split(','))) for x in df.itertuples()]
+    # n_dims = len(df_vec.keys()) - 2  # number of columns with 'd1, d2,...d256'
+    #return [[df[f'd{i}'] for i in range(1,n_dims+1)] for df_idx, df in df_vec.iterrows()]
+    return [list(map(float, x.vec.split(','))) for x in df_vec.itertuples()]
 
   def df_vec_roi_idxs_to_dict(self, df):
     # convert the DataFrame CSV to float list of vecs
     #return [x.roi_index for x in df.itertuples()]
-    return [x.roi_index for x in df.itertuples()]
+    return [int(x.roi_index) for i,x in df.iterrows()]
 
   def similar(self, query_vec, n_results):
     '''Finds most similar N indices of query face vector
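Two behavioral changes above are easy to miss: dtype={'fn':str} stops pandas from coercing numeric-looking filenames, and passing range(n_results) instead of a scalar kth to np.argpartition returns the top matches already sorted by distance rather than merely partitioned. A quick illustration of the latter:

    import numpy as np

    scores = np.array([0.9, 0.2, 0.7, 0.1, 0.5])
    print(np.argpartition(scores, 3)[:3])         # three smallest, arbitrary order
    print(np.argpartition(scores, range(3))[:3])  # [3 1 4]: ascending by score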
diff --git a/megapixels/app/processors/face_age_gender.py b/megapixels/app/processors/face_age_gender.py
index 95efa8fc..66c51fa8 100644
--- a/megapixels/app/processors/face_age_gender.py
+++ b/megapixels/app/processors/face_age_gender.py
@@ -32,19 +32,21 @@ class _FaceAgeGender:
   '''
 
   dnn_size = (224,224)
-  dnn_mean = (104.0, 177.0, 123.0)
+  dnn_mean = (104.0, 177.0, 123.0)  # ?
+  # authors used imagenet mean
+  #dnn_mean = [103.939, 116.779, 123.68]
   ages = np.arange(0, 101).reshape(101, 1)
+  padding = 0.4
 
   def __init__(self, fp_prototxt, fp_model):
     self.log = logger_utils.Logger.getLogger()
     self.net = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
 
-  def _preprocess(self, im, bbox_dim):
+  def _preprocess(self, im, bbox_norm):
     # isolate face ROI, expand bbox by 40% according to authors
     # https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/
     dim = im.shape[:2][::-1]
-    bbox_dim_exp = bbox_dim.expand_dim( int(0.4*bbox_dim.width), dim)
-    roi = bbox_dim_exp.to_xyxy()
+    roi = bbox_norm.expand(self.padding).to_dim(dim).to_xyxy()
     im_face_crop = im[roi[1]:roi[3], roi[0]:roi[2]]  # isolate face roi
 
     # resize for blob
@@ -52,6 +54,7 @@ class _FaceAgeGender:
     blob = cv.dnn.blobFromImage(im_resized, 1.0, self.dnn_size, self.dnn_mean)
     return blob
 
+
 class FaceGender(_FaceAgeGender):
 
   # use "apparent" age models
@@ -61,17 +64,18 @@ class FaceGender(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts gender from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (dict) with scores for male and female
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     return {'f': preds[0], 'm': preds[1]}
 
+
 class FaceAgeApparent(_FaceAgeGender):
 
   # use "apparent" age models
@@ -81,13 +85,13 @@ class FaceAgeApparent(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts apparent age from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (float) predicted age
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     age = preds.dot(self.ages).flatten()[0]
diff --git a/megapixels/app/processors/face_beauty.py b/megapixels/app/processors/face_beauty.py
index a01c6834..e2d54c98 100644
--- a/megapixels/app/processors/face_beauty.py
+++ b/megapixels/app/processors/face_beauty.py
@@ -1,3 +1,7 @@
+"""
+https://github.com/ustcqidi/BeautyPredict
+"""
+
 import sys
 import os
 from os.path import join
@@ -45,18 +49,15 @@ class FaceBeauty:
     self.model.load_weights(fp_model)
 
-  def beauty(self, im, bbox_dim):
+  def beauty(self, im, bbox_norm):
     '''Predicts facial "beauty" score based on SCUT-FBP attractiveness labels
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned BBox
     :returns (float) 0.0-1.0 with 1 being most attractive
     '''
-
-    face = bbox_dim.to_xyxy()
-    self.log.debug(f'face: {face}')
-
-    cropped_im = im[face[1]:face[3], face[0]:face[2]]
-
+    dim = im.shape[:2][::-1]
+    roi = bbox_norm.to_dim(dim).to_xyxy()
+    cropped_im = im[roi[1]:roi[3], roi[0]:roi[2]]
     im_resized = cv.resize(cropped_im, (224, 224))  # force size
     im_norm = np.array([(im_resized - 127.5) / 127.5])  # subtract mean
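For reference, the preds.dot(self.ages) line in FaceAgeApparent.predict() is the expected-value readout used by the DEX apparent-age model behind the IMDB-WIKI link above: the network emits a softmax over 101 age bins and the scalar age is its expectation. A toy example:

    import numpy as np

    ages = np.arange(0, 101).reshape(101, 1)
    preds = np.zeros(101)                     # stand-in for the softmax output
    preds[[29, 30, 31]] = [0.25, 0.5, 0.25]   # distribution peaked at 30
    print(preds.dot(ages).flatten()[0])       # 30.0, the expected age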
diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py
index 0e194f7d..fbf91071 100644
--- a/megapixels/app/processors/face_detector.py
+++ b/megapixels/app/processors/face_detector.py
@@ -14,8 +14,57 @@ from app.settings import app_cfg as cfg
 from app.settings import types
 
-class DetectorMTCNN:
+class DetectorMTCNN_CVDNN:
+
+  # https://github.com/CongWeilin/mtcnn-caffe
+
+  def __init__(self):
+    pass
+
+
+class DetectorMTCNN_PT:
+
+  # https://github.com/TropComplique/mtcnn-pytorch/
+  # pip install mtcnn
+
+  dnn_size = (300, 300)
+
+  def __init__(self, size=(400,400), gpu=0):
+    self.log = logger_utils.Logger.getLogger()
+    device_cur = os.getenv('CUDA_VISIBLE_DEVICES', '')
+    self.log.info(f'Change CUDA_VISIBLE_DEVICES from "{device_cur}" to "{gpu}"')
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_cur  # reset
+
+  def detect(self, im, size=(400,400), conf_thresh=None, pyramids=None, largest=False, zone=None):
+    '''Detects face using MTCNN and returns (list) of BBox
+    :param im: (numpy.ndarray) image
+    :returns list of BBox
+    '''
+    bboxes = []
+    dnn_size = self.dnn_size if size is None else size
+
+    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
+    dim = im.shape[:2][::-1]
+    dets = self.detector.detect_faces(im)
+    for det in dets:
+      rect = det['box']
+      #keypoints = det['keypoints']  # not using here. see 'face_landmarks.py'
+      bbox = BBox.from_xywh_dim(*rect, dim)
+      bboxes.append(bbox)
+
+    if largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
+
+
+class DetectorMTCNN_TF:
 
+  # using TF for inference can cause GPU issues with other frameworks
   # https://github.com/ipazc/mtcnn
   # pip install mtcnn
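DetectorMTCNN_PT.__init__ pins the GPU by swapping CUDA_VISIBLE_DEVICES around the mtcnn import. This works because the variable is read when the framework first initializes CUDA; restoring it afterwards does not move an already-created context. A generic sketch of the pattern (with_gpu is a hypothetical helper, not part of this codebase):

    import os

    def with_gpu(gpu, fn):
        '''Run fn() with CUDA_VISIBLE_DEVICES pinned to one GPU, then restore.'''
        prev = os.getenv('CUDA_VISIBLE_DEVICES', '')
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
        try:
            return fn()  # any CUDA context created here lands on the chosen GPU
        finally:
            os.environ['CUDA_VISIBLE_DEVICES'] = prev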
diff --git a/megapixels/app/processors/face_extractor.py b/megapixels/app/processors/face_extractor.py
index 2666e090..f618cd36 100644
--- a/megapixels/app/processors/face_extractor.py
+++ b/megapixels/app/processors/face_extractor.py
@@ -44,6 +44,9 @@ class Extractor:
       vec_flat[f'd{idx}'] = val
     return vec_flat
 
+  def to_str(self, vec):
+    return ','.join([str(x) for x in vec])
+
   def unflatten_df(self, df):
     # convert from
     return [df[f'd{i}'] for i in range(1,257)]
@@ -64,25 +67,54 @@ class ExtractorVGG(Extractor):
     self.dnn = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
     self.feat_layer = self.dnn.getLayerNames()[-2]
 
-  def extract(self, im, bbox_norm, padding=0.3):
+  def extract_jitter(self, im, bbox_norm):
+    '''(experimental) Extracts feature vector for face crop
+    :param im:
+    :param bbox_norm: (BBox) normalized
+    :param padding: (float) percent to extend ROI
+    :param jitters: not used here
+    :returns (list) of (float)'''
+    dim = im.shape[:2][::-1]
+    num_jitters = cfg.DEFAULT_NUM_JITTERS
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
+    pad_adj = .00875 * padding  # percentage of padding to vary
+    paddings = np.linspace(padding - pad_adj, padding + pad_adj, num=num_jitters)
+    jitter_amt = cfg.DEFAULT_JITTER_AMT
+    vecs = []
+    for i in range(num_jitters):
+      bbox_norm_jit = bbox_norm.jitter(jitter_amt)  # jitters w, h, center
+      bbox_ext = bbox_norm_jit.expand(paddings[i])
+      #bbox_ext = bbox_norm.expand(paddings[i])
+      x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
+      im_crop = im[y1:y2, x1:x2]
+      # According to VGG, model trained using Bilinear interpolation (INTER_LINEAR)
+      im_crop = cv.resize(im_crop, self.dnn_dim, interpolation=cv.INTER_LINEAR)
+      blob = cv.dnn.blobFromImage(im_crop, 1.0, self.dnn_dim, self.dnn_mean)
+      self.dnn.setInput(blob)
+      vec = np.array(self.dnn.forward(self.feat_layer)[0])
+      vec_norm = vec/np.linalg.norm(vec)  # normalize
+      vecs.append(vec_norm)
+    vec_norm = np.mean(np.array(vecs), axis=0)
+    return vec_norm
+
+  def extract(self, im, bbox_norm):
     '''Extracts feature vector for face crop
     :param im:
     :param bbox_norm: (BBox) normalized
     :param padding: (float) percent to extend ROI
     :param jitters: not used here
     :returns (list) of (float)'''
-
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
     bbox_ext = bbox_norm.expand(padding)
     dim = im.shape[:2][::-1]
-    bbox_ext_dim = bbox_ext.to_dim(dim)
-    x1,y1,x2,y2 = bbox_ext_dim.to_xyxy()
+    x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
     im = im[y1:y2, x1:x2]
     # According to VGG, model trained using Bilinear interpolation (INTER_LINEAR)
     im = cv.resize(im, self.dnn_dim, interpolation=cv.INTER_LINEAR)
     blob = cv.dnn.blobFromImage(im, 1.0, self.dnn_dim, self.dnn_mean)
     self.dnn.setInput(blob)
     vec = np.array(self.dnn.forward(self.feat_layer)[0])
-    vec_norm = np.array(vec)/np.linalg.norm(vec)  # normalize
+    vec_norm = vec/np.linalg.norm(vec)  # normalize
     return vec_norm
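extract_jitter() averages L2-normalized embeddings over DEFAULT_NUM_JITTERS randomly jittered, slightly re-padded crops. One subtlety: the mean of unit vectors is itself shorter than unit length, and the code returns it without re-normalizing, which slightly shrinks distances in the search index. A sketch of the effect, assuming 256-d vectors as elsewhere in this diff:

    import numpy as np

    vecs = np.random.randn(4, 256)                       # four jittered embeddings
    vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # each is unit length
    mean_vec = vecs.mean(axis=0)
    print(np.linalg.norm(mean_vec))                      # < 1.0
    mean_vec /= np.linalg.norm(mean_vec)                 # optional: restore unit length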
diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py
index 171fc666..231e378f 100644
--- a/megapixels/app/processors/face_landmarks.py
+++ b/megapixels/app/processors/face_landmarks.py
@@ -30,6 +30,9 @@ class Landmarks2D:
     self.log.warn('Define landmarks() function')
     pass
 
+  def to_str(self, vec):
+    return ','.join([','.join(list(map(str,[x,y]))) for x,y in vec])
+
   def flatten(self, points):
     '''Converts list of point-tupes into a flattened list for CSV
     :param points: (list) of x,y points
@@ -69,9 +72,9 @@ class FaceAlignment2D_68(Landmarks2D):
     # predict landmarks
     points = self.fa.get_landmarks(im)  # returns array of arrays of 68 2D pts/face
     # convert to data type
-    points = [list(map(int, p)) for p in points[0]]
-    return points
-
+    w,h = im.shape[:2][::-1]
+    points = [(x/w, y/h) for x,y in points[0]]
+    return points  # normalized
 
 class Dlib2D(Landmarks2D):
@@ -82,15 +85,16 @@ class Dlib2D(Landmarks2D):
     self.predictor = dlib.shape_predictor(model)
     self.log.info(f'loaded predictor model: {model}')
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Generates 68-pt landmarks using dlib predictor
     :param im: (numpy.ndarray) BGR image
     :param bbox: (app.models.BBox) dimensioned
-    :returns (list) of (int, int) for x,y values
+    :returns (list) of (float, float) for normalized x,y values
     '''
-    bbox = bbox.to_dlib()
+    dim = im.shape[:2][::-1]
+    roi_dlib = bbox_norm.to_dim(dim).to_dlib()
     im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
-    points = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()]
+    points = [[p.x/dim[0], p.y/dim[1]] for p in self.predictor(im_gray, roi_dlib).parts()]
     return points
@@ -121,13 +125,13 @@ class MTCNN2D_5(Landmarks2D):
     from mtcnn.mtcnn import MTCNN
     self.detector = MTCNN()
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Detects face using MTCNN and returns (list) of BBox
     :param im: (numpy.ndarray) image
     :returns list of BBox
     '''
     results = []
-    dim_wh = im.shape[:2][::-1]  # (w, h)
+    dim = im.shape[:2][::-1]  # (w, h)
 
     # run MTCNN to get bbox and landmarks
     dets = self.detector.detect_faces(im)
@@ -138,7 +142,7 @@ class MTCNN2D_5(Landmarks2D):
       #rect = det['box']
       points = det['keypoints']
       # convert to normalized for contain-comparison
-      points_norm = [np.array(pt)/dim_wh for pname, pt in points.items()]
+      points_norm = [np.array(pt)/dim for pname, pt in points.items()]
       contains = False not in [bbox.contains(pn) for pn in points_norm]
       if contains:
         results.append(points)  # append original points
@@ -185,14 +189,17 @@ class FaceAlignment3D_68(Landmarks3D):
     device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
     self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=flip_input)
 
-  def landmarks(self, im, rect):
+  def landmarks(self, im, bbox_norm):
     '''Calculates the 3D facial landmarks
     :param im: (numpy.ndarray) BGR image
-    :param rect: (list) of face (x1, y1, x2, y2)
+    :param bbox_norm: (BBox) of face roi
     :returns (list) of 68 (int) (tuples) as (x,y, z)
     '''
     # predict landmarks
+    dim = im.shape[:2][::-1]
+    rect = bbox_norm.to_dim(dim).to_xyxy()
     points = self.fa.get_landmarks(im, [rect])  # returns array of arrays of 68 3D pts/face
     # convert to data type
+    # TODO normalize this, but how to norm 3D?
     points = [list(map(int, p)) for p in points[0]]
     return points
\ No newline at end of file
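All the 2D landmark classes now return resolution-independent coordinates under the same convention: divide by (width, height) taken from im.shape[:2][::-1], multiply back to recover pixels. A round-trip sketch:

    dim = (640, 480)  # (width, height), i.e. im.shape[:2][::-1]
    points_px = [(320, 240), (160, 120)]
    points_norm = [(x / dim[0], y / dim[1]) for x, y in points_px]
    points_back = [(int(x * dim[0]), int(y * dim[1])) for x, y in points_norm]
    assert points_back == points_px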
diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py
index 5ac510ec..49a39a53 100644
--- a/megapixels/app/processors/face_pose.py
+++ b/megapixels/app/processors/face_pose.py
@@ -21,10 +21,10 @@ class FacePoseDLIB:
   pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
 
   def __init__(self):
-    pass
+    self.log = logger_utils.Logger.getLogger()
 
-  def pose(self, landmarks, dim):
+  def pose(self, landmarks_norm, dim):
     '''Returns face pose information
     :param landmarks: (list) of 68 (int, int) xy tuples
     :param dim: (tuple|list) of image (width, height)
@@ -55,9 +55,10 @@ class FacePoseDLIB:
     # find 6 pose points
     pose_points = []
     for j, idx in enumerate(pose_points_idx):
-      pt = landmarks[idx]
-      pose_points.append((pt[0], pt[1]))
-    pose_points = np.array(pose_points, dtype='double')  # convert to double
+      x,y = landmarks_norm[idx]
+      pt = (int(x*dim[0]), int(y*dim[1]))
+      pose_points.append(pt)
+    pose_points = np.array(pose_points, dtype='double')  # convert to double, real dimensions
 
     # create camera matrix
     focal_length = dim[0]
@@ -75,18 +76,16 @@ class FacePoseDLIB:
     result = {}
 
     # project points
-    #if project_points:
     pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs)
     pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs)
-    #result['points_model'] = pts_model
-    #result['points_image'] = pts_im
+
     result['points'] = {
-      'pitch': pts_im[0],
-      'roll': pts_im[2],
-      'yaw': pts_im[1]
+      'pitch': list(map(int,pts_im[0][0])),
+      'roll': list(map(int,pts_im[2][0])),
+      'yaw': list(map(int,pts_im[1][0]))
     }
-    result['point_nose'] = tuple(landmarks[pose_points_idx[0]])
+    result['point_nose'] = tuple(map(int,pose_points[0]))
 
     rvec_matrix = cv.Rodrigues(rot_vec)[0]
 
     # convert to degrees
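The indexing change above reflects the shape of cv.projectPoints output: an (N, 1, 2) float array, so pts_im[0][0] is the first projected point as [x, y], which map(int, ...) turns into drawable pixel coordinates. Downstream, the rotation vector is converted to degrees; one way to do that with OpenCV (axis order depends on convention) is:

    import cv2 as cv
    import numpy as np

    rot_vec = np.array([[0.10], [0.20], [0.05]])  # e.g. from cv.solvePnP
    rmat = cv.Rodrigues(rot_vec)[0]               # 3x3 rotation matrix
    euler_deg = cv.RQDecomp3x3(rmat)[0]           # three Euler angles in degrees
    print(euler_deg)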
diff --git a/megapixels/app/processors/face_recognition.py b/megapixels/app/processors/face_recognition.py
deleted file mode 100644
index 76f00aa1..00000000
--- a/megapixels/app/processors/face_recognition.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-from os.path import join
-from pathlib import Path
-
-import cv2 as cv
-import numpy as np
-import dlib
-import imutils
-
-from app.utils import im_utils, logger_utils
-from app.models.bbox import BBox
-from app.settings import app_cfg as cfg
-from app.settings import types
-
-class RecognitionDLIB:
-
-  # https://github.com/davisking/dlib/blob/master/python_examples/face_recognition.py
-  # facerec.compute_face_descriptor(img, shape, 100, 0.25)
-
-  def __init__(self, gpu=0):
-    self.log = logger_utils.Logger.getLogger()
-
-    if gpu > -1:
-      cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
-      os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
-
-    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT)
-    self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET)
-
-    if gpu > -1:
-      os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset GPU env
-
-
-  def vec(self, im, bbox, width=100,
-    jitters=cfg.DLIB_FACEREC_JITTERS, padding=cfg.DLIB_FACEREC_PADDING):
-    '''Converts image and bbox into 128d vector
-    :param im: (numpy.ndarray) BGR image
-    :param bbox: (BBox)
-    '''
-    # scale the image so the face is always 100x100 pixels
-
-    #self.log.debug('compute scale')
-    scale = width / bbox.width
-    #im = cv.resize(im, (scale, scale), cv.INTER_LANCZOS4)
-    #self.log.debug('resize')
-    cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4)
-    #self.log.debug('to dlib')
-    bbox_dlib = bbox.to_dlib()
-    #self.log.debug('precitor')
-    face_shape = self.predictor(im, bbox_dlib)
-    # vec = self.facerec.compute_face_descriptor(im, face_shape, jitters, padding)
-    #self.log.debug('vec')
-    vec = self.facerec.compute_face_descriptor(im, face_shape, jitters)
-    #vec = self.facerec.compute_face_descriptor(im, face_shape)
-    return vec
-
-  def flatten(self, vec):
-    '''Converts 128D vector into a flattened list for CSV
-    :param points: (list) a feature vector as list of floats
-    :returns dict item for each point (eg {'d1':0.28442156, 'd1': 0.1868632})
-    '''
-    vec_flat = {}
-    for idx, val in enumerate(vec, 1):
-      vec_flat[f'd{idx}'] = val
-    return vec_flat
-
-  def similarity(self, query_enc, known_enc):
-    return np.linalg.norm(query_enc - known_enc, axis=1)
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index 14e2493c..42e37b7a 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -42,7 +42,7 @@ DIR_PEOPLE = 'people'
 DIR_MODELS_CAFFE = join(DIR_MODELS,'caffe')
 DIR_MODELS_DARKNET = join(DIR_MODELS,'darknet')
 DIR_MODELS_DARKNET_PJREDDIE = join(DIR_MODELS_DARKNET, 'pjreddie')
-DIR_MODELS_PYTORCHq = join(DIR_MODELS,'pytorch')
+DIR_MODELS_PYTORCH = join(DIR_MODELS,'pytorch')
 DIR_MODELS_TORCH = join(DIR_MODELS,'torch')
 DIR_MODELS_MXNET = join(DIR_MODELS,'mxnet')
 DIR_MODELS_KERAS = join(DIR_MODELS,'keras')
@@ -99,7 +99,10 @@ HASH_BRANCH_SIZE = 3
 DLIB_FACEREC_JITTERS = 5  # number of face recognition jitters
 #DLIB_FACEREC_PADDING = 0.25  # default dlib
 FACEREC_PADDING = 0.3  # VGG FACE2 recommended
-
+DEFAULT_SIZE_FACE_DETECT = (480,480)
+DEFAULT_JITTER_AMT = 0.015  # used for OpenCV DNN face detector with VGG2 face feature extractor
+DEFAULT_NUM_JITTERS = 4  # used for smoothing the facial feature extraction
+DEFAULT_FACE_PADDING_VGG_FACE2 = 0.3
 POSE_MINMAX_YAW = (-25,25)
 POSE_MINMAX_ROLL = (-15,15)
 POSE_MINMAX_PITCH = (-10,10)
diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py
index 940c8b6d..9325fc3c 100644
--- a/megapixels/app/settings/types.py
+++ b/megapixels/app/settings/types.py
@@ -43,10 +43,10 @@ class LogLevel(Enum):
 
 class Metadata(Enum):
   IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, \
-    FACE_ROI, FACE_LANDMARK_2D_68, FACE_LANDMARK_2D_5, FACE_LANDMARK_3D_68 = range(8)
+    FACE_ROI, FACE_LANDMARK_2D_68, FACE_LANDMARK_2D_5, FACE_LANDMARK_3D_68, FACE_ATTRIBUTES = range(9)
 
 class Dataset(Enum):
-  LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, SELFIE_DATASET = range(7)
+  LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK = range(8)
 
 
 # ---------------------------------------------------------------------
@@ -59,7 +59,7 @@ class FaceDetectNet(Enum):
 
 class FaceExtractor(Enum):
   """Type of face recognition feature extractor"""
   # TODO deprecate DLIB resnet and use only CVDNN Caffe models
-  DLIB_RESNET, VGG_FACE2 = range(2)
+  DLIB, VGG = range(2)
 
 class FaceLandmark2D_5(Enum):
   DLIB, MTCNN = range(2)
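The new DEFAULT_* constants are consumed by extract_jitter() above; for four jitters at the VGGFace2 padding of 0.3, the padding schedule works out to:

    import numpy as np

    padding = 0.3                 # DEFAULT_FACE_PADDING_VGG_FACE2
    pad_adj = .00875 * padding    # fraction of padding to vary
    print(np.linspace(padding - pad_adj, padding + pad_adj, num=4))
    # [0.297375 0.299125 0.300875 0.302625]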
diff --git a/megapixels/app/utils/display_utils.py b/megapixels/app/utils/display_utils.py
index 7b74aa46..e72cc0f0 100644
--- a/megapixels/app/utils/display_utils.py
+++ b/megapixels/app/utils/display_utils.py
@@ -2,6 +2,10 @@ import sys
 
 import cv2 as cv
 
+from app.utils.logger_utils import Logger
+
+
+log = Logger.getLogger()
 
 def handle_keyboard(delay_amt=1):
   '''Used with cv.imshow('title', image) to wait for keyboard press
@@ -11,6 +15,5 @@ def handle_keyboard(delay_amt=1):
     if k == 27 or k == ord('q'):  # ESC
       cv.destroyAllWindows()
       sys.exit()
-    elif k != 255:
-      # any key to continue
-      break
\ No newline at end of file
+    #else:
+    #log.info('Press Q, q, or ESC to exit')
diff --git a/megapixels/app/utils/draw_utils.py b/megapixels/app/utils/draw_utils.py
index 3a389e68..3378e3e8 100644
--- a/megapixels/app/utils/draw_utils.py
+++ b/megapixels/app/utils/draw_utils.py
@@ -4,6 +4,9 @@ from math import sqrt
 import numpy as np
 import cv2 as cv
 
+from app.utils import logger_utils
+
+log = logger_utils.Logger.getLogger()
 
 end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
 
@@ -105,46 +108,61 @@ def plot_pose_box(im, Ps, pts68s, color=(40, 255, 0), line_width=2):
 
 pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
 
-def draw_landmarks2D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
+def draw_landmarks2D(im, points_norm, radius=3, color=(0,255,0)):
   '''Draws facial landmarks, either 5pt or 68pt
   '''
-  for x,y in points:
-    cv.circle(im, (x,y), radius, color, -1, cv.LINE_AA)
-
+  im_dst = im.copy()
+  dim = im.shape[:2][::-1]
+  for x,y in points_norm:
+    pt = (int(x*dim[0]), int(y*dim[1]))
+    cv.circle(im_dst, pt, radius, color, -1, cv.LINE_AA)
+  return im_dst
 
-def draw_landmarks3D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
+def draw_landmarks3D(im, points, radius=3, color=(0,255,0)):
   '''Draws 3D facial landmarks
   '''
+  im_dst = im.copy()
   for x,y,z in points:
-    cv.circle(im, (x,y), radius, color, -1, cv.LINE_AA)
-
+    cv.circle(im_dst, (x,y), radius, color, -1, cv.LINE_AA)
+  return im_dst
 
-def draw_bbox(im, bbox, color=(0,255,0), stroke_weight=2):
-  '''Draws a dimensioned (not-normalized) BBox onto cv image
+def draw_bbox(im, bbox_norm, color=(0,255,0), stroke_weight=2):
+  '''Draws BBox onto cv image
   '''
-  cv.rectangle(im, bbox.pt_tl, bbox.pt_br, color, stroke_weight)
-
+  im_dst = im.copy()
+  bbox_dim = bbox_norm.to_dim(im.shape[:2][::-1])
+  cv.rectangle(im_dst, bbox_dim.pt_tl, bbox_dim.pt_br, color, stroke_weight)
+  return im_dst
 
 def draw_pose(im, pt_nose, image_pts):
   '''Draws 3-axis pose over image
+  TODO: normalize point data
   '''
-  cv.line(im, pt_nose, tuple(image_pts['pitch'].ravel()), pose_types['pitch'], 3)
-  cv.line(im, pt_nose, tuple(image_pts['yaw'].ravel()), pose_types['yaw'], 3)
-  cv.line(im, pt_nose, tuple(image_pts['roll'].ravel()), pose_types['roll'], 3)
-
+  im_dst = im.copy()
+  log.debug(f'pt_nose: {pt_nose}')
+  log.debug(f'image_pts pitch: {image_pts["pitch"]}')
+  cv.line(im_dst, pt_nose, tuple(image_pts['pitch']), pose_types['pitch'], 3)
+  cv.line(im_dst, pt_nose, tuple(image_pts['yaw']), pose_types['yaw'], 3)
+  cv.line(im_dst, pt_nose, tuple(image_pts['roll']), pose_types['roll'], 3)
+  return im_dst
 
-def draw_text(im, pt, text, color=(0,255,0)):
+def draw_text(im, pt_norm, text, color=(0,255,0)):
   '''Draws degrees as text over image
   '''
-  cv.putText(im, text, pt, cv.FONT_HERSHEY_SIMPLEX, 0.75, color, thickness=1, lineType=cv.LINE_AA)
-
+  im_dst = im.copy()
+  dim = im.shape[:2][::-1]
+  pt = tuple(map(int, (pt_norm[0]*dim[0], pt_norm[1]*dim[1])))
+  cv.putText(im_dst, text, pt, cv.FONT_HERSHEY_SIMPLEX, 0.75, color, thickness=1, lineType=cv.LINE_AA)
+  return im_dst
 
 def draw_degrees(im, pose_data, color=(0,255,0)):
   '''Draws degrees as text over image
   '''
+  im_dst = im.copy()
   for i, pose_type in enumerate(pose_types.items()):
     k, clr = pose_type
     v = pose_data[k]
     t = '{}: {:.2f}'.format(k, v)
     origin = (10, 30 + (25 * i))
-    cv.putText(im, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
+    cv.putText(im_dst, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
+  return im_dst
\ No newline at end of file
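Every draw_* function now works on im.copy() and returns the drawn copy instead of mutating its input, so callers must reassign. A usage sketch (import path assumed from this repo's conventions):

    import numpy as np
    from app.utils import draw_utils

    im = np.zeros((480, 640, 3), dtype=np.uint8)
    points_norm = [(0.5, 0.5), (0.25, 0.75)]
    im = draw_utils.draw_landmarks2D(im, points_norm)  # reassign; original is untouched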
