| author | Jules Laplace <julescarbon@gmail.com> | 2019-01-17 15:11:47 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-01-17 15:11:47 +0100 |
| commit | 85ae432fb6c6c17292b319bca068e46a4ea81eb3 (patch) | |
| tree | 4d0270fac0fdc7c1c1333af9c4bb82c6eb00669d /megapixels/app | |
| parent | c293006ba43944ffeb4dcab17b2256f3a5491a36 (diff) | |
| parent | 03ad11fb2a3dcd425d50167b15d72d4e0ef536a2 (diff) | |
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/models/bbox.py | 40 |
| -rw-r--r-- | megapixels/app/models/data_store.py | 3 |
| -rw-r--r-- | megapixels/app/models/dataset.py | 107 |
| -rw-r--r-- | megapixels/app/processors/face_age_gender.py | 20 |
| -rw-r--r-- | megapixels/app/processors/face_beauty.py | 15 |
| -rw-r--r-- | megapixels/app/processors/face_detector.py | 77 |
| -rw-r--r-- | megapixels/app/processors/face_extractor.py | 159 |
| -rw-r--r-- | megapixels/app/processors/face_landmarks.py | 31 |
| -rw-r--r-- | megapixels/app/processors/face_pose.py | 23 |
| -rw-r--r-- | megapixels/app/processors/face_recognition.py | 68 |
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 16 |
| -rw-r--r-- | megapixels/app/settings/types.py | 13 |
| -rw-r--r-- | megapixels/app/utils/display_utils.py | 9 |
| -rw-r--r-- | megapixels/app/utils/draw_utils.py | 56 |
14 files changed, 462 insertions, 175 deletions
diff --git a/megapixels/app/models/bbox.py b/megapixels/app/models/bbox.py
index 40874691..608aaaf8 100644
--- a/megapixels/app/models/bbox.py
+++ b/megapixels/app/models/bbox.py
@@ -1,4 +1,5 @@
 import math
+import random
 
 from dlib import rectangle as dlib_rectangle
 import numpy as np
@@ -127,9 +128,39 @@ class BBox:
     d = int(math.sqrt(math.pow(dcx, 2) + math.pow(dcy, 2)))
     return d
 
+  # -----------------------------------------------------------------
   # Modify
+  def jitter(self, amt):
+    '''Jitters BBox in x,y,w,h values. Used for face feature extraction
+    :param amt: (float) percentage of BBox for maximum translation
+    :returns (BBox)
+    '''
+    w = self._width + (self._width * random.uniform(-amt, amt))
+    h = self._height + (self._height * random.uniform(-amt, amt))
+    cx = self._cx + (self._cx * random.uniform(-amt, amt))
+    cy = self._cy + (self._cy * random.uniform(-amt, amt))
+    x1, y1 = np.clip((cx - w/2, cy - h/2), 0.0, 1.0)
+    x2, y2 = np.clip((cx + w/2, cy + h/2), 0.0, 1.0)
+    return BBox(x1, y1, x2, y2)
+
+  def expand(self, per):
+    """Expands BBox by percentage
+    :param per: (float) percentage to expand 0.0 - 1.0
+    :returns (BBox) expanded
+    """
+    # expand
+    dw, dh = [(self._width * per), (self._height * per)]
+    r = list(np.array(self._rect) + np.array([-dw, -dh, dw, dh]))
+    # threshold expanded rectangle to normalized bounds
+    r[0] = max(r[0], 0.0)
+    r[1] = max(r[1], 0.0)
+    r[2] = min(r[2], 1.0)
+    r[3] = min(r[3], 1.0)
+    return BBox(*r)
+
   def expand_dim(self, amt, bounds):
     """Expands BBox within dim
     :param box: (tuple) left, top, right, bottom
@@ -170,7 +201,7 @@ class BBox:
     # print(adj)
     r = np.add(np.array(r), adj)
-    return BBox(*r)
+    return BBox(*r)  # updates all BBox values
 
   # -----------------------------------------------------------------
@@ -221,6 +252,13 @@ class BBox:
   # Create from
 
   @classmethod
+  def from_xywh_norm(cls, x, y, w, h):
+    """Converts x, y, w, h to normalized BBox
+    :returns BBox
+    """
+    return cls(x, y, x + w, y + h)
+
+  @classmethod
   def from_xyxy_dim(cls, x1, y1, x2, y2, dim):
     """Converts x1, y1, w, h to BBox and normalizes
     :returns BBox
diff --git a/megapixels/app/models/data_store.py b/megapixels/app/models/data_store.py
index 626c9da4..a8d6916f 100644
--- a/megapixels/app/models/data_store.py
+++ b/megapixels/app/models/data_store.py
@@ -24,6 +24,9 @@ class DataStore:
   def metadata_dir(self):
     return join(self.dir_metadata)
 
+  def media_dir(self):
+    return join(self.dir_media)
+
   def media_images_original(self):
     return join(self.dir_media, 'original')
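The new `jitter()` and `expand()` methods in the bbox.py hunk above both operate on normalized coordinates and clamp their results to the 0.0-1.0 frame. A minimal sketch of how a caller might combine them with the `to_dim()`/`to_xyxy()` converters used elsewhere in this diff (the box values and image size here are illustrative):

```python
from app.models.bbox import BBox

# a normalized face box: x1, y1, x2, y2 in 0.0-1.0
bbox_norm = BBox(0.30, 0.25, 0.55, 0.60)

# pad each side by 30% of width/height, clamped to the frame
bbox_padded = bbox_norm.expand(0.3)

# randomly perturb center and size by up to 1.5% for jittered extraction
bbox_jittered = bbox_norm.jitter(0.015)

# map back to pixel space to crop a 640x480 image
x1, y1, x2, y2 = bbox_padded.to_dim((640, 480)).to_xyxy()
```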
diff --git a/megapixels/app/models/dataset.py b/megapixels/app/models/dataset.py
index eb0109a7..88986873 100644
--- a/megapixels/app/models/dataset.py
+++ b/megapixels/app/models/dataset.py
@@ -32,7 +32,7 @@ class Dataset:
     self.data_store = DataStore(opt_data_store, self._dataset_type)
     self.data_store_s3 = DataStoreS3(self._dataset_type)
 
-  def load_face_vectors(self):
+  def _load_face_vectors(self):
     metadata_type = types.Metadata.FACE_VECTOR
     fp_csv = self.data_store.metadata(metadata_type)
     self.log.info(f'loading: {fp_csv}')
@@ -44,22 +44,24 @@ class Dataset:
       self.log.info(f'build face vector dict: {len(self._face_vectors)}')
       # remove the face vector column, it can be several GB of memory
       self._metadata[metadata_type].drop('vec', axis=1, inplace=True)
+      #n_dims = len(self._metadata[metadata_type].keys()) - 2
+      #drop_keys = [f'd{i}' for i in range(1,n_dims+1)]
+      #self._metadata[metadata_type].drop(drop_keys, axis=1, inplace=True)
     else:
       self.log.error(f'File not found: {fp_csv}. Exiting.')
       sys.exit()
 
-  def load_records(self):
+  def _load_file_records(self):
     metadata_type = types.Metadata.FILE_RECORD
     fp_csv = self.data_store.metadata(metadata_type)
     self.log.info(f'loading: {fp_csv}')
     if Path(fp_csv).is_file():
-      self._metadata[metadata_type] = pd.read_csv(fp_csv).set_index('index')
+      self._metadata[metadata_type] = pd.read_csv(fp_csv, dtype=cfg.FILE_RECORD_DTYPES).set_index('index')
     else:
       self.log.error(f'File not found: {fp_csv}. Exiting.')
       sys.exit()
 
-  def load_identities(self):
-    metadata_type = types.Metadata.IDENTITY
+  def _load_metadata(self, metadata_type):
     fp_csv = self.data_store.metadata(metadata_type)
     self.log.info(f'loading: {fp_csv}')
     if Path(fp_csv).is_file():
@@ -67,6 +69,14 @@ class Dataset:
     else:
       self.log.error(f'File not found: {fp_csv}. Exiting.')
       sys.exit()
+
+  def load_metadata(self, metadata_type):
+    if metadata_type == types.Metadata.FILE_RECORD:
+      self._load_file_records()
+    elif metadata_type == types.Metadata.FACE_VECTOR:
+      self._load_face_vectors()
+    else:
+      self._load_metadata(metadata_type)
 
   def metadata(self, opt_metadata_type):
     return self._metadata.get(opt_metadata_type, None)
@@ -79,11 +89,11 @@ class Dataset:
     # get identity meta
     df_identity = self._metadata[types.Metadata.IDENTITY]
     # future datasets can have multiple identities per images
-    ds_identities = df_identity.iloc[identity_index]
+    #ds_identities = df_identity.iloc[identity_index]
     # get filepath and S3 url
     fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
     s3_url = self.data_store_s3.face(ds_record.uuid)
-    image_record = ImageRecord(ds_record, fp_im, s3_url, ds_identities=ds_identities)
+    image_record = ImageRecord(ds_record, fp_im, s3_url)
     return image_record
 
   def vector_to_record(self, record_index):
@@ -142,33 +152,61 @@ class Dataset:
     # find most similar feature vectors indexes
     #match_idxs = self.similar(query_vec, n_results, threshold)
     sim_scores = np.linalg.norm(np.array([query_vec]) - np.array(self._face_vectors), axis=1)
-    match_idxs = np.argpartition(sim_scores, n_results)[:n_results]
+    match_idxs = np.argpartition(sim_scores, range(n_results))[:n_results]
+
+    df_record = self._metadata[types.Metadata.FILE_RECORD]
+    df_vector = self._metadata[types.Metadata.FACE_VECTOR]
+    df_roi = self._metadata[types.Metadata.FACE_ROI]
+    if types.Metadata.IDENTITY in self._metadata.keys():
+      df_identity = self._metadata[types.Metadata.IDENTITY]
+    else:
+      df_identity = None
+
+    identities = []
 
     for match_idx in match_idxs:
       # get the corresponding face vector row
       roi_index = self._face_vector_roi_idxs[match_idx]
-      df_record = self._metadata[types.Metadata.FILE_RECORD]
-      ds_record = df_record.iloc[roi_index]
+      ds_roi = df_roi.iloc[roi_index]
+      record_idx = int(ds_roi.record_index)
+      ds_record = df_record.iloc[record_idx]
+      self.log.debug(f'find match index: {match_idx}, --> roi_index: {roi_index}')
       fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
       s3_url = self.data_store_s3.face(ds_record.uuid)
-      image_record = ImageRecord(ds_record, fp_im, s3_url)
-      #roi_index = self._face_vector_roi_idxs[match_idx]
-      #image_record = self.roi_idx_to_record(roi_index)
+      identities = []
+
+      bbox_norm = BBox.from_xywh_norm(ds_roi.x, ds_roi.y, ds_roi.w, ds_roi.h)
+
+      if types.Metadata.IDENTITY in self._metadata.keys():
+        ds_id = df_identity.loc[df_identity['identity_key'] == ds_record.identity_key].iloc[0]
+        identity = Identity(record_idx,
+                            name_display=ds_id.name_display,
+                            description=ds_id.description,
+                            gender=ds_id.gender,
+                            roi_index=roi_index,
+                            identity_key=ds_id.identity_key,
+                            num_images=ds_id.num_images)
+      else:
+        identity = None
+      image_record = ImageRecord(ds_record, fp_im, s3_url, bbox_norm, identity=identity)
       image_records.append(image_record)
     return image_records
 
   # ----------------------------------------------------------------------
   # utilities
 
-  def df_vecs_to_dict(self, df):
+  def df_vecs_to_dict(self, df_vec):
     # convert the DataFrame CSV to float list of vecs
-    return [list(map(float,x.vec.split(','))) for x in df.itertuples()]
+    #n_dims = len(df_vec.keys()) - 2  # number of columns with 'd1, d2,...d256'
+    #return [[df[f'd{i}'] for i in range(1,n_dims+1)] for df_idx, df in df_vec.iterrows()]
+    return [list(map(float, x.vec.split(','))) for x in df_vec.itertuples()]
 
   def df_vec_roi_idxs_to_dict(self, df):
     # convert the DataFrame CSV to a list of ROI indexes
     #return [x.roi_index for x in df.itertuples()]
-    return [x.roi_index for x in df.itertuples()]
+    return [int(x.roi_index) for i,x in df.iterrows()]
 
   def similar(self, query_vec, n_results):
     '''Finds most similar N indices of query face vector
@@ -184,23 +222,20 @@ class Dataset:
 
 class ImageRecord:
 
-  def __init__(self, ds_record, fp, url, ds_rois=None, ds_identities=None):
+  def __init__(self, ds_record, fp, url, bbox_norm, identity=None):
     # maybe more other meta will go there
     self.image_index = ds_record.index
     self.sha256 = ds_record.sha256
     self.uuid = ds_record.uuid
     self.filepath = fp
+    self.width = ds_record.width
+    self.height = ds_record.height
     self.url = url
-    self._identities = []
+    self.bbox = bbox_norm
+    self.identity = identity
     # image records contain ROIs
     # ROIs are linked to identities
-    #self._identities = [Identity(x) for x in ds_identities]
-
-  @property
-  def identity(self, index):
-    return self._identity
 
   def summarize(self):
     '''Summarizes data for debugging'''
     log = Logger.getLogger()
@@ -208,22 +243,20 @@ class ImageRecord:
     log.info(f'sha256: {self.sha256}')
     log.info(f'UUID: {self.uuid}')
     log.info(f'S3 url: {self.url}')
-    for identity in self._identities:
-      log.info(f'fullname: {identity.fullname}')
-      log.info(f'description: {identity.description}')
-      log.info(f'gender: {identity.gender}')
-      log.info(f'images: {identity.n_images}')
+    if self.identity:
+      log.info(f'name: {self.identity.name_display}')
+      log.info(f'description: {self.identity.description}')
+      log.info(f'gender: {self.identity.gender}')
+      log.info(f'images: {self.identity.num_images}')
 
 
 class Identity:
 
-  def __init__(self, idx, name='NA', desc='NA', gender='NA', n_images=1,
-               url='NA', age='NA', nationality='NA'):
+  def __init__(self, idx, identity_key=None, name_display=None, num_images=None,
+               description=None, gender=None, roi_index=None):
     self.index = idx
-    self.name = name
-    self.description = desc
+    self.identity_key = identity_key
+    self.name_display = name_display
+    self.description = description
     self.gender = gender
-    self.n_images = n_images
-    self.url = url
-    self.age = age
-    self.nationality = nationality
+    self.roi_index = roi_index
+    self.num_images = num_images
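One subtle change in the search hunk above: `np.argpartition(sim_scores, n_results)` became `np.argpartition(sim_scores, range(n_results))`. With a scalar kth the k smallest scores come back in arbitrary order; passing a range additionally sorts those first k positions, so matches are returned best first. A standalone illustration:

```python
import numpy as np

scores = np.array([0.9, 0.1, 0.5, 0.3, 0.7])

# scalar kth: the two smallest indices, but in arbitrary order
print(np.argpartition(scores, 2)[:2])         # e.g. [3 1]

# range kth: the two smallest indices, ascending by score
print(np.argpartition(scores, range(2))[:2])  # [1 3]
```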
diff --git a/megapixels/app/processors/face_age_gender.py b/megapixels/app/processors/face_age_gender.py
index 95efa8fc..66c51fa8 100644
--- a/megapixels/app/processors/face_age_gender.py
+++ b/megapixels/app/processors/face_age_gender.py
@@ -32,19 +32,21 @@ class _FaceAgeGender:
   '''
 
   dnn_size = (224,224)
-  dnn_mean = (104.0, 177.0, 123.0)
+  dnn_mean = (104.0, 177.0, 123.0)  # ?
+  # authors used imagenet mean
+  #dnn_mean = [103.939, 116.779, 123.68]
   ages = np.arange(0, 101).reshape(101, 1)
+  padding = 0.4
 
   def __init__(self, fp_prototxt, fp_model):
     self.log = logger_utils.Logger.getLogger()
     self.net = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
 
-  def _preprocess(self, im, bbox_dim):
+  def _preprocess(self, im, bbox_norm):
     # isolate face ROI, expand bbox by 40% according to authors
     # https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/
     dim = im.shape[:2][::-1]
-    bbox_dim_exp = bbox_dim.expand_dim( int(0.4*bbox_dim.width), dim)
-    roi = bbox_dim_exp.to_xyxy()
+    roi = bbox_norm.expand(self.padding).to_dim(dim).to_xyxy()
     im_face_crop = im[roi[1]:roi[3], roi[0]:roi[2]]  # isolate face roi
     # resize for blob
@@ -52,6 +54,7 @@ class _FaceAgeGender:
     blob = cv.dnn.blobFromImage(im_resized, 1.0, self.dnn_size, self.dnn_mean)
     return blob
 
+
 class FaceGender(_FaceAgeGender):
 
   # use "apparent" age models
@@ -61,17 +64,18 @@ class FaceGender(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts gender from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (dict) with scores for male and female
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     return {'f': preds[0], 'm': preds[1]}
 
+
 class FaceAgeApparent(_FaceAgeGender):
 
   # use "apparent" age models
@@ -81,13 +85,13 @@ class FaceAgeApparent(_FaceAgeGender):
   def __init__(self):
     super().__init__(self.fp_prototxt, self.fp_model)
 
-  def predict(self, im, bbox_dim):
+  def predict(self, im, bbox_norm):
     '''Predicts apparent age from face crop
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned
     :returns (float) predicted age
     '''
-    im_blob = self._preprocess(im, bbox_dim)
+    im_blob = self._preprocess(im, bbox_norm)
     self.net.setInput(im_blob)
     preds = self.net.forward()[0]
     age = preds.dot(self.ages).flatten()[0]
diff --git a/megapixels/app/processors/face_beauty.py b/megapixels/app/processors/face_beauty.py
index a01c6834..e2d54c98 100644
--- a/megapixels/app/processors/face_beauty.py
+++ b/megapixels/app/processors/face_beauty.py
@@ -1,3 +1,7 @@
+"""
+https://github.com/ustcqidi/BeautyPredict
+"""
+
 import sys
 import os
 from os.path import join
@@ -45,18 +49,15 @@ class FaceBeauty:
     self.model.load_weights(fp_model)
 
-  def beauty(self, im, bbox_dim):
+  def beauty(self, im, bbox_norm):
     '''Predicts facial "beauty" score based on SCUT-FBP attractiveness labels
     :param im: (numpy.ndarray) BGR image
     :param bbox_dim: (BBox) dimensioned BBox
     :returns (float) 0.0-1.0 with 1 being most attractive
     '''
-
-    face = bbox_dim.to_xyxy()
-    self.log.debug(f'face: {face}')
-
-    cropped_im = im[face[1]:face[3], face[0]:face[2]]
-
+    dim = im.shape[:2][::-1]
+    roi = bbox_norm.to_dim(dim).to_xyxy()
+    cropped_im = im[roi[1]:roi[3], roi[0]:roi[2]]
     im_resized = cv.resize(cropped_im, (224, 224))  # force size
     im_norm = np.array([(im_resized - 127.5) / 127.5])  # subtract mean
diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py
index 0e194f7d..7b5310c5 100644
--- a/megapixels/app/processors/face_detector.py
+++ b/megapixels/app/processors/face_detector.py
@@ -14,9 +14,17 @@ from app.settings import app_cfg as cfg
 from app.settings import types
 
-class DetectorMTCNN:
+class DetectorMTCNN_CVDNN:
+
+  # https://github.com/CongWeilin/mtcnn-caffe
+
+  def __init__(self):
+    pass
+
+
+class DetectorMTCNN_PT:
 
-  # https://github.com/ipazc/mtcnn
+  # https://github.com/TropComplique/mtcnn-pytorch/
   # pip install mtcnn
 
   dnn_size = (300, 300)
@@ -54,6 +62,64 @@ class DetectorMTCNN_PT:
     return bboxes
 
 
+class DetectorMTCNN_TF:
+
+  # using TF for inference can cause GPU issues with other frameworks
+  # https://github.com/ipazc/mtcnn
+  # pip install mtcnn
+
+  dnn_size = (300, 300)
+  conf_thresh = 0.9
+
+  def __init__(self, size=(400,400), gpu=0):
+    self.log = logger_utils.Logger.getLogger()
+    device_cur = os.getenv('CUDA_VISIBLE_DEVICES', '')
+    self.log.info(f'Change CUDA_VISIBLE_DEVICES from "{device_cur}" to "{gpu}"')
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
+    from mtcnn.mtcnn import MTCNN
+    self.detector = MTCNN()
+    os.environ['CUDA_VISIBLE_DEVICES'] = device_cur  # reset
+
+  def detect(self, im, size=(400,400), conf_thresh=None, pyramids=None, largest=False, zone=None):
+    '''Detects face using MTCNN and returns (list) of BBox
+    :param im: (numpy.ndarray) image
+    :returns list of BBox
+    '''
+    bboxes = []
+    dnn_size = self.dnn_size if size is None else size
+    conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh
+
+    im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1])
+    dim = im.shape[:2][::-1]
+    dets = self.detector.detect_faces(im)
+    '''
+    {
+      'box': [4, 140, 14, 18],
+      'confidence': 0.9588413834571838,
+      'keypoints': {
+        'left_eye': (8, 147),
+        'right_eye': (14, 146),
+        'nose': (12, 151),
+        'mouth_left': (9, 155),
+        'mouth_right': (14, 154)
+      }
+    }
+    '''
+    for det in dets:
+      rect = det['box']
+      conf = det['confidence']
+      if conf > conf_thresh:
+        bbox = BBox.from_xywh_dim(*rect, dim)
+        bboxes.append(bbox)
+
+    if largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
+
 
 class DetectorHaar:
 
@@ -173,8 +239,11 @@ class DetectorCVDNN:
 
     bboxes = []
     for i in range(0, net_outputs.shape[2]):
-      conf = net_outputs[0, 0, i, 2]
-      if conf > conf_thresh:
+      conf = float(net_outputs[0, 0, i, 2])
+      # BUG: this face detector creates ghost face detections in stage-left from nose-bottom neck
+      # temp fix is to eliminate ROIs extending outside of frame
+      bounds = np.array(net_outputs[0, 0, i, 3:7])
+      if conf > conf_thresh and np.all(bounds < 1):
         rect_norm = net_outputs[0, 0, i, 3:7]
         bboxes.append(BBox(*rect_norm))
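A sketch of how the new TF-backed detector might be driven (the image path is illustrative; `detect()` resizes internally and returns normalized `BBox` objects, so they are mapped back to the source image size before printing):

```python
import cv2 as cv
from app.processors.face_detector import DetectorMTCNN_TF

detector = DetectorMTCNN_TF(gpu=0)  # swaps CUDA_VISIBLE_DEVICES while loading
im = cv.imread('group_photo.jpg')   # illustrative path

# keep only the largest face above the default 0.9 confidence
bboxes = detector.detect(im, size=(400, 400), largest=True)
for bbox in bboxes:
  print(bbox.to_dim(im.shape[:2][::-1]).to_xyxy())
```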
diff --git a/megapixels/app/processors/face_extractor.py b/megapixels/app/processors/face_extractor.py
new file mode 100644
index 00000000..f618cd36
--- /dev/null
+++ b/megapixels/app/processors/face_extractor.py
@@ -0,0 +1,159 @@
+import os
+from os.path import join
+from pathlib import Path
+
+import cv2 as cv
+import numpy as np
+import dlib
+import imutils
+
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+
+
+def similarity(query_enc, known_enc):
+  return np.linalg.norm(query_enc - known_enc, axis=1)
+
+def flatten(vec):
+  '''Converts N-D vector into a flattened dict for CSV
+  :param vec: (list) a feature vector as list of floats
+  :returns dict item for each point (eg {'d1': 0.28442156, 'd2': 0.1868632})
+  '''
+  vec_flat = {}
+  for idx, val in enumerate(vec, 1):
+    vec_flat[f'd{idx}'] = val
+  return vec_flat
+
+
+class Extractor:
+
+  n_dim = None  # override
+
+  def __init__(self):
+    self.log = logger_utils.Logger.getLogger()
+
+  def flatten(self, vec):
+    '''Converts N-D vector into a flattened dict for CSV
+    :param vec: (list) a feature vector as list of floats
+    :returns dict item for each point (eg {'d1': 0.28442156, 'd2': 0.1868632})
+    '''
+    vec_flat = {}
+    for idx, val in enumerate(vec, 1):
+      vec_flat[f'd{idx}'] = val
+    return vec_flat
+
+  def to_str(self, vec):
+    return ','.join([str(x) for x in vec])
+
+  def unflatten_df(self, df):
+    # convert from flattened 'd1..d256' columns back to a vector
+    return [df[f'd{i}'] for i in range(1,257)]
+
+
+class ExtractorVGG(Extractor):
+
+  # https://github.com/ox-vgg/vgg_face2
+  # Uses OpenCV DNN to extract feature vector for VGG Face 2 models
+
+  n_dim = 256
+  dnn_dim = (224,224)
+  dnn_mean = (91.4953, 103.8827, 131.0912)
+
+  def __init__(self):
+    super().__init__()
+    fp_model = '/data_store_hdd/apps/megapixels/models/caffe/vgg_face2/resnet50_256_caffe/resnet50_256.caffemodel'
+    fp_prototxt = '/data_store_hdd/apps/megapixels/models/caffe/vgg_face2/resnet50_256_caffe/resnet50_256.prototxt'
+    self.dnn = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)
+    self.feat_layer = self.dnn.getLayerNames()[-2]
+
+  def extract_jitter(self, im, bbox_norm):
+    '''(experimental) Extracts jitter-averaged feature vector for face crop
+    :param im: (numpy.ndarray) BGR image
+    :param bbox_norm: (BBox) normalized
+    :returns (list) of (float)'''
+    dim = im.shape[:2][::-1]
+    num_jitters = cfg.DEFAULT_NUM_JITTERS
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
+    pad_adj = .00875 * padding  # percentage of padding to vary
+    paddings = np.linspace(padding - pad_adj, padding + pad_adj, num=num_jitters)
+    jitter_amt = cfg.DEFAULT_JITTER_AMT
+    vecs = []
+    for i in range(num_jitters):
+      bbox_norm_jit = bbox_norm.jitter(jitter_amt)  # jitters w, h, center
+      bbox_ext = bbox_norm_jit.expand(paddings[i])
+      #bbox_ext = bbox_norm.expand(paddings[i])
+      x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
+      im_crop = im[y1:y2, x1:x2]
+      # According to VGG, model trained using bilinear interpolation (INTER_LINEAR)
+      im_crop = cv.resize(im_crop, self.dnn_dim, interpolation=cv.INTER_LINEAR)
+      blob = cv.dnn.blobFromImage(im_crop, 1.0, self.dnn_dim, self.dnn_mean)
+      self.dnn.setInput(blob)
+      vec = np.array(self.dnn.forward(self.feat_layer)[0])
+      vec_norm = vec/np.linalg.norm(vec)  # normalize
+      vecs.append(vec_norm)
+    vec_norm = np.mean(np.array(vecs), axis=0)
+    return vec_norm
+
+  def extract(self, im, bbox_norm):
+    '''Extracts feature vector for face crop
+    :param im: (numpy.ndarray) BGR image
+    :param bbox_norm: (BBox) normalized
+    :returns (list) of (float)'''
+    padding = cfg.DEFAULT_FACE_PADDING_VGG_FACE2
+    bbox_ext = bbox_norm.expand(padding)
+    dim = im.shape[:2][::-1]
+    x1,y1,x2,y2 = bbox_ext.to_dim(dim).to_xyxy()
+    im = im[y1:y2, x1:x2]
+    # According to VGG, model trained using bilinear interpolation (INTER_LINEAR)
+    im = cv.resize(im, self.dnn_dim, interpolation=cv.INTER_LINEAR)
+    blob = cv.dnn.blobFromImage(im, 1.0, self.dnn_dim, self.dnn_mean)
+    self.dnn.setInput(blob)
+    vec = np.array(self.dnn.forward(self.feat_layer)[0])
+    vec_norm = vec/np.linalg.norm(vec)  # normalize
+    return vec_norm
+
+
+class ExtractorDLIB(Extractor):
+
+  # https://github.com/davisking/dlib/blob/master/python_examples/face_recognition.py
+  # facerec.compute_face_descriptor(img, shape, 100, 0.25)
+  # padding=opt_padding not yet implemented in dlib==19.16 but merged in master
+
+  n_dim = 128
+  process_width = 100
+
+  def __init__(self, gpu=0, jitters=cfg.DLIB_FACEREC_JITTERS):
+    super().__init__()
+    self.num_jitters = jitters
+    # set and swap GPU visibility
+    if gpu > -1:
+      cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
+      os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
+    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT)
+    self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET)
+    # unset and swap GPU visibility
+    if gpu > -1:
+      os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset GPU env
+
+  def extract(self, im, bbox_norm):
+    '''Converts image and bbox into 128d vector
+    :param im: (numpy.ndarray) BGR image
+    :param bbox_norm: (BBox) normalized
+    '''
+    # scale the image so the face is always ~100px wide
+    dim = im.shape[:2][::-1]
+    bbox_dim = bbox_norm.to_dim(dim)
+    scale = self.process_width / bbox_dim.width
+    im = cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4)
+    bbox_dim_dlib = bbox_norm.to_dim(im.shape[:2][::-1]).to_dlib()
+    face_shape = self.predictor(im, bbox_dim_dlib)
+    # the padding arg for compute_face_descriptor requires dlib > 19.16
+    # vec = self.facerec.compute_face_descriptor(im, face_shape, self.num_jitters, self.padding)
+    # vectors are already normalized
+    vec = self.facerec.compute_face_descriptor(im, face_shape, self.num_jitters)
+    return vec
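Both extractors return L2-normalized vectors, which is why the ranking in `dataset.py` reduces to plain Euclidean distance. A usage sketch for the VGG extractor (image path and box are illustrative; the Caffe model paths are hard-coded in `__init__` above):

```python
import cv2 as cv
import numpy as np
from app.models.bbox import BBox
from app.processors.face_extractor import ExtractorVGG

extractor = ExtractorVGG()            # loads the resnet50_256 Caffe model
im = cv.imread('face.jpg')            # illustrative path
bbox_norm = BBox(0.3, 0.2, 0.7, 0.8)  # illustrative face box

vec = extractor.extract(im, bbox_norm)             # single padded crop
vec_jit = extractor.extract_jitter(im, bbox_norm)  # mean over jittered crops

# distance between two L2-normalized vectors: smaller is more similar
print(np.linalg.norm(np.array(vec) - np.array(vec_jit)))
```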
diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks.py
index 171fc666..231e378f 100644
--- a/megapixels/app/processors/face_landmarks.py
+++ b/megapixels/app/processors/face_landmarks.py
@@ -30,6 +30,9 @@ class Landmarks2D:
     self.log.warn('Define landmarks() function')
     pass
 
+  def to_str(self, vec):
+    return ','.join([','.join(list(map(str,[x,y]))) for x,y in vec])
+
   def flatten(self, points):
     '''Converts list of point-tuples into a flattened list for CSV
     :param points: (list) of x,y points
@@ -69,9 +72,9 @@ class FaceAlignment2D_68(Landmarks2D):
     # predict landmarks
     points = self.fa.get_landmarks(im)  # returns array of arrays of 68 2D pts/face
     # convert to data type
-    points = [list(map(int, p)) for p in points[0]]
-    return points
-
+    w,h = im.shape[:2][::-1]
+    points = [(x/w, y/h) for x,y in points[0]]
+    return points  # normalized
 
 class Dlib2D(Landmarks2D):
@@ -82,15 +85,16 @@ class Dlib2D(Landmarks2D):
     self.predictor = dlib.shape_predictor(model)
     self.log.info(f'loaded predictor model: {model}')
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Generates 68-pt landmarks using dlib predictor
     :param im: (numpy.ndarray) BGR image
     :param bbox: (app.models.BBox) dimensioned
-    :returns (list) of (int, int) for x,y values
+    :returns (list) of (float, float) for normalized x,y values
     '''
-    bbox = bbox.to_dlib()
+    dim = im.shape[:2][::-1]
+    roi_dlib = bbox_norm.to_dim(dim).to_dlib()
     im_gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
-    points = [[p.x, p.y] for p in self.predictor(im_gray, bbox).parts()]
+    points = [[p.x/dim[0], p.y/dim[1]] for p in self.predictor(im_gray, roi_dlib).parts()]
     return points
 
@@ -121,13 +125,13 @@ class MTCNN2D_5(Landmarks2D):
     from mtcnn.mtcnn import MTCNN
     self.detector = MTCNN()
 
-  def landmarks(self, im, bbox):
+  def landmarks(self, im, bbox_norm):
     '''Detects face using MTCNN and returns (list) of BBox
     :param im: (numpy.ndarray) image
     :returns list of BBox
     '''
     results = []
-    dim_wh = im.shape[:2][::-1]  # (w, h)
+    dim = im.shape[:2][::-1]  # (w, h)
 
     # run MTCNN to get bbox and landmarks
     dets = self.detector.detect_faces(im)
@@ -138,7 +142,7 @@ class MTCNN2D_5(Landmarks2D):
       #rect = det['box']
       points = det['keypoints']
       # convert to normalized for contain-comparison
-      points_norm = [np.array(pt)/dim_wh for pname, pt in points.items()]
-      contains = False not in [bbox.contains(pn) for pn in points_norm]
+      points_norm = [np.array(pt)/dim for pname, pt in points.items()]
+      contains = False not in [bbox_norm.contains(pn) for pn in points_norm]
       if contains:
         results.append(points)  # append original points
@@ -185,14 +189,17 @@ class FaceAlignment3D_68(Landmarks3D):
     device = f'cuda:{gpu}' if gpu > -1 else 'cpu'
     self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=flip_input)
 
-  def landmarks(self, im, rect):
+  def landmarks(self, im, bbox_norm):
     '''Calculates the 3D facial landmarks
     :param im: (numpy.ndarray) BGR image
-    :param rect: (list) of face (x1, y1, x2, y2)
+    :param bbox_norm: (BBox) of face roi
     :returns (list) of 68 (int) (tuples) as (x,y, z)
     '''
     # predict landmarks
+    dim = im.shape[:2][::-1]
+    rect = bbox_norm.to_dim(dim).to_xyxy()
     points = self.fa.get_landmarks(im, [rect])  # returns array of arrays of 68 3D pts/face
     # convert to data type
+    # TODO normalize this, but how to norm 3D?
     points = [list(map(int, p)) for p in points[0]]
     return points
\ No newline at end of file
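All 2D landmark backends now return normalized (x, y) pairs instead of pixel coordinates, which pairs with the updated `draw_utils.draw_landmarks2D()` further down. A round-trip sketch using the dlib backend (paths and box are illustrative; the constructor is assumed to take the predictor model path, as the hunk suggests):

```python
import cv2 as cv
from app.models.bbox import BBox
from app.processors.face_landmarks import Dlib2D
from app.utils import draw_utils

landmarker = Dlib2D('shape_predictor_68_face_landmarks.dat')  # illustrative path
im = cv.imread('face.jpg')                                    # illustrative path
bbox_norm = BBox(0.3, 0.2, 0.7, 0.8)

points_norm = landmarker.landmarks(im, bbox_norm)      # (x, y) pairs in 0.0-1.0
im_out = draw_utils.draw_landmarks2D(im, points_norm)  # denormalizes internally
cv.imwrite('face_landmarks.jpg', im_out)
```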
diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py
index 5ac510ec..49a39a53 100644
--- a/megapixels/app/processors/face_pose.py
+++ b/megapixels/app/processors/face_pose.py
@@ -21,10 +21,10 @@ class FacePoseDLIB:
   pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
 
   def __init__(self):
-    pass
+    self.log = logger_utils.Logger.getLogger()
 
-  def pose(self, landmarks, dim):
+  def pose(self, landmarks_norm, dim):
     '''Returns face pose information
     :param landmarks: (list) of 68 (int, int) xy tuples
     :param dim: (tuple|list) of image (width, height)
@@ -55,9 +55,10 @@ class FacePoseDLIB:
     # find 6 pose points
     pose_points = []
     for j, idx in enumerate(pose_points_idx):
-      pt = landmarks[idx]
-      pose_points.append((pt[0], pt[1]))
-    pose_points = np.array(pose_points, dtype='double')  # convert to double
+      x,y = landmarks_norm[idx]
+      pt = (int(x*dim[0]), int(y*dim[1]))
+      pose_points.append(pt)
+    pose_points = np.array(pose_points, dtype='double')  # convert to double, real dimensions
 
     # create camera matrix
     focal_length = dim[0]
@@ -75,18 +76,16 @@ class FacePoseDLIB:
     result = {}
 
     # project points
-    #if project_points:
     pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs)
     pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs)
-    #result['points_model'] = pts_model
-    #result['points_image'] = pts_im
+
     result['points'] = {
-      'pitch': pts_im[0],
-      'roll': pts_im[2],
-      'yaw': pts_im[1]
+      'pitch': list(map(int,pts_im[0][0])),
+      'roll': list(map(int,pts_im[2][0])),
+      'yaw': list(map(int,pts_im[1][0]))
     }
-    result['point_nose'] = tuple(landmarks[pose_points_idx[0]])
+    result['point_nose'] = tuple(map(int,pose_points[0]))
 
     rvec_matrix = cv.Rodrigues(rot_vec)[0]
     # convert to degrees
diff --git a/megapixels/app/processors/face_recognition.py b/megapixels/app/processors/face_recognition.py
deleted file mode 100644
index 76f00aa1..00000000
--- a/megapixels/app/processors/face_recognition.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-from os.path import join
-from pathlib import Path
-
-import cv2 as cv
-import numpy as np
-import dlib
-import imutils
-
-from app.utils import im_utils, logger_utils
-from app.models.bbox import BBox
-from app.settings import app_cfg as cfg
-from app.settings import types
-
-class RecognitionDLIB:
-
-  # https://github.com/davisking/dlib/blob/master/python_examples/face_recognition.py
-  # facerec.compute_face_descriptor(img, shape, 100, 0.25)
-
-  def __init__(self, gpu=0):
-    self.log = logger_utils.Logger.getLogger()
-
-    if gpu > -1:
-      cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
-      os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
-
-    self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT)
-    self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET)
-
-    if gpu > -1:
-      os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset GPU env
-
-
-  def vec(self, im, bbox, width=100,
-          jitters=cfg.DLIB_FACEREC_JITTERS, padding=cfg.DLIB_FACEREC_PADDING):
-    '''Converts image and bbox into 128d vector
-    :param im: (numpy.ndarray) BGR image
-    :param bbox: (BBox)
-    '''
-    # scale the image so the face is always 100x100 pixels
-
-    #self.log.debug('compute scale')
-    scale = width / bbox.width
-    #im = cv.resize(im, (scale, scale), cv.INTER_LANCZOS4)
-    #self.log.debug('resize')
-    cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4)
-    #self.log.debug('to dlib')
-    bbox_dlib = bbox.to_dlib()
-    #self.log.debug('precitor')
-    face_shape = self.predictor(im, bbox_dlib)
-    # vec = self.facerec.compute_face_descriptor(im, face_shape, jitters, padding)
-    #self.log.debug('vec')
-    vec = self.facerec.compute_face_descriptor(im, face_shape, jitters)
-    #vec = self.facerec.compute_face_descriptor(im, face_shape)
-    return vec
-
-  def flatten(self, vec):
-    '''Converts 128D vector into a flattened list for CSV
-    :param points: (list) a feature vector as list of floats
-    :returns dict item for each point (eg {'d1':0.28442156, 'd1': 0.1868632})
-    '''
-    vec_flat = {}
-    for idx, val in enumerate(vec, 1):
-      vec_flat[f'd{idx}'] = val
-    return vec_flat
-
-  def similarity(self, query_enc, known_enc):
-    return np.linalg.norm(query_enc - known_enc, axis=1)
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index fea47572..c256635b 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -21,6 +21,7 @@ DataStoreVar = click_utils.ParamVar(types.DataStore)
 # Face analysis
 HaarCascadeVar = click_utils.ParamVar(types.HaarCascade)
 FaceDetectNetVar = click_utils.ParamVar(types.FaceDetectNet)
+FaceExtractorVar = click_utils.ParamVar(types.FaceExtractor)
 FaceLandmark2D_5Var = click_utils.ParamVar(types.FaceLandmark2D_5)
 FaceLandmark2D_68Var = click_utils.ParamVar(types.FaceLandmark2D_68)
 FaceLandmark3D_68Var = click_utils.ParamVar(types.FaceLandmark3D_68)
@@ -96,8 +97,12 @@ HASH_TREE_DEPTH = 3
 HASH_BRANCH_SIZE = 3
 
 DLIB_FACEREC_JITTERS = 5  # number of face recognition jitters
-DLIB_FACEREC_PADDING = 0.25  # default dlib
-
+#DLIB_FACEREC_PADDING = 0.25  # default dlib
+FACEREC_PADDING = 0.3  # VGG FACE2 recommended
+DEFAULT_SIZE_FACE_DETECT = (480,480)
+DEFAULT_JITTER_AMT = 0.015  # used for OpenCV DNN face detector with VGG2 face feature extractor
+DEFAULT_NUM_JITTERS = 4  # used for smoothing the facial feature extraction
+DEFAULT_FACE_PADDING_VGG_FACE2 = 0.3
 POSE_MINMAX_YAW = (-25,25)
 POSE_MINMAX_ROLL = (-15,15)
 POSE_MINMAX_PITCH = (-10,10)
@@ -105,6 +110,13 @@ POSE_MINMAX_PITCH = (-10,10)
 POSE_MINMAX_YAW = (-40,40)
 POSE_MINMAX_ROLL = (-35,35)
 POSE_MINMAX_PITCH = (-25,25)
+
+# -----------------------------------------------------------------------------
+# Pandas data
+# -----------------------------------------------------------------------------
+
+FILE_RECORD_DTYPES = {'fn': str, 'subdir': str}
+
 # -----------------------------------------------------------------------------
 # Logging options exposed for custom click Params
 # -----------------------------------------------------------------------------
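The new `FILE_RECORD_DTYPES` mapping is passed to `pd.read_csv()` in `dataset.py` so that numeric-looking filenames and subdirs stay strings; leading zeros would otherwise be lost to integer coercion. A quick self-contained demonstration:

```python
import io
import pandas as pd

FILE_RECORD_DTYPES = {'fn': str, 'subdir': str}

csv = io.StringIO('index,fn,subdir\n0,000123,007\n')
df = pd.read_csv(csv, dtype=FILE_RECORD_DTYPES).set_index('index')
print(df.loc[0, 'fn'])  # '000123' -- kept as a string, not coerced to 123
```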
diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py
index 1d77fdbd..7a34ccc2 100644
--- a/megapixels/app/settings/types.py
+++ b/megapixels/app/settings/types.py
@@ -43,10 +43,12 @@ class LogLevel(Enum):
 
 class Metadata(Enum):
   IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, \
-    FACE_ROI, FACE_LANDMARK_2D_68, FACE_LANDMARK_2D_5, FACE_LANDMARK_3D_68 = range(8)
+    FACE_ROI, FACE_LANDMARK_2D_68, FACE_LANDMARK_2D_5, FACE_LANDMARK_3D_68, \
+    FACE_ATTRIBUTES = range(9)
 
 class Dataset(Enum):
-  LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, SELFIE_DATASET = range(7)
+  LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
+    CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE = range(14)
 
 # ---------------------------------------------------------------------
@@ -54,7 +56,12 @@ class Dataset(Enum):
 # --------------------------------------------------------------------
 
 class FaceDetectNet(Enum):
-  """Scene text detector networks"""
-  HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN = range(5)
+  """Face detector networks"""
+  HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN_TF, MTCNN_PT, MTCNN_CAFFE = range(7)
+
+class FaceExtractor(Enum):
+  """Type of face recognition feature extractor"""
+  # TODO deprecate DLIB resnet and use only CVDNN Caffe models
+  DLIB, VGG = range(2)
 
 class FaceLandmark2D_5(Enum):
   DLIB, MTCNN = range(2)
networks""" - HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN = range(5) + HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN_TF, MTCNN_PT, MTCNN_CAFFE = range(7) + +class FaceExtractor(Enum): + """Type of face recognition feature extractor""" + # TODO deprecate DLIB resnet and use only CVDNN Caffe models + DLIB, VGG = range(2) class FaceLandmark2D_5(Enum): DLIB, MTCNN = range(2) diff --git a/megapixels/app/utils/display_utils.py b/megapixels/app/utils/display_utils.py index 7b74aa46..43328ae9 100644 --- a/megapixels/app/utils/display_utils.py +++ b/megapixels/app/utils/display_utils.py @@ -2,6 +2,10 @@ import sys import cv2 as cv +from app.utils.logger_utils import Logger + + +log = Logger.getLogger() def handle_keyboard(delay_amt=1): '''Used with cv.imshow('title', image) to wait for keyboard press @@ -11,6 +15,7 @@ def handle_keyboard(delay_amt=1): if k == 27 or k == ord('q'): # ESC cv.destroyAllWindows() sys.exit() + elif k == 32 or k == 83: # 83 = right arrow + break elif k != 255: - # any key to continue - break
diff --git a/megapixels/app/utils/draw_utils.py b/megapixels/app/utils/draw_utils.py
index 3a389e68..3378e3e8 100644
--- a/megapixels/app/utils/draw_utils.py
+++ b/megapixels/app/utils/draw_utils.py
@@ -4,6 +4,9 @@ from math import sqrt
 
 import numpy as np
 import cv2 as cv
 
+from app.utils import logger_utils
+
+log = logger_utils.Logger.getLogger()
 
 end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
 
@@ -105,46 +108,61 @@ def plot_pose_box(im, Ps, pts68s, color=(40, 255, 0), line_width=2):
 
 pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
 
-def draw_landmarks2D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
+def draw_landmarks2D(im, points_norm, radius=3, color=(0,255,0)):
   '''Draws facial landmarks, either 5pt or 68pt
   '''
-  for x,y in points:
-    cv.circle(im, (x,y), radius, color, -1, cv.LINE_AA)
-
+  im_dst = im.copy()
+  dim = im.shape[:2][::-1]
+  for x,y in points_norm:
+    pt = (int(x*dim[0]), int(y*dim[1]))
+    cv.circle(im_dst, pt, radius, color, -1, cv.LINE_AA)
+  return im_dst
 
-def draw_landmarks3D(im, points, radius=3, color=(0,255,0), stroke_weight=2):
+def draw_landmarks3D(im, points, radius=3, color=(0,255,0)):
   '''Draws 3D facial landmarks
   '''
+  im_dst = im.copy()
   for x,y,z in points:
-    cv.circle(im, (x,y), radius, color, -1, cv.LINE_AA)
-
+    cv.circle(im_dst, (x,y), radius, color, -1, cv.LINE_AA)
+  return im_dst
 
-def draw_bbox(im, bbox, color=(0,255,0), stroke_weight=2):
-  '''Draws a dimensioned (not-normalized) BBox onto cv image
+def draw_bbox(im, bbox_norm, color=(0,255,0), stroke_weight=2):
+  '''Draws normalized BBox onto cv image
   '''
-  cv.rectangle(im, bbox.pt_tl, bbox.pt_br, color, stroke_weight)
-
+  im_dst = im.copy()
+  bbox_dim = bbox_norm.to_dim(im.shape[:2][::-1])
+  cv.rectangle(im_dst, bbox_dim.pt_tl, bbox_dim.pt_br, color, stroke_weight)
+  return im_dst
 
 def draw_pose(im, pt_nose, image_pts):
   '''Draws 3-axis pose over image
+  TODO: normalize point data
   '''
-  cv.line(im, pt_nose, tuple(image_pts['pitch'].ravel()), pose_types['pitch'], 3)
-  cv.line(im, pt_nose, tuple(image_pts['yaw'].ravel()), pose_types['yaw'], 3)
-  cv.line(im, pt_nose, tuple(image_pts['roll'].ravel()), pose_types['roll'], 3)
-
+  im_dst = im.copy()
+  log.debug(f'pt_nose: {pt_nose}')
+  log.debug(f'image_pts pitch: {image_pts["pitch"]}')
+  cv.line(im_dst, pt_nose, tuple(image_pts['pitch']), pose_types['pitch'], 3)
+  cv.line(im_dst, pt_nose, tuple(image_pts['yaw']), pose_types['yaw'], 3)
+  cv.line(im_dst, pt_nose, tuple(image_pts['roll']), pose_types['roll'], 3)
+  return im_dst
 
-def draw_text(im, pt, text, color=(0,255,0)):
+def draw_text(im, pt_norm, text, color=(0,255,0)):
   '''Draws degrees as text over image
   '''
-  cv.putText(im, text, pt, cv.FONT_HERSHEY_SIMPLEX, 0.75, color, thickness=1, lineType=cv.LINE_AA)
-
+  im_dst = im.copy()
+  dim = im.shape[:2][::-1]
+  pt = tuple(map(int, (pt_norm[0]*dim[0], pt_norm[1]*dim[1])))
+  cv.putText(im_dst, text, pt, cv.FONT_HERSHEY_SIMPLEX, 0.75, color, thickness=1, lineType=cv.LINE_AA)
+  return im_dst
 
 def draw_degrees(im, pose_data, color=(0,255,0)):
   '''Draws degrees as text over image
   '''
+  im_dst = im.copy()
   for i, pose_type in enumerate(pose_types.items()):
     k, clr = pose_type
     v = pose_data[k]
     t = '{}: {:.2f}'.format(k, v)
     origin = (10, 30 + (25 * i))
-    cv.putText(im, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
+    cv.putText(im_dst, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
+  return im_dst
\ No newline at end of file
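With this change none of the draw helpers mutates its input frame; each copies it and returns the annotated copy, so calls chain naturally. A short sketch (inputs illustrative):

```python
import cv2 as cv
from app.models.bbox import BBox
from app.utils import draw_utils

im = cv.imread('face.jpg')            # illustrative path
bbox_norm = BBox(0.3, 0.2, 0.7, 0.8)  # illustrative face box

# each call returns a new annotated copy; the source frame is untouched
im_out = draw_utils.draw_bbox(im, bbox_norm)
im_out = draw_utils.draw_degrees(im_out, {'pitch': 3.1, 'roll': -1.2, 'yaw': 10.4})
cv.imwrite('face_annotated.jpg', im_out)
```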