diff options
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | megapixels/app/models/bbox.py | 16 | ||||
| -rw-r--r-- | megapixels/app/processors/face_extractor.py | 127 | ||||
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 6 | ||||
| -rw-r--r-- | megapixels/app/settings/types.py | 7 | ||||
| -rw-r--r-- | megapixels/commands/cv/face_vector.py | 36 |
6 files changed, 179 insertions, 16 deletions
@@ -2,7 +2,8 @@ 3rdparty/ *_local_* site/public/datasets/ - +site/public/*.html +notebooks/_local_* # Project specific webpack-stats.dev.json diff --git a/megapixels/app/models/bbox.py b/megapixels/app/models/bbox.py index 40874691..f1216698 100644 --- a/megapixels/app/models/bbox.py +++ b/megapixels/app/models/bbox.py @@ -130,6 +130,22 @@ class BBox: # ----------------------------------------------------------------- # Modify + def expand(self, per): + """Expands BBox by percentage + :param per: (float) percentage to expand 0.0 - 1.0 + :param dim: (int, int) image width, height + :returns (BBox) expanded + """ + # expand + dw, dh = [(self._width * per), (self._height * per)] + r = list(np.array(self._rect) + np.array([-dw, -dh, dw, dh])) + # threshold expanded rectangle + r[0] = max(r[0], 0.0) + r[1] = max(r[1], 0.0) + r[2] = min(r[2], 1.0) + r[3] = min(r[3], 1.0) + return BBox(*r) + def expand_dim(self, amt, bounds): """Expands BBox within dim :param box: (tuple) left, top, right, bottom diff --git a/megapixels/app/processors/face_extractor.py b/megapixels/app/processors/face_extractor.py new file mode 100644 index 00000000..2666e090 --- /dev/null +++ b/megapixels/app/processors/face_extractor.py @@ -0,0 +1,127 @@ +import os +from os.path import join +from pathlib import Path + +import cv2 as cv +import numpy as np +import dlib +import imutils + +from app.utils import im_utils, logger_utils +from app.models.bbox import BBox +from app.settings import app_cfg as cfg +from app.settings import types + +def similarity(self, query_enc, known_enc): + return np.linalg.norm(query_enc - known_enc, axis=1) + +def flatten(vec): + '''Converts N-D vector into a flattened list for CSV + :param points: (list) a feature vector as list of floats + :returns dict item for each point (eg {'d1':0.28442156, 'd1': 0.1868632}) + ''' + vec_flat = {} + for idx, val in enumerate(vec, 1): + vec_flat[f'd{idx}'] = val + return vec_flat + + + +class Extractor: + + n_dim = None # override + + def __init__(self): + self.log = logger_utils.Logger.getLogger() + + def flatten(self, vec): + '''Converts N-D vector into a flattened list for CSV + :param points: (list) a feature vector as list of floats + :returns dict item for each point (eg {'d1':0.28442156, 'd1': 0.1868632}) + ''' + vec_flat = {} + for idx, val in enumerate(vec, 1): + vec_flat[f'd{idx}'] = val + return vec_flat + + def unflatten_df(self, df): + # convert from + return [df[f'd{i}'] for i in range(1,257)] + + +class ExtractorVGG(Extractor): + + # https://github.com/ox-vgg/vgg_face2 + # Uses OpenCV DNN to extract feature vector for VGG Face 2 models + n_dim = 256 + dnn_dim = (224,224) + dnn_mean = (91.4953, 103.8827, 131.0912) + + def __init__(self): + super().__init__() + fp_model = '/data_store_hdd/apps/megapixels/models/caffe/vgg_face2/resnet50_256_caffe/resnet50_256.caffemodel' + fp_prototxt = '/data_store_hdd/apps/megapixels/models/caffe/vgg_face2/resnet50_256_caffe/resnet50_256.prototxt' + self.dnn = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model) + self.feat_layer = self.dnn.getLayerNames()[-2] + + def extract(self, im, bbox_norm, padding=0.3): + '''Extracts feature vector for face crop + :param im: + :param bbox_norm: (BBox) normalized + :param padding: (float) percent to extend ROI + :param jitters: not used here + :returns (list) of (float)''' + + bbox_ext = bbox_norm.expand(padding) + dim = im.shape[:2][::-1] + bbox_ext_dim = bbox_ext.to_dim(dim) + x1,y1,x2,y2 = bbox_ext_dim.to_xyxy() + im = im[y1:y2, x1:x2] + # According to VGG, model trained using Bilinear interpolation (INTER_LINEAR) + im = cv.resize(im, self.dnn_dim, interpolation=cv.INTER_LINEAR) + blob = cv.dnn.blobFromImage(im, 1.0, self.dnn_dim, self.dnn_mean) + self.dnn.setInput(blob) + vec = np.array(self.dnn.forward(self.feat_layer)[0]) + vec_norm = np.array(vec)/np.linalg.norm(vec) # normalize + return vec_norm + + +class ExtractorDLIB(Extractor): + + # https://github.com/davisking/dlib/blob/master/python_examples/face_recognition.py + # facerec.compute_face_descriptor(img, shape, 100, 0.25) + # padding=opt_padding not yet implemented in dlib===19.16 but merged in master + n_dim = 128 + process_width = 100 + + def __init__(self, gpu=0, jitters=cfg.DLIB_FACEREC_JITTERS): + super().__init__() + self.num_jitters = cfg.DLIB_FACEREC_JITTERS + # set and swap GPU visibility + if gpu > -1: + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '') + os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu) + self.predictor = dlib.shape_predictor(cfg.DIR_MODELS_DLIB_5PT) + self.facerec = dlib.face_recognition_model_v1(cfg.DIR_MODELS_DLIB_FACEREC_RESNET) + # unset and swap GPU visibility + if gpu > -1: + os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices # reset GPU env + + + def extract(self, im, bbox_norm): + '''Converts image and bbox into 128d vector + :param im: (numpy.ndarray) BGR image + :param bbox_norm: (BBox) normalized + ''' + # scale the image so the face is always 100x100 pixels + dim = im.shape[:2][::-1] + bbox_dim = bbox_norm.to_dim(dim) + scale = self.process_width / bbox_dim.width + cv.resize(im, None, fx=scale, fy=scale, interpolation=cv.INTER_LANCZOS4) + bbox_dim_dlib = bbox_dim.to_dlib() + face_shape = self.predictor(im, bbox_dim_dlib) + # this is only in dlib version 19.6++? + # vec = self.facerec.compute_face_descriptor(im, face_shape, self.num_jitters, self.padding) + # vectors are already normalized + vec = self.facerec.compute_face_descriptor(im, face_shape, self.num_jitters) + return vec diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index fea47572..14e2493c 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -21,6 +21,7 @@ DataStoreVar = click_utils.ParamVar(types.DataStore) # Face analysis HaarCascadeVar = click_utils.ParamVar(types.HaarCascade) FaceDetectNetVar = click_utils.ParamVar(types.FaceDetectNet) +FaceExtractorVar = click_utils.ParamVar(types.FaceExtractor) FaceLandmark2D_5Var = click_utils.ParamVar(types.FaceLandmark2D_5) FaceLandmark2D_68Var = click_utils.ParamVar(types.FaceLandmark2D_68) FaceLandmark3D_68Var = click_utils.ParamVar(types.FaceLandmark3D_68) @@ -41,7 +42,7 @@ DIR_PEOPLE = 'people' DIR_MODELS_CAFFE = join(DIR_MODELS,'caffe') DIR_MODELS_DARKNET = join(DIR_MODELS,'darknet') DIR_MODELS_DARKNET_PJREDDIE = join(DIR_MODELS_DARKNET, 'pjreddie') -DIR_MODELS_PYTORCH = join(DIR_MODELS,'pytorch') +DIR_MODELS_PYTORCHq = join(DIR_MODELS,'pytorch') DIR_MODELS_TORCH = join(DIR_MODELS,'torch') DIR_MODELS_MXNET = join(DIR_MODELS,'mxnet') DIR_MODELS_KERAS = join(DIR_MODELS,'keras') @@ -96,7 +97,8 @@ HASH_TREE_DEPTH = 3 HASH_BRANCH_SIZE = 3 DLIB_FACEREC_JITTERS = 5 # number of face recognition jitters -DLIB_FACEREC_PADDING = 0.25 # default dlib +#DLIB_FACEREC_PADDING = 0.25 # default dlib +FACEREC_PADDING = 0.3 # VGG FACE2 recommended POSE_MINMAX_YAW = (-25,25) POSE_MINMAX_ROLL = (-15,15) diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py index 1d77fdbd..940c8b6d 100644 --- a/megapixels/app/settings/types.py +++ b/megapixels/app/settings/types.py @@ -54,7 +54,12 @@ class Dataset(Enum): # -------------------------------------------------------------------- class FaceDetectNet(Enum): """Scene text detector networks""" - HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN = range(5) + HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN_TF, MTCNN_PT, MTCNN_CAFFE = range(7) + +class FaceExtractor(Enum): + """Type of face recognition feature extractor""" + # TODO deprecate DLIB resnet and use only CVDNN Caffe models + DLIB_RESNET, VGG_FACE2 = range(2) class FaceLandmark2D_5(Enum): DLIB, MTCNN = range(2) diff --git a/megapixels/commands/cv/face_vector.py b/megapixels/commands/cv/face_vector.py index 4df647f5..9e9f6396 100644 --- a/megapixels/commands/cv/face_vector.py +++ b/megapixels/commands/cv/face_vector.py @@ -1,5 +1,8 @@ """ Converts ROIs to face vector +NB: the VGG Face2 extractor should be used with MTCNN ROIs (not square) + the DLIB face extractor should be used with DLIB ROIs (square) +see https://github.com/ox-vgg/vgg_face2 for TAR@FAR """ import click @@ -26,10 +29,14 @@ from app.settings import app_cfg as cfg @click.option('--size', 'opt_size', type=(int, int), default=(300, 300), help='Output image size') +@click.option('-e', '--extractor', 'opt_extractor', + default=types.FaceExtractor.VGG, + type=cfg.FaceExtractorVar, + help='Type of extractor framework/network to use') @click.option('-j', '--jitters', 'opt_jitters', default=cfg.DLIB_FACEREC_JITTERS, - help='Number of jitters') -@click.option('-p', '--padding', 'opt_padding', default=cfg.DLIB_FACEREC_PADDING, - help='Percentage padding') + help='Number of jitters (only for dlib') +@click.option('-p', '--padding', 'opt_padding', default=cfg.FACEREC_PADDING, + help='Percentage ROI padding') @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), help='Slice list of files') @click.option('-f', '--force', 'opt_force', is_flag=True, @@ -38,7 +45,7 @@ from app.settings import app_cfg as cfg help='GPU index') @click.pass_context def cli(ctx, opt_fp_out, opt_dir_media, opt_data_store, opt_dataset, opt_size, - opt_slice, opt_force, opt_gpu, opt_jitters, opt_padding): + opt_extractor, opt_slice, opt_force, opt_gpu, opt_jitters, opt_padding): """Converts face ROIs to vectors""" import sys @@ -56,7 +63,7 @@ def cli(ctx, opt_fp_out, opt_dir_media, opt_data_store, opt_dataset, opt_size, from app.models.bbox import BBox from app.models.data_store import DataStore from app.utils import logger_utils, file_utils, im_utils - from app.processors import face_recognition + from app.processors import face_extractor # ------------------------------------------------- @@ -73,7 +80,11 @@ def cli(ctx, opt_fp_out, opt_dir_media, opt_data_store, opt_dataset, opt_size, return # init face processors - facerec = face_recognition.RecognitionDLIB() + if opt_extractor == types.FaceExtractor.DLIB: + log.debug('set dlib') + extractor = face_extractor.ExtractorDLIB(gpu=opt_gpu, jitters=opt_jitters) + elif opt_extractor == types.FaceExtractor.VGG: + extractor = face_extractor.ExtractorVGG() # load data fp_record = data_store.metadata(types.Metadata.FILE_RECORD) @@ -85,7 +96,8 @@ def cli(ctx, opt_fp_out, opt_dir_media, opt_data_store, opt_dataset, opt_size, df_roi = df_roi[opt_slice[0]:opt_slice[1]] # ------------------------------------------------- - # process here + # process images + df_img_groups = df_roi.groupby('record_index') log.debug('processing {:,} groups'.format(len(df_img_groups))) @@ -99,17 +111,17 @@ def cli(ctx, opt_fp_out, opt_dir_media, opt_data_store, opt_dataset, opt_size, # get bbox x, y, w, h = df_img.x, df_img.y, df_img.w, df_img.h dim = (ds_record.width, ds_record.height) - #dim = im.shape[:2][::-1] # get face vector - bbox_dim = BBox.from_xywh(x, y, w, h).to_dim(dim) # convert to int real dimensions + bbox = BBox.from_xywh(x, y, w, h) # norm # compute vec - # padding=opt_padding not yet implemented in dlib===19.16 but merged in master - vec = facerec.vec(im, bbox_dim, jitters=opt_jitters) - vec_flat = facerec.flatten(vec) + vec = extractor.extract(im, bbox) # use normalized BBox + vec_flat = extractor.flatten(vec) vec_flat['roi_index'] = roi_index vec_flat['record_index'] = record_index vecs.append(vec_flat) + # ------------------------------------------------- + # save data # create DataFrame and save to CSV df = pd.DataFrame.from_dict(vecs) |
