diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-01-05 12:35:01 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-01-05 12:35:01 +0100 |
| commit | 374dc54d049766fce225ca84d31fdf51f40f292c (patch) | |
| tree | 915b4bf3ea6645a1a65c30c4aee51870d6f955e3 /megapixels/app | |
| parent | 824c958a7f29ab1fe31d09035c04a150379aecea (diff) | |
| parent | bff4e1c50349b0ba7d8e5fab6ce697c0b856f13f (diff) | |
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/models/data_store.py | 4 | ||||
| -rw-r--r-- | megapixels/app/models/dataset.py | 18 | ||||
| -rw-r--r-- | megapixels/app/processors/face_detector.py | 23 | ||||
| -rw-r--r-- | megapixels/app/processors/face_landmarks_2d.py (renamed from megapixels/app/processors/face_landmarks.py) | 29 | ||||
| -rw-r--r-- | megapixels/app/processors/face_landmarks_3d.py | 97 | ||||
| -rw-r--r-- | megapixels/app/processors/face_pose.py | 42 | ||||
| -rw-r--r-- | megapixels/app/settings/types.py | 5 | ||||
| -rw-r--r-- | megapixels/app/utils/im_utils.py | 6 |
8 files changed, 188 insertions, 36 deletions
diff --git a/megapixels/app/models/data_store.py b/megapixels/app/models/data_store.py index 244aba60..626c9da4 100644 --- a/megapixels/app/models/data_store.py +++ b/megapixels/app/models/data_store.py @@ -21,13 +21,15 @@ class DataStore: def metadata(self, enum_type): return join(self.dir_metadata, f'{enum_type.name.lower()}.csv') - def metadata(self, enum_type): + def metadata_dir(self): return join(self.dir_metadata) def media_images_original(self): return join(self.dir_media, 'original') def face(self, subdir, fn, ext): + if subdir == '' or subdir is None: + subdir = '.' return join(self.dir_media, 'original', subdir, f'{fn}.{ext}') def face_crop(self, subdir, fn, ext): diff --git a/megapixels/app/models/dataset.py b/megapixels/app/models/dataset.py index 35e10465..eb0109a7 100644 --- a/megapixels/app/models/dataset.py +++ b/megapixels/app/models/dataset.py @@ -40,7 +40,7 @@ class Dataset: self._metadata[metadata_type] = pd.read_csv(fp_csv).set_index('index') # convert DataFrame to list of floats self._face_vectors = self.df_vecs_to_dict(self._metadata[metadata_type]) - self._face_vector_idxs = self.df_vec_idxs_to_dict(self._metadata[metadata_type]) + self._face_vector_roi_idxs = self.df_vec_roi_idxs_to_dict(self._metadata[metadata_type]) self.log.info(f'build face vector dict: {len(self._face_vectors)}') # remove the face vector column, it can be several GB of memory self._metadata[metadata_type].drop('vec', axis=1, inplace=True) @@ -81,8 +81,8 @@ class Dataset: # future datasets can have multiple identities per images ds_identities = df_identity.iloc[identity_index] # get filepath and S3 url - fp_im = self.data_store.face_image(ds_record.subdir, ds_record.fn, ds_record.ext) - s3_url = self.data_store_s3.face_image(ds_record.uuid) + fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext) + s3_url = self.data_store_s3.face(ds_record.uuid) image_record = ImageRecord(ds_record, fp_im, s3_url, ds_identities=ds_identities) return image_record @@ -147,8 +147,14 @@ class Dataset: for match_idx in match_idxs: # get the corresponding face vector row roi_index = self._face_vector_roi_idxs[match_idx] + df_record = self._metadata[types.Metadata.FILE_RECORD] + ds_record = df_record.iloc[roi_index] self.log.debug(f'find match index: {match_idx}, --> roi_index: {roi_index}') - image_record = self.roi_idx_to_record(roi_index) + fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext) + s3_url = self.data_store_s3.face(ds_record.uuid) + image_record = ImageRecord(ds_record, fp_im, s3_url) + #roi_index = self._face_vector_roi_idxs[match_idx] + #image_record = self.roi_idx_to_record(roi_index) image_records.append(image_record) return image_records @@ -159,10 +165,10 @@ class Dataset: # convert the DataFrame CSV to float list of vecs return [list(map(float,x.vec.split(','))) for x in df.itertuples()] - def df_vec_idxs_to_dict(self, df): + def df_vec_roi_idxs_to_dict(self, df): # convert the DataFrame CSV to float list of vecs #return [x.roi_index for x in df.itertuples()] - return [x.image_index for x in df.itertuples()] + return [x.roi_index for x in df.itertuples()] def similar(self, query_vec, n_results): '''Finds most similar N indices of query face vector diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py index 3a90c557..a805a474 100644 --- a/megapixels/app/processors/face_detector.py +++ b/megapixels/app/processors/face_detector.py @@ -24,7 +24,7 @@ class DetectorMTCNN: from mtcnn.mtcnn import MTCNN self.detector = MTCNN() - def detect(self, im, size=(400,400), conf_thresh=None, pyramids=None, largest=False): + def detect(self, im, size=(400,400), conf_thresh=None, pyramids=None, largest=False, zone=None): '''Detects face using MTCNN and returns (list) of BBox :param im: (numpy.ndarray) image :returns list of BBox @@ -79,7 +79,7 @@ class DetectorDLIBCNN: self.detector = dlib.cnn_face_detection_model_v1(cfg.DIR_MODELS_DLIB_CNN) os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices # reset - def detect(self, im, size=None, conf_thresh=None, pyramids=None, largest=False): + def detect(self, im, size=None, conf_thresh=None, pyramids=None, largest=False, zone=None): bboxes = [] conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh pyramids = self.pyramids if pyramids is None else pyramids @@ -96,6 +96,10 @@ class DetectorDLIBCNN: bbox = BBox.from_dlib_dim(mmod_rect.rect, dim) bboxes.append(bbox) + if zone: + bboxes = [b for b in bboxes if b.cx > zone[0] and b.cx < 1.0 - zone[0] \ + and b.cy > zone[1] and b.cy < 1.0 - zone[1]] + if largest and len(bboxes) > 1: # only keep largest bboxes.sort(key=operator.attrgetter('area'), reverse=True) @@ -115,7 +119,7 @@ class DetectorDLIBHOG: self.log = logger_utils.Logger.getLogger() self.detector = dlib.get_frontal_face_detector() - def detect(self, im, size=None, conf_thresh=None, pyramids=0, largest=False): + def detect(self, im, size=None, conf_thresh=None, pyramids=0, largest=False, zone=False): conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh dnn_size = self.size if size is None else size pyramids = self.pyramids if pyramids is None else pyramids @@ -132,8 +136,13 @@ class DetectorDLIBHOG: bbox = BBox.from_dlib_dim(rect, dim) bboxes.append(bbox) + # filter to keep on faces inside zone + if zone: + bboxes = [b for b in bboxes if b.cx > zone[0] and b.cx < 1.0 - zone[0] \ + and b.cy > zone[1] and b.cy < 1.0 - zone[1]] + + # filter to keep only largest face if largest and len(bboxes) > 1: - # only keep largest bboxes.sort(key=operator.attrgetter('area'), reverse=True) bboxes = [bboxes[0]] @@ -155,7 +164,7 @@ class DetectorCVDNN: self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) - def detect(self, im, size=None, conf_thresh=None, largest=False, pyramids=None): + def detect(self, im, size=None, conf_thresh=None, largest=False, pyramids=None, zone=False): """Detects faces and returns (list) of (BBox)""" conf_thresh = self.conf_thresh if conf_thresh is None else conf_thresh dnn_size = self.size if size is None else size @@ -171,6 +180,10 @@ class DetectorCVDNN: rect_norm = net_outputs[0, 0, i, 3:7] bboxes.append(BBox(*rect_norm)) + if zone: + bboxes = [b for b in bboxes if b.cx > zone[0] and b.cx < 1.0 - zone[0] \ + and b.cy > zone[1] and b.cy < 1.0 - zone[1]] + if largest and len(bboxes) > 1: # only keep largest bboxes.sort(key=operator.attrgetter('area'), reverse=True) diff --git a/megapixels/app/processors/face_landmarks.py b/megapixels/app/processors/face_landmarks_2d.py index dfcb9ee8..e8ce93c1 100644 --- a/megapixels/app/processors/face_landmarks.py +++ b/megapixels/app/processors/face_landmarks_2d.py @@ -11,6 +11,33 @@ from app.settings import app_cfg as cfg from app.settings import types from app.models.bbox import BBox +class LandmarksFaceAlignment: + + # Estimates 2D facial landmarks + import face_alignment + + def __init__(self, gpu=0): + self.log = logger_utils.Logger.getLogger() + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True) + + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' + preds = self.fa.get_landmarks(im) + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else: + return preds_int + + class LandmarksDLIB: def __init__(self): @@ -39,7 +66,7 @@ class LandmarksMTCNN: from mtcnn.mtcnn import MTCNN self.detector = MTCNN() - def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None): + def landmarks(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None): '''Detects face using MTCNN and returns (list) of BBox :param im: (numpy.ndarray) image :returns list of BBox diff --git a/megapixels/app/processors/face_landmarks_3d.py b/megapixels/app/processors/face_landmarks_3d.py index 84a423b0..3663364c 100644 --- a/megapixels/app/processors/face_landmarks_3d.py +++ b/megapixels/app/processors/face_landmarks_3d.py @@ -13,15 +13,104 @@ from app.settings import app_cfg as cfg from app.settings import types +class FaceLandmarks2D: + + # Estimates 2D facial landmarks + import face_alignment + + def __init__(self, gpu=0): + self.log = logger_utils.Logger.getLogger() + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, flip_input=True) + + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' + preds = self.fa.get_landmarks(im) + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else + return preds_int + class FaceLandmarks3D: # Estimates 3D facial landmarks + import face_alignment - def __init__(self): + def __init__(self, gpu=0): self.log = logger_utils.Logger.getLogger() - pass + device = f'cuda:{gpu}' if gpu > -1 else 'cpu' + self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device=device, flip_input=False) + + def landmarks(self, im, as_type=str): + '''Calculates the 3D facial landmarks + :param im: (numpy.ndarray) image + :param as_type: (str) or (list) type to return data + ''' + preds = self.fa.get_landmarks(im) + # convert to comma separated ints + # storing data as "[1,2], [3,4]" is larger file size than storing as "1,2,3,4" + # storing a list object in Pandas seems to result in 30% larger CSV files + # TODO optimize this + preds_int = [list(map(int, x)) for x in preds[0]] # list of ints + if as_type is str: + return ','.join([','.join(list(map(str,[x,y]))) for x,y in preds_int]) + else + return preds_int + + def draw(self, im): + '''draws landmarks in 3d scene''' + + # TODO + ''' + import face_alignment + import numpy as np + from mpl_toolkits.mplot3d import Axes3D + import matplotlib.pyplot as plt + from skimage import io + + # Run the 3D face alignment on a test image, without CUDA. + fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='cuda:0', flip_input=True) + + input = io.imread('../test/assets/aflw-test.jpg') + preds = fa.get_landmarks(input)[-1] + + #TODO: Make this nice + fig = plt.figure(figsize=plt.figaspect(.5)) + ax = fig.add_subplot(1, 2, 1) + ax.imshow(input) + ax.plot(preds[0:17,0],preds[0:17,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[17:22,0],preds[17:22,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[22:27,0],preds[22:27,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[27:31,0],preds[27:31,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[31:36,0],preds[31:36,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[36:42,0],preds[36:42,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[42:48,0],preds[42:48,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[48:60,0],preds[48:60,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.plot(preds[60:68,0],preds[60:68,1],marker='o',markersize=6,linestyle='-',color='w',lw=2) + ax.axis('off') + ax = fig.add_subplot(1, 2, 2, projection='3d') + surf = ax.scatter(preds[:,0]*1.2,preds[:,1],preds[:,2],c="cyan", alpha=1.0, edgecolor='b') + ax.plot3D(preds[:17,0]*1.2,preds[:17,1], preds[:17,2], color='blue' ) + ax.plot3D(preds[17:22,0]*1.2,preds[17:22,1],preds[17:22,2], color='blue') + ax.plot3D(preds[22:27,0]*1.2,preds[22:27,1],preds[22:27,2], color='blue') + ax.plot3D(preds[27:31,0]*1.2,preds[27:31,1],preds[27:31,2], color='blue') + ax.plot3D(preds[31:36,0]*1.2,preds[31:36,1],preds[31:36,2], color='blue') + ax.plot3D(preds[36:42,0]*1.2,preds[36:42,1],preds[36:42,2], color='blue') + ax.plot3D(preds[42:48,0]*1.2,preds[42:48,1],preds[42:48,2], color='blue') + ax.plot3D(preds[48:,0]*1.2,preds[48:,1],preds[48:,2], color='blue' ) - def landmarks(self): - return [1,2,3,4,100]
\ No newline at end of file + ax.view_init(elev=90., azim=90.) + ax.set_xlim(ax.get_xlim()[::-1]) + plt.show() + ''' + return im
\ No newline at end of file diff --git a/megapixels/app/processors/face_pose.py b/megapixels/app/processors/face_pose.py index f2548b32..96281637 100644 --- a/megapixels/app/processors/face_pose.py +++ b/megapixels/app/processors/face_pose.py @@ -18,12 +18,13 @@ class FacePoseDLIB: dnn_size = (400, 400) + pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)} def __init__(self): pass - def pose(self, landmarks, dim, project_points=False): + def pose(self, landmarks, dim): # computes pose using 6 / 68 points from dlib face landmarks # based on learnopencv.com and # https://github.com/jerryhouuu/Face-Yaw-Roll-Pitch-from-Pose-Estimation-using-OpenCV/ @@ -68,12 +69,17 @@ class FacePoseDLIB: result = {} # project points - if project_points: - pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs) - pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs) - result['points_model'] = pts_model - result['points_image'] = pts_im - result['point_nose'] = tuple(landmarks[pose_points_idx[0]]) + #if project_points: + pts_im, jac = cv.projectPoints(axis, rot_vec, tran_vec, cam_mat, dist_coeffs) + pts_model, jac2 = cv.projectPoints(model_points, rot_vec, tran_vec, cam_mat, dist_coeffs) + #result['points_model'] = pts_model + #result['points_image'] = pts_im + result['points'] = { + 'pitch': pts_im[0], + 'roll': pts_im[2], + 'yaw': pts_im[1] + } + result['point_nose'] = tuple(landmarks[pose_points_idx[0]]) rvec_matrix = cv.Rodrigues(rot_vec)[0] @@ -84,21 +90,23 @@ class FacePoseDLIB: pitch = math.degrees(math.asin(math.sin(pitch))) roll = -math.degrees(math.asin(math.sin(roll))) yaw = math.degrees(math.asin(math.sin(yaw))) - degrees = {'pitch': pitch, 'roll': roll, 'yaw': yaw} - result['degrees'] = degrees + result['pitch'] = pitch + result['roll'] = roll + result['yaw'] = yaw return result - def draw_pose(self, im, pts_im, pts_model, pt_nose): - cv.line(im, pt_nose, tuple(pts_im[1].ravel()), (0,255,0), 3) #GREEN - cv.line(im, pt_nose, tuple(pts_im[0].ravel()), (255,0,), 3) #BLUE - cv.line(im, pt_nose, tuple(pts_im[2].ravel()), (0,0,255), 3) #RED + def draw_pose(self, im, pt_nose, image_pts): + cv.line(im, pt_nose, tuple(image_pts['pitch'].ravel()), self.pose_types['pitch'], 3) + cv.line(im, pt_nose, tuple(image_pts['yaw'].ravel()), self.pose_types['yaw'], 3) + cv.line(im, pt_nose, tuple(image_pts['roll'].ravel()), self.pose_types['roll'], 3) - def draw_degrees(self, im, degrees, color=(0,255,0)): - for i, item in enumerate(degrees.items()): - k, v = item + def draw_degrees(self, im, pose_data, color=(0,255,0)): + for i, pose_type in enumerate(self.pose_types.items()): + k, clr = pose_type + v = pose_data[k] t = '{}: {:.2f}'.format(k, v) origin = (10, 30 + (25 * i)) - cv.putText(im, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=2, lineType=2)
\ No newline at end of file + cv.putText(im, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
\ No newline at end of file diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py index 754be618..c2e2caf7 100644 --- a/megapixels/app/settings/types.py +++ b/megapixels/app/settings/types.py @@ -45,10 +45,11 @@ class LogLevel(Enum): # -------------------------------------------------------------------- class Metadata(Enum): - IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, FACE_ROI = range(5) + IDENTITY, FILE_RECORD, FACE_VECTOR, FACE_POSE, FACE_ROI, FACE_LANDMARKS_2D_68, \ + FACE_LANDMARKS_3D_68 = range(7) class Dataset(Enum): - LFW, VGG_FACE2 = range(2) + LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES = range(5) # --------------------------------------------------------------------- diff --git a/megapixels/app/utils/im_utils.py b/megapixels/app/utils/im_utils.py index d5e92aa3..e882c67f 100644 --- a/megapixels/app/utils/im_utils.py +++ b/megapixels/app/utils/im_utils.py @@ -21,6 +21,12 @@ import datetime +def num_channels(im): + '''Returns number of channels in numpy.ndarray image''' + if len(im.shape) > 2: + return im.shape[2] + else: + return 1 def is_grayscale(im, threshold=5): """Returns True if image is grayscale |
