diff options
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/models/data_store.py (renamed from megapixels/app/utils/path_utils.py) | 3 | ||||
| -rw-r--r-- | megapixels/app/models/dataset.py | 41 | ||||
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 8 |
3 files changed, 36 insertions, 16 deletions
diff --git a/megapixels/app/utils/path_utils.py b/megapixels/app/models/data_store.py index b0262ea0..8ec1f8ba 100644 --- a/megapixels/app/utils/path_utils.py +++ b/megapixels/app/models/data_store.py @@ -21,6 +21,9 @@ class DataStore: def metadata(self, enum_type): return join(self.dir_metadata, f'{enum_type.name.lower()}.csv') + def media_images_original(self): + return join(self.dir_media, 'original') + def face_image(self, subdir, fn, ext): return join(self.dir_media, 'original', subdir, f'{fn}.{ext}') diff --git a/megapixels/app/models/dataset.py b/megapixels/app/models/dataset.py index 11d568a5..8fef8a7e 100644 --- a/megapixels/app/models/dataset.py +++ b/megapixels/app/models/dataset.py @@ -2,6 +2,7 @@ Dataset model: container for all CSVs about a dataset """ import os +import sys from os.path import join from pathlib import Path import logging @@ -12,7 +13,8 @@ import numpy as np from app.settings import app_cfg as cfg from app.settings import types from app.models.bbox import BBox -from app.utils import file_utils, im_utils, path_utils +from app.utils import file_utils, im_utils +from app.models.data_store import DataStore, DataStoreS3 from app.utils.logger_utils import Logger # ------------------------------------------------------------------------- @@ -21,17 +23,19 @@ from app.utils.logger_utils import Logger class Dataset: - def __init__(self, opt_dataset_type, opt_data_store=types.DataStore.NAS): + def __init__(self, opt_data_store, opt_dataset_type, load_files=True): self._dataset_type = opt_dataset_type # enum type self.log = Logger.getLogger() self._metadata = {} self._face_vectors = [] self._nullframe = pd.DataFrame() # empty placeholder - self.data_store = path_utils.DataStore(opt_data_store, self._dataset_type) - self.data_store_s3 = path_utils.DataStoreS3(self._dataset_type) + self.data_store = DataStore(opt_data_store, self._dataset_type) + self.data_store_s3 = DataStoreS3(self._dataset_type) + self.load_metadata() - def load(self, opt_data_store): + def load_metadata(self): '''Loads all CSV files into (dict) of DataFrames''' + self.log.info(f'creating dataset: {self._dataset_type}...') for metadata_type in types.Metadata: self.log.info(f'load metadata: {metadata_type}') fp_csv = self.data_store.metadata(metadata_type) @@ -40,11 +44,12 @@ class Dataset: self._metadata[metadata_type] = pd.read_csv(fp_csv).set_index('index') if metadata_type == types.Metadata.FACE_VECTOR: # convert DataFrame to list of floats - self._face_vecs = self.df_to_vec_list(self._metadata[metadata_type]) + self._face_vectors = self.df_to_vec_list(self._metadata[metadata_type]) + self.log.info(f'build face vector dict: {len(self._face_vectors)}') self._metadata[metadata_type].drop('vec', axis=1, inplace=True) else: - self.log.error('File not found: {fp_csv}. Replaced with empty DataFrame') - self._metadata[metadata_type] = self._nullframe + self.log.error(f'File not found: {fp_csv}. Exiting.') + sys.exit() self.log.info('finished loading') def metadata(self, opt_metadata_type): @@ -80,7 +85,7 @@ class Dataset: image_record = ImageRecord(image_index, sha256, uuid, bbox, fp_im, fp_url) # now get the identity index (if available) identity_index = ds_sha256.identity_index - if identity_index: + if identity_index > -1: # then use the identity index to get the identity meta df_identity = df_filepath = self._metadata[types.Metadata.IDENTITY] ds_identity = df_identity.iloc[identity_index] @@ -95,18 +100,24 @@ class Dataset: identity = Identity(identity_index, name=name, desc=desc, gender=gender, n_images=n_images, url=url, age=age, nationality=nationality) image_record.identity = identity + else: + self.log.info(f'no identity index: {ds_sha256}') return image_record - def matches(self, query_vec, n_results=5, threshold=0.5): + def find_matches(self, query_vec, n_results=5, threshold=0.6): image_records = [] # list of image matches w/identity if available # find most similar feature vectors indexes - match_idxs = self.similar(query_vec, n_results, threshold) + #match_idxs = self.similar(query_vec, n_results, threshold) + sim_scores = np.linalg.norm(np.array([query_vec]) - np.array(self._face_vectors), axis=1) + match_idxs = np.argpartition(sim_scores, n_results)[:n_results] + for match_idx in match_idxs: # get the corresponding face vector row + self.log.debug(f'find match index: {match_idx}') image_record = self.roi_idx_to_record(match_idx) - results.append(image_record) + image_records.append(image_record) return image_records # ---------------------------------------------------------------------- @@ -114,8 +125,7 @@ class Dataset: def df_to_vec_list(self, df): # convert the DataFrame CSV to float list of vecs - vecs = [list(map(float,x.vec.split(','))) for x in df.itertuples()] - return vecs + return [list(map(float,x.vec.split(','))) for x in df.itertuples()] def similar(self, query_vec, n_results): '''Finds most similar N indices of query face vector @@ -124,8 +134,7 @@ class Dataset: :returns (list) of (int) indices ''' # uses np.linalg based on the ageitgey/face_recognition code - vecs_sim_scores = np.linalg.norm(np.array([query_vec]) - np.array(self._face_vectors), axis=1) - top_idxs = np.argpartition(vecs_sim_scores, n_results)[:n_results] + return top_idxs diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index 50eaf576..7f9ed187 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -75,6 +75,7 @@ DIR_COMMANDS_DATASETS = 'commands/datasets' DIR_COMMANDS_FAISS = 'commands/faiss' DIR_COMMANDS_MISC = 'commands/misc' DIR_COMMANDS_SITE = 'commands/site' +DIR_COMMANDS_DEMO = 'commands/demo' # ----------------------------------------------------------------------------- # Filesystem settings @@ -89,6 +90,13 @@ HASH_BRANCH_SIZE = 3 DLIB_FACEREC_JITTERS = 5 # number of face recognition jitters DLIB_FACEREC_PADDING = 0.25 # default dlib +POSE_MINMAX_YAW = (-25,25) +POSE_MINMAX_ROLL = (-15,15) +POSE_MINMAX_PITCH = (-10,10) + +POSE_MINMAX_YAW = (-40,40) +POSE_MINMAX_ROLL = (-35,35) +POSE_MINMAX_PITCH = (-25,25) # ----------------------------------------------------------------------------- # Logging options exposed for custom click Params # ----------------------------------------------------------------------------- |
