diff options
Diffstat (limited to 'megapixels/app/models/dataset.py')
| -rw-r--r-- | megapixels/app/models/dataset.py | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/megapixels/app/models/dataset.py b/megapixels/app/models/dataset.py index 35e10465..eb0109a7 100644 --- a/megapixels/app/models/dataset.py +++ b/megapixels/app/models/dataset.py @@ -40,7 +40,7 @@ class Dataset: self._metadata[metadata_type] = pd.read_csv(fp_csv).set_index('index') # convert DataFrame to list of floats self._face_vectors = self.df_vecs_to_dict(self._metadata[metadata_type]) - self._face_vector_idxs = self.df_vec_idxs_to_dict(self._metadata[metadata_type]) + self._face_vector_roi_idxs = self.df_vec_roi_idxs_to_dict(self._metadata[metadata_type]) self.log.info(f'build face vector dict: {len(self._face_vectors)}') # remove the face vector column, it can be several GB of memory self._metadata[metadata_type].drop('vec', axis=1, inplace=True) @@ -81,8 +81,8 @@ class Dataset: # future datasets can have multiple identities per images ds_identities = df_identity.iloc[identity_index] # get filepath and S3 url - fp_im = self.data_store.face_image(ds_record.subdir, ds_record.fn, ds_record.ext) - s3_url = self.data_store_s3.face_image(ds_record.uuid) + fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext) + s3_url = self.data_store_s3.face(ds_record.uuid) image_record = ImageRecord(ds_record, fp_im, s3_url, ds_identities=ds_identities) return image_record @@ -147,8 +147,14 @@ class Dataset: for match_idx in match_idxs: # get the corresponding face vector row roi_index = self._face_vector_roi_idxs[match_idx] + df_record = self._metadata[types.Metadata.FILE_RECORD] + ds_record = df_record.iloc[roi_index] self.log.debug(f'find match index: {match_idx}, --> roi_index: {roi_index}') - image_record = self.roi_idx_to_record(roi_index) + fp_im = self.data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext) + s3_url = self.data_store_s3.face(ds_record.uuid) + image_record = ImageRecord(ds_record, fp_im, s3_url) + #roi_index = self._face_vector_roi_idxs[match_idx] + #image_record = self.roi_idx_to_record(roi_index) image_records.append(image_record) return image_records @@ -159,10 +165,10 @@ class Dataset: # convert the DataFrame CSV to float list of vecs return [list(map(float,x.vec.split(','))) for x in df.itertuples()] - def df_vec_idxs_to_dict(self, df): + def df_vec_roi_idxs_to_dict(self, df): # convert the DataFrame CSV to float list of vecs #return [x.roi_index for x in df.itertuples()] - return [x.image_index for x in df.itertuples()] + return [x.roi_index for x in df.itertuples()] def similar(self, query_vec, n_results): '''Finds most similar N indices of query face vector |
