diff options
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/models/data_store.py | 46 | ||||
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 7 | ||||
| -rw-r--r-- | megapixels/app/settings/types.py | 5 | ||||
| -rw-r--r-- | megapixels/app/utils/api_utils.py | 25 | ||||
| -rw-r--r-- | megapixels/app/utils/identity_utils.py | 78 |
5 files changed, 132 insertions, 29 deletions
diff --git a/megapixels/app/models/data_store.py b/megapixels/app/models/data_store.py index a8d6916f..b4260b9c 100644 --- a/megapixels/app/models/data_store.py +++ b/megapixels/app/models/data_store.py @@ -2,6 +2,7 @@ import os from os.path import join import logging +from app.utils.logger_utils import Logger from app.settings import app_cfg as cfg from app.settings import types @@ -11,41 +12,62 @@ from app.settings import types # ------------------------------------------------------------------------- class DataStore: + # local data store + log = Logger.getLogger() + def __init__(self, opt_data_store, opt_dataset): self.data_store = join(f'/data_store_{opt_data_store.name.lower()}') - self.dir_dataset = join(self.data_store, 'datasets', cfg.DIR_PEOPLE, opt_dataset.name.lower()) - self.dir_media = join(self.dir_dataset, 'media') - self.dir_metadata = join(self.dir_dataset, 'metadata') + self._dir_dataset = join(self.data_store, 'datasets', cfg.DIR_PEOPLE, opt_dataset.name.lower()) + self._dir_media = join(self._dir_dataset, 'media') + self._dir_metadata = join(self._dir_dataset, 'metadata') def metadata(self, enum_type): - return join(self.dir_metadata, f'{enum_type.name.lower()}.csv') + return join(self._dir_metadata, f'{enum_type.name.lower()}.csv') + + @property + def dir_dataset(self): + return self._dir_dataset + + @property + def dir_media(self): + return self._dir_media + + @property + def dir_media_original(self): + return join(self._dir_media, 'original') + + @property + def dir_metadata(self): + return self._dir_metadata def metadata_dir(self): - return join(self.dir_metadata) + self.log.warn('deprecated. use dir_metadata') + return self._dir_metadata def media_dir(self): - return join(self.dir_media) + self.log.warn('deprecated. 
use dir_media') + return self._dir_media def media_images_original(self): - return join(self.dir_media, 'original') + return join(self._dir_media, 'original') def face(self, subdir, fn, ext): if subdir == '' or subdir is None: subdir = '.' - return join(self.dir_media, 'original', subdir, f'{fn}.{ext}') + return join(self._dir_media, 'original', subdir, f'{fn}.{ext}') def face_crop(self, subdir, fn, ext): - return join(self.dir_media, 'cropped', subdir, f'{fn}.{ext}') + return join(self._dir_media, 'cropped', subdir, f'{fn}.{ext}') def face_uuid(self, uuid, ext): - return join(self.dir_media, 'uuid',f'{uuid}.{ext}') + return join(self._dir_media, 'uuid',f'{uuid}.{ext}') def face_crop_uuid(self, uuid, ext): - return join(self.dir_media, 'uuid', f'{uuid}.{ext}') + return join(self._dir_media, 'uuid', f'{uuid}.{ext}') def uuid_dir(self): - return join(self.dir_media, 'uuid') + return join(self._dir_media, 'uuid') class DataStoreS3: diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index 2b10f9f0..0b1fb69d 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -6,6 +6,7 @@ from dotenv import load_dotenv from app.settings import types from app.utils import click_utils +from pathlib import Path import codecs codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None) @@ -26,6 +27,10 @@ FaceLandmark2D_5Var = click_utils.ParamVar(types.FaceLandmark2D_5) FaceLandmark2D_68Var = click_utils.ParamVar(types.FaceLandmark2D_68) FaceLandmark3D_68Var = click_utils.ParamVar(types.FaceLandmark3D_68) +# base path +DIR_SELF = os.path.dirname(os.path.realpath(__file__)) +DIR_ROOT = Path(DIR_SELF).parent.parent.parent + # # data_store DATA_STORE = '/data_store_hdd/' DATA_STORE_NAS = '/data_store_nas/' @@ -64,7 +69,7 @@ DIR_TEST_IMAGES = join(DIR_APP, 'test', 'images') # ----------------------------------------------------------------------------- # .env config for keys # 
----------------------------------------------------------------------------- - +FP_KNOWLEDGE_GRAPH_ENV = join(DIR_ROOT, 'env/google_knowledge_graph_api.env') # DIR_DOTENV = join(DIR_APP, '.env') load_dotenv() # dotenv_path=DIR_DOTENV) diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py index 933d1932..3d7e96c0 100644 --- a/megapixels/app/settings/types.py +++ b/megapixels/app/settings/types.py @@ -47,8 +47,9 @@ class Metadata(Enum): FACE_ATTRIBUTES, IMAGE_COUNT = range(10) class Dataset(Enum): - LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ - CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(16) + LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ + CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI, \ + LARGE_AGE_GAP = range(18) # --------------------------------------------------------------------- diff --git a/megapixels/app/utils/api_utils.py b/megapixels/app/utils/api_utils.py index ec00113e..d9d67425 100644 --- a/megapixels/app/utils/api_utils.py +++ b/megapixels/app/utils/api_utils.py @@ -2,15 +2,21 @@ import json import urllib import urllib.request +from app.settings import app_cfg +from app.utils import file_utils, im_utils, logger_utils + class WikipediaAPI: url_base = 'https://en.wikipedia.org/w/api.php' - + log = logger_utils.Logger.getLogger() + # https://en.wikipedia.org/w/api.php?redirects=& + # ppprop=displaytitle&prop=pageprops|pageimages|description&generator=prefixsearch + # &action=query&format=json&piprop=thumbnail&pithumbsize=160&pilimit=6&gpssearch=Vicente+Fox&gpsnamespace=0&gpslimit=6 + def _url_builder(self, q): # https://www.mediawiki.org/wiki/API%3aProperties#Info%3a_Parameters - params = { 'redirects': '', 'ppprop': 'displaytitle', @@ -56,12 +62,16 @@ class WikipediaAPI: obj['wp_accessed'] = False return obj - def get_meta(self, query_obj): + def get_meta(self, query_obj, verbose=False): '''Searches 
Wikipedia API for query string''' + if query_obj.get('wp_accessed', False): return query_obj else: url = self._url_builder(query_obj['query']) + if verbose: + self.log.debug(f'querying: {url}') + print(url) return self._api_search(url) def search(self, q): @@ -73,9 +83,14 @@ class WikipediaAPI: class GoogleKnowledgeGraph: url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search' + log = logger_utils.Logger.getLogger() + fp_api_key = app_cfg.FP_KNOWLEDGE_GRAPH_ENV - def __init__(self, key): - self._api_key = key + def __init__(self, api_key=None): + if api_key is not None: + self._api_key = api_key + else: + self._api_key = open(self.fp_api_key).read() def _get_kg_meta(self, result_obj, params): diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py index e090d16e..f9ed009e 100644 --- a/megapixels/app/utils/identity_utils.py +++ b/megapixels/app/utils/identity_utils.py @@ -5,22 +5,82 @@ import unidecode import difflib from app.settings import types +from app.models.data_store import DataStore from app.utils import logger_utils log = logger_utils.Logger.getLogger() +''' +class Dataset(Enum): + LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ + CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(16) +''' # Get list of names based on Dataset type -def get_names(enum_dataset): - if enum_dataset == types.Dataset.LFW: - dir_lfw = '/data_store_hdd/datasets/people/lfw/media/original/' - names_orig = [x for x in os.listdir(dir_lfw)] +def get_names(opt_dataset, opt_data_store=types.DataStore.HDD): + data_store = DataStore(opt_data_store, opt_dataset) + dir_dataset = data_store.dir_dataset # path to dataset root + dir_media_orig = data_store.dir_media_original + if opt_dataset == types.Dataset.AFW: + # Annotated Faces in the Wild + pass + elif opt_dataset == types.Dataset.BRAINWASH: + # Brainwash IP Cam dataset + pass + elif opt_dataset == types.Dataset.CASIA_WEBFACE: + 
# + pass + elif opt_dataset == types.Dataset.HELEN: + # Helen + pass + elif opt_dataset == types.Dataset.IMDB_WIKI: + # IMDb-Wiki (IMDb + Wikipedia) + pass + elif opt_dataset == types.Dataset.LARGE_AGE_GAP: + # Large Age Gap + pass + elif opt_dataset == types.Dataset.LFW: + # Labeled Faces in The Wild + names_orig = [x for x in os.listdir(dir_media_orig)] names_query = [x.replace('_', ' ') for x in names_orig] - result = {'names_orig': names_orig, 'names_query': names_query} - elif enum_dataset == types.Dataset.YOUTUBE_FACES: - names = [x for x in names if 'labeled faces.txt' not in x] + elif opt_dataset == types.Dataset.MEGAFACE: + # MegaFace + pass + elif opt_dataset == types.Dataset.MSCELEB: + # MS Celeb + pass + elif opt_dataset == types.Dataset.PIPA: + # People in Photo Albums + pass + elif opt_dataset == types.Dataset.PUBFIG83: + # PubFig83 + names_orig = [x for x in os.listdir(dir_media_orig) if Path(x).suffix != '.txt'] + names_query = [x.replace('_', ' ') for x in names_orig] + elif opt_dataset == types.Dataset.SCUT_FBP: + # SCUT Facial Beauty Perception + pass + elif opt_dataset == types.Dataset.UCCS: + # Unconstrained College Students + pass + elif opt_dataset == types.Dataset.UMD_FACES: + # University of Maryland Faces + pass + elif opt_dataset == types.Dataset.UTK: + # University of Tennessee Knoxville + pass + elif opt_dataset == types.Dataset.UCF_SELFIE: + # University of Central Florida Selfie + pass + elif opt_dataset == types.Dataset.VGG_FACE: + # Visual Geometry Group Face 1 + pass + elif opt_dataset == types.Dataset.VGG_FACE2: + # Visual Geometry Group Face 2 + pass else: - log.warn(f'{enum_dataset} not yet implemented') - result = {} + log.warn(f'{opt_dataset} not yet implemented') + names_orig = [] + names_query = [] + result = {'names_orig': names_orig, 'names_query': names_query} return result def similarity(a, b): |
