summaryrefslogtreecommitdiff
path: root/megapixels/app
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/app')
-rw-r--r--megapixels/app/models/data_store.py46
-rw-r--r--megapixels/app/settings/app_cfg.py7
-rw-r--r--megapixels/app/settings/types.py5
-rw-r--r--megapixels/app/utils/api_utils.py25
-rw-r--r--megapixels/app/utils/identity_utils.py78
5 files changed, 132 insertions, 29 deletions
diff --git a/megapixels/app/models/data_store.py b/megapixels/app/models/data_store.py
index a8d6916f..b4260b9c 100644
--- a/megapixels/app/models/data_store.py
+++ b/megapixels/app/models/data_store.py
@@ -2,6 +2,7 @@ import os
from os.path import join
import logging
+from app.utils.logger_utils import Logger
from app.settings import app_cfg as cfg
from app.settings import types
@@ -11,41 +12,62 @@ from app.settings import types
# -------------------------------------------------------------------------
class DataStore:
+
# local data store
+ log = Logger.getLogger()
+
def __init__(self, opt_data_store, opt_dataset):
self.data_store = join(f'/data_store_{opt_data_store.name.lower()}')
- self.dir_dataset = join(self.data_store, 'datasets', cfg.DIR_PEOPLE, opt_dataset.name.lower())
- self.dir_media = join(self.dir_dataset, 'media')
- self.dir_metadata = join(self.dir_dataset, 'metadata')
+ self._dir_dataset = join(self.data_store, 'datasets', cfg.DIR_PEOPLE, opt_dataset.name.lower())
+ self._dir_media = join(self._dir_dataset, 'media')
+ self._dir_metadata = join(self._dir_dataset, 'metadata')
def metadata(self, enum_type):
- return join(self.dir_metadata, f'{enum_type.name.lower()}.csv')
+ return join(self._dir_metadata, f'{enum_type.name.lower()}.csv')
+
+ @property
+ def dir_dataset(self):
+ return self._dir_dataset
+
+ @property
+ def dir_media(self):
+ return self._dir_media
+
+ @property
+ def dir_media_original(self):
+ return join(self._dir_media, 'original')
+
+ @property
+ def dir_metadata(self):
+ return self._dir_metadata
def metadata_dir(self):
- return join(self.dir_metadata)
+ self.log.warn('deprecated. use dir_metadata')
+ return self._dir_metadata
def media_dir(self):
- return join(self.dir_media)
+ self.log.warn('deprecated. use dir_media')
+ return self._dir_media
def media_images_original(self):
- return join(self.dir_media, 'original')
+ return join(self._dir_media, 'original')
def face(self, subdir, fn, ext):
if subdir == '' or subdir is None:
subdir = '.'
- return join(self.dir_media, 'original', subdir, f'{fn}.{ext}')
+ return join(self._dir_media, 'original', subdir, f'{fn}.{ext}')
def face_crop(self, subdir, fn, ext):
- return join(self.dir_media, 'cropped', subdir, f'{fn}.{ext}')
+ return join(self._dir_media, 'cropped', subdir, f'{fn}.{ext}')
def face_uuid(self, uuid, ext):
- return join(self.dir_media, 'uuid',f'{uuid}.{ext}')
+ return join(self._dir_media, 'uuid',f'{uuid}.{ext}')
def face_crop_uuid(self, uuid, ext):
- return join(self.dir_media, 'uuid', f'{uuid}.{ext}')
+ return join(self._dir_media, 'uuid', f'{uuid}.{ext}')
def uuid_dir(self):
- return join(self.dir_media, 'uuid')
+ return join(self._dir_media, 'uuid')
class DataStoreS3:
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index 2b10f9f0..0b1fb69d 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
from app.settings import types
from app.utils import click_utils
+from pathlib import Path
import codecs
codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None)
@@ -26,6 +27,10 @@ FaceLandmark2D_5Var = click_utils.ParamVar(types.FaceLandmark2D_5)
FaceLandmark2D_68Var = click_utils.ParamVar(types.FaceLandmark2D_68)
FaceLandmark3D_68Var = click_utils.ParamVar(types.FaceLandmark3D_68)
+# base path
+DIR_SELF = os.path.dirname(os.path.realpath(__file__))
+DIR_ROOT = Path(DIR_SELF).parent.parent.parent
+
# # data_store
DATA_STORE = '/data_store_hdd/'
DATA_STORE_NAS = '/data_store_nas/'
@@ -64,7 +69,7 @@ DIR_TEST_IMAGES = join(DIR_APP, 'test', 'images')
# -----------------------------------------------------------------------------
# .env config for keys
# -----------------------------------------------------------------------------
-
+FP_KNOWLEDGE_GRAPH_ENV = join(DIR_ROOT, 'env/google_knowledge_graph_api.env')
# DIR_DOTENV = join(DIR_APP, '.env')
load_dotenv() # dotenv_path=DIR_DOTENV)
diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py
index 933d1932..3d7e96c0 100644
--- a/megapixels/app/settings/types.py
+++ b/megapixels/app/settings/types.py
@@ -47,8 +47,9 @@ class Metadata(Enum):
FACE_ATTRIBUTES, IMAGE_COUNT = range(10)
class Dataset(Enum):
- LFW, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
- CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(16)
+ LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
+ CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI, \
+ LARGE_AGE_GAP = range(18)
# ---------------------------------------------------------------------
diff --git a/megapixels/app/utils/api_utils.py b/megapixels/app/utils/api_utils.py
index ec00113e..d9d67425 100644
--- a/megapixels/app/utils/api_utils.py
+++ b/megapixels/app/utils/api_utils.py
@@ -2,15 +2,21 @@ import json
import urllib
import urllib.request
+from app.settings import app_cfg
+from app.utils import file_utils, im_utils, logger_utils
+
class WikipediaAPI:
url_base = 'https://en.wikipedia.org/w/api.php'
-
+ log = logger_utils.Logger.getLogger()
+ # https://en.wikipedia.org/w/api.php?redirects=&
+ # ppprop=displaytitle&prop=pageprops|pageimages|description&generator=prefixsearch
+ # &action=query&format=json&piprop=thumbnail&pithumbsize=160&pilimit=6&gpssearch=Vicente+Fox&gpsnamespace=0&gpslimit=6
+
def _url_builder(self, q):
# https://www.mediawiki.org/wiki/API%3aProperties#Info%3a_Parameters
-
params = {
'redirects': '',
'ppprop': 'displaytitle',
@@ -56,12 +62,16 @@ class WikipediaAPI:
obj['wp_accessed'] = False
return obj
- def get_meta(self, query_obj):
+ def get_meta(self, query_obj, verbose=False):
'''Searches Wikipedia API for query string'''
+
if query_obj.get('wp_accessed', False):
return query_obj
else:
url = self._url_builder(query_obj['query'])
+ if verbose:
+ self.log.debug(f'querying: {url}')
+ print(url)
return self._api_search(url)
def search(self, q):
@@ -73,9 +83,14 @@ class WikipediaAPI:
class GoogleKnowledgeGraph:
url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search'
+ log = logger_utils.Logger.getLogger()
+ fp_api_key = app_cfg.FP_KNOWLEDGE_GRAPH_ENV
- def __init__(self, key):
- self._api_key = key
+ def __init__(self, api_key=None):
+ if api_key is not None:
+ self._api_key = api_key
+ else:
+      self._api_key = open(self.fp_api_key).read().strip()
def _get_kg_meta(self, result_obj, params):
diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py
index e090d16e..f9ed009e 100644
--- a/megapixels/app/utils/identity_utils.py
+++ b/megapixels/app/utils/identity_utils.py
@@ -5,22 +5,82 @@ import unidecode
import difflib
from app.settings import types
+from app.models.data_store import DataStore
from app.utils import logger_utils
log = logger_utils.Logger.getLogger()
+'''
+class Dataset(Enum):
+  LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
+  CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI, LARGE_AGE_GAP = range(18)
+'''
# Get list of names based on Dataset type
-def get_names(enum_dataset):
- if enum_dataset == types.Dataset.LFW:
- dir_lfw = '/data_store_hdd/datasets/people/lfw/media/original/'
- names_orig = [x for x in os.listdir(dir_lfw)]
+def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
+ data_store = DataStore(opt_data_store, opt_dataset)
+ dir_dataset = data_store.dir_dataset # path to dataset root
+ dir_media_orig = data_store.dir_media_original
+ if opt_dataset == types.Dataset.AFW:
+ # Annotated Faces in the Wild
+ pass
+ elif opt_dataset == types.Dataset.BRAINWASH:
+ # Brainwash IP Cam dataset
+ pass
+ elif opt_dataset == types.Dataset.CASIA_WEBFACE:
+ #
+ pass
+ elif opt_dataset == types.Dataset.HELEN:
+ # Helen
+ pass
+ elif opt_dataset == types.Dataset.IMDB_WIKI:
+    # IMDb-Wikipedia face dataset
+ pass
+  elif opt_dataset == types.Dataset.LARGE_AGE_GAP:
+ # Large Age Gap
+ pass
+ elif opt_dataset == types.Dataset.LFW:
+ # Labeled Faces in The Wild
+ names_orig = [x for x in os.listdir(dir_media_orig)]
names_query = [x.replace('_', ' ') for x in names_orig]
- result = {'names_orig': names_orig, 'names_query': names_query}
- elif enum_dataset == types.Dataset.YOUTUBE_FACES:
- names = [x for x in names if 'labeled faces.txt' not in x]
+ elif opt_dataset == types.Dataset.MEGAFACE:
+ # MegaFace
+ pass
+ elif opt_dataset == types.Dataset.MSCELEB:
+ # MS Celeb
+ pass
+ elif opt_dataset == types.Dataset.PIPA:
+ # People in Photo Albums
+ pass
+ elif opt_dataset == types.Dataset.PUBFIG83:
+ # PubFig83
+    names_orig = [x for x in os.listdir(dir_media_orig) if not x.endswith('.txt')]
+ names_query = [x.replace('_', ' ') for x in names_orig]
+ elif opt_dataset == types.Dataset.SCUT_FBP:
+ # SCUT Facial Beauty Perception
+ pass
+ elif opt_dataset == types.Dataset.UCCS:
+    # Unconstrained College Students
+ pass
+ elif opt_dataset == types.Dataset.UMD_FACES:
+ # University of Maryland Faces
+ pass
+ elif opt_dataset == types.Dataset.UTK:
+ # University of Tennessee Knoxville
+ pass
+ elif opt_dataset == types.Dataset.UCF_SELFIE:
+ # University of Central Florida Selfie
+ pass
+ elif opt_dataset == types.Dataset.VGG_FACE:
+ # Visual Geometry Group Face 1
+ pass
+ elif opt_dataset == types.Dataset.VGG_FACE2:
+ # Visual Geometry Group Face 2
+ pass
else:
- log.warn(f'{enum_dataset} not yet implemented')
- result = {}
+ log.warn(f'{opt_dataset} not yet implemented')
+ names_orig = []
+ names_query = []
+ result = {'names_orig': names_orig, 'names_query': names_query}
return result
def similarity(a, b):