diff options
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/utils/api_utils.py | 2 | ||||
| -rw-r--r-- | megapixels/app/utils/identity_utils.py | 22 |
2 files changed, 22 insertions, 2 deletions
diff --git a/megapixels/app/utils/api_utils.py b/megapixels/app/utils/api_utils.py index d9d67425..a4dad501 100644 --- a/megapixels/app/utils/api_utils.py +++ b/megapixels/app/utils/api_utils.py @@ -3,7 +3,7 @@ import urllib import urllib.request from app.settings import app_cfg -from app.utils import file_utils, im_utils, logger_utils +from app.utils import logger_utils class WikipediaAPI: diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py index f9ed009e..775652dc 100644 --- a/megapixels/app/utils/identity_utils.py +++ b/megapixels/app/utils/identity_utils.py @@ -10,6 +10,25 @@ from app.utils import logger_utils log = logger_utils.Logger.getLogger() +az = 'abcdefghijklmlopqrstuvwzxyz' +AZ = az.upper() +z9 = list(map(str, list(range(0,10)))) +aZ9 = list(az) + list(AZ) + z9 + +def letter_strip(a, b=aZ9): + # strip every letter from a that is not in b + return ''.join([x for x in a if x in b]) + +def letter_match(a, b): + # check if every letter (a-zA-Z0-9) exists in both + return sum([x in b for x in a]) == len(a) + +def names_match_strict(a, b): + clean_a = letter_strip(a) + clean_b = letter_strip(b) + return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a) + + ''' class Dataset(Enum): LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ @@ -83,6 +102,7 @@ def get_names(opt_dataset, opt_data_store=types.DataStore.HDD): result = {'names_orig': names_orig, 'names_query': names_query} return result + def similarity(a, b): return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio() @@ -111,7 +131,7 @@ def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=Fa scores.append(subscores) # return result - ratio_similar = sum(max(x) for x in scores) / len_min + ratio_similar = sum(max(x) for x in scores) / len(scores) if compound_score: # combine with any missing letters/words |
