summary | refs | log | tree | commit | diff
path: root/megapixels/app/utils/identity_utils.py
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2019-04-18 16:55:14 +0200
committer: jules@lens <julescarbon@gmail.com> 2019-04-18 16:55:14 +0200
commit: 2e4daed06264f3dc3bbabd8fa4fc0d8ceed4c5af (patch)
tree: 1a17bb4459776ac91f7006a2a407ca12edd3471e /megapixels/app/utils/identity_utils.py
parent: 3d32e5b4ddbfbfe5d4abeda57ff200adf1532f4c (diff)
parent: f8012f88641b0bb378ba79393f277c8918ebe452 (diff)
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'megapixels/app/utils/identity_utils.py')
-rw-r--r-- megapixels/app/utils/identity_utils.py 19
1 files changed, 16 insertions, 3 deletions
diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py
index 775652dc..5855fbbd 100644
--- a/megapixels/app/utils/identity_utils.py
+++ b/megapixels/app/utils/identity_utils.py
@@ -29,6 +29,13 @@ def names_match_strict(a, b):
return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a)
+def sanitize_name(name, as_str=False):
+ splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')]
+ if as_str:
+ return ' '.join(splits)
+ else:
+ return splits
+
'''
class Dataset(Enum):
LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
@@ -106,12 +113,18 @@ def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
def similarity(a, b):
return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio()
-def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False):
+def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False):
'''Returns boolean if names are similar enough
'''
# strip spaces and split names into list of plain text words
- name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
- name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
+ if name_a_pre:
+ name_a_clean = name_a
+ else:
+ name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
+ if name_b_pre:
+ name_b_clean = name_b
+ else:
+ name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
# assign short long vars
len_a = len(name_a_clean)