summaryrefslogtreecommitdiff
path: root/megapixels/app/utils/identity_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/app/utils/identity_utils.py')
-rw-r--r--megapixels/app/utils/identity_utils.py19
1 files changed, 16 insertions, 3 deletions
diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py
index 775652dc..5855fbbd 100644
--- a/megapixels/app/utils/identity_utils.py
+++ b/megapixels/app/utils/identity_utils.py
@@ -29,6 +29,13 @@ def names_match_strict(a, b):
return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a)
+def sanitize_name(name, as_str=False):
+ splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')]
+ if as_str:
+ return ' '.join(splits)
+ else:
+ return splits
+
'''
class Dataset(Enum):
LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
@@ -106,12 +113,18 @@ def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
def similarity(a, b):
return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio()
-def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False):
+def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False):
'''Returns boolean if names are similar enough
'''
# strip spaces and split names into list of plain text words
- name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
- name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
+ if name_a_pre:
+ name_a_clean = name_a
+ else:
+ name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
+ if name_b_pre:
+ name_b_clean = name_b
+ else:
+ name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
# assign short long vars
len_a = len(name_a_clean)