From 8032dc798287b0ae26342063c3016858f2b44974 Mon Sep 17 00:00:00 2001 From: adamhrv Date: Wed, 10 Apr 2019 23:04:29 +0200 Subject: add body detector, mod pull sheet --- megapixels/app/utils/identity_utils.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'megapixels/app/utils/identity_utils.py') diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py index 775652dc..5855fbbd 100644 --- a/megapixels/app/utils/identity_utils.py +++ b/megapixels/app/utils/identity_utils.py @@ -29,6 +29,13 @@ def names_match_strict(a, b): return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a) +def sanitize_name(name, as_str=False): + splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')] + if as_str: + return ' '.join(splits) + else: + return splits + ''' class Dataset(Enum): LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ @@ -106,12 +113,18 @@ def get_names(opt_dataset, opt_data_store=types.DataStore.HDD): def similarity(a, b): return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio() -def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False): +def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False): '''Returns boolean if names are similar enough ''' # strip spaces and split names into list of plain text words - name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')] - name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')] + if name_a_pre: + name_a_clean = name_a + else: + name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')] + if name_b_pre: + name_b_clean = name_b + else: + name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')] # assign short long vars len_a = len(name_a_clean) -- cgit v1.2.3-70-g09d2