import os
from pathlib import Path
import difflib

import unidecode

from app.settings import types
from app.models.data_store import DataStore
from app.utils import logger_utils

log = logger_utils.Logger.getLogger()

# character whitelist used for strict name comparison
az = 'abcdefghijklmnopqrstuvwxyz'
AZ = az.upper()
z9 = list(map(str, range(10)))
aZ9 = list(az) + list(AZ) + z9


def letter_strip(a, b=aZ9):
    # strip every character from a that is not in b
    return ''.join([x for x in a if x in b])


def letter_match(a, b):
    # check that every character (a-zA-Z0-9) of a exists in b
    return sum([x in b for x in a]) == len(a)


def names_match_strict(a, b):
    # names match strictly if they contain exactly the same alphanumeric characters
    clean_a = letter_strip(a)
    clean_b = letter_strip(b)
    return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a)


'''
class Dataset(Enum):
    LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
    CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(17)
'''


# Get list of names based on Dataset type
def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
    data_store = DataStore(opt_data_store, opt_dataset)
    dir_dataset = data_store.dir_dataset  # path to dataset root
    dir_media_orig = data_store.dir_media_original

    # default to empty lists so unimplemented datasets return a valid result
    names_orig = []
    names_query = []

    if opt_dataset == types.Dataset.AFW:
        # Annotated Faces in the Wild
        pass
    elif opt_dataset == types.Dataset.BRAINWASH:
        # Brainwash IP cam dataset
        pass
    elif opt_dataset == types.Dataset.CASIA_WEBFACE:
        # CASIA WebFace
        pass
    elif opt_dataset == types.Dataset.HELEN:
        # Helen
        pass
    elif opt_dataset == types.Dataset.IMDB_WIKI:
        # IMDB-WIKI
        pass
    elif opt_dataset == types.Dataset.LAG:
        # Large Age Gap
        pass
    elif opt_dataset == types.Dataset.LFW:
        # Labeled Faces in the Wild
        names_orig = [x for x in os.listdir(dir_media_orig)]
        names_query = [x.replace('_', ' ') for x in names_orig]
    elif opt_dataset == types.Dataset.MEGAFACE:
        # MegaFace
        pass
    elif opt_dataset == types.Dataset.MSCELEB:
        # MS Celeb
        pass
    elif opt_dataset == types.Dataset.PIPA:
        # People in Photo Albums
        pass
    elif opt_dataset == types.Dataset.PUBFIG83:
        # PubFig83
        names_orig = [x for x in os.listdir(dir_media_orig) if Path(x).suffix != '.txt']
        names_query = [x.replace('_', ' ') for x in names_orig]
    elif opt_dataset == types.Dataset.SCUT_FBP:
        # SCUT Facial Beauty Perception
        pass
    elif opt_dataset == types.Dataset.UCCS:
        # Unconstrained College Students
        pass
    elif opt_dataset == types.Dataset.UMD_FACES:
        # University of Maryland Faces
        pass
    elif opt_dataset == types.Dataset.UTK:
        # University of Tennessee Knoxville
        pass
    elif opt_dataset == types.Dataset.UCF_SELFIE:
        # University of Central Florida Selfie
        pass
    elif opt_dataset == types.Dataset.VGG_FACE:
        # Visual Geometry Group Face 1
        pass
    elif opt_dataset == types.Dataset.VGG_FACE2:
        # Visual Geometry Group Face 2
        pass
    else:
        log.warn(f'{opt_dataset} not yet implemented')

    result = {'names_orig': names_orig, 'names_query': names_query}
    return result


def similarity(a, b):
    # case-insensitive SequenceMatcher ratio between two strings
    return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio()

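# Illustrative behaviour of the helpers above. The example strings are
# assumptions chosen for this sketch, not values used elsewhere in the app:
#
#   letter_strip('Jean-Luc Picard')             -> 'JeanLucPicard'
#   names_match_strict('Jean-Luc', 'Jean Luc')  -> True   (same characters, same count)
#   similarity('picard', 'pickard')             -> ~0.92  (SequenceMatcher ratio)
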
def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False):
    '''Return True if the names are similar enough (or the raw score if as_float is True)'''
    # strip accents and whitespace, split names into lists of plain-text words
    name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
    name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]

    # compute pairwise word similarity scores:
    # one row per word in name_a, one column per word in name_b
    scores = []
    for word_a in name_a_clean:
        subscores = []
        for word_b in name_b_clean:
            subscores.append(similarity(word_a, word_b))
        scores.append(subscores)

    # average the best match for each word in name_a
    ratio_similar = sum(max(x) for x in scores) / len(scores)

    if compound_score:
        # penalize missing letters/words by blending in the letter-count ratio
        letters_a = sum(len(x) for x in name_a_clean)
        letters_b = sum(len(x) for x in name_b_clean)
        ratio_letters = min(letters_a, letters_b) / max(letters_a, letters_b)
        score = (0.8 * ratio_similar) + (0.2 * ratio_letters)
    else:
        score = ratio_similar

    if as_float:
        return score
    return score > threshold
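
# A minimal usage sketch of the fuzzy name matching above. The example names
# are assumptions chosen for illustration; running this file directly also
# requires the app.* modules imported at the top to be importable.
if __name__ == '__main__':
    pairs = [
        ('Jean-Luc Picard', 'Jean Luc Picard'),
        ('Jon Smith', 'John Smith'),
        ('Jane Doe', 'John Doe'),
    ]
    for a, b in pairs:
        print(f'{a!r} vs {b!r}: '
              f'strict={names_match_strict(a, b)}, '
              f'score={names_match(a, b, as_float=True):.3f}, '
              f'compound={names_match(a, b, as_float=True, compound_score=True):.3f}, '
              f'match={names_match(a, b)}')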