import os
import re
import glob

import app.settings.app_cfg as cfg

from app.utils.file_utils import load_json
import app.utils.sheet_utils as sheet

# Sidebar lookup, fetched once when this module is imported.
# parse_metadata indexes it by a page's slug and reads the columns listed in
# sidebar_order from the matching row.
# NOTE(review): presumably backed by a Google Sheet named "sidebar", with
# item_key="key" selecting the column used as the lookup key -- confirm in
# app.utils.sheet_utils.
sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")

def read_metadata(fn):
  """
  Read a markdown file and extract its metadata.

  The file is decoded as UTF-8, its line endings are normalised, and the
  text is split on blank lines into sections that are passed on to
  parse_metadata.

  fn -- path of the markdown file to read

  Returns whatever parse_metadata returns for this file, i.e. a
  (metadata, valid_sections) tuple.
  """
  # Decode explicitly as UTF-8 so the result does not depend on the locale of
  # the machine that builds the site.
  with open(fn, "r", encoding="utf-8") as file:
    data = file.read()

  # Remove one space directly after each newline, so a line holding a single
  # space still counts as a blank separator line.
  data = data.replace("\n ", "\n")

  # Defensive newline normalisation: drop stray "\r" when the text already
  # uses "\n", otherwise treat "\r" as the line terminator.
  if "\n" in data:
    data = data.replace("\r", "")
  else:
    data = data.replace("\r", "\n")

  # Sections are separated by one blank line.
  sections = data.split("\n\n")
  return parse_metadata(fn, sections)

def domainFromUrl(url):
  """
  Return a short domain name for a URL, dropping leading subdomains.

  The host is taken from after the 'scheme://' (or leading '//') prefix when
  there is one; otherwise the text before the first '/' is used, so
  scheme-less values such as 'example.com/page' work instead of raising
  IndexError.

  The last two dot-separated labels of the host are returned, or the last
  three when the second-to-last label is two characters long (so
  'news.bbc.co.uk' gives 'bbc.co.uk'). This is a length heuristic, not a
  public-suffix lookup.

  url -- URL string, with or without a scheme
  """
  parts = url.split('/')
  # 'scheme://host/...' splits into ['scheme:', '', 'host', ...] and
  # '//host/...' into ['', '', 'host', ...]; anything else has no authority
  # prefix, so the host is the leading segment.
  has_authority = (
    len(parts) > 2
    and parts[1] == ''
    and (parts[0] == '' or parts[0].endswith(':'))
  )
  host = parts[2] if has_authority else parts[0]

  domain = host.split('.')
  if len(domain) > 2 and len(domain[-2]) == 2:
    return ".".join(domain[-3:])
  return ".".join(domain[-2:])


# Fallback header values: parse_metadata copies each entry into a page's
# metadata when the page's own header does not define that key.
default_metadata = {
  # read_post_index leaves out pages whose status is "private" or "draft"
  "status": "published",
  "title": "Untitled Page",
  # parse_metadata strips tags from this to produce og_desc
  "desc": "",
  # used by parse_metadata to locate the citations JSON and the sidebar row
  "slug": "",
  "published": "2018-12-31",
  "updated": "2018-12-31",
  # comma-separated; parse_metadata joins the names with <br> for author_html
  "authors": "Adam Harvey",
  # parse_metadata turns this into a bool: anything except "false" is True
  "sync": "true",
  "tagline": "",
}

# Sidebar entries in display order. Each item names the column ('key') that
# parse_metadata reads from a sidebar row and the label ('title') shown for it.
sidebar_order = [
  {'key': column, 'title': label}
  for column, label in (
    ('published', 'Published'),
    ('images', 'Images'),
    ('videos', 'Videos'),
    ('identities', 'Identities'),
    ('purpose', 'Purpose'),
    ('created_by', 'Created by'),
    ('funded_by_short', 'Funded by'),
    ('size_gb', 'Download Size'),
    ('website', 'Website'),
  )
]

def parse_metadata(fn, sections):
  """
  Parse the metadata header of a markdown file and derive page-level fields.

  The first section containing ': ' is treated as the metadata header.
  Sections before it are discarded, as are any sections containing '-----';
  the remaining sections after the header are returned as page content.

  On top of the header values (with default_metadata filling the gaps) this
  - determines 'path' and 'url' from the file's location under
    cfg.DIR_SITE_CONTENT
  - derives 'og_desc', 'og_image', 'og_title', 'author_html' and converts
    'sync' to a bool
  - loads <slug>.json from cfg.DIR_SITE_FINAL_CITATIONS into 'meta', if the
    file exists
  - builds 'sidebar' from the sidebar lookup row for this slug, if any

  'meta' and 'sidebar' are always present in the result: 'meta' falls back
  to {} and 'sidebar' to [].

  fn       -- path of the markdown file (used for urls and messages)
  sections -- list of blank-line separated text sections of the file

  Returns a (metadata, valid_sections) tuple.
  """
  found_meta = False
  metadata = {}
  valid_sections = []
  for section in sections:
    if not found_meta and ': ' in section:
      # first "key: value" looking section is the header
      found_meta = True
      parse_metadata_section(metadata, section)
      continue
    if '-----' in section:
      continue
    if found_meta:
      valid_sections.append(section)

  if 'title' not in metadata:
    print('warning: {} has no title'.format(fn))
  for key, fallback in default_metadata.items():
    metadata.setdefault(key, fallback)

  # Site-relative location of the page.
  basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
  basename = os.path.basename(fn)
  if basedir == '/':
    metadata['path'] = '/'
    metadata['url'] = '/'
  else:
    metadata['path'] = basedir + '/'
    if basename == 'index.md':
      metadata['url'] = metadata['path']
    else:
      metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'

  # NOTE(review): 'published|draft|private' looks like an unedited template
  # placeholder that is deliberately treated as published -- confirm.
  if metadata['status'] == 'published|draft|private':
    metadata['status'] = 'published'

  # Description with anything tag-like removed.
  metadata['og_desc'] = re.sub(r'<[^<]+?>', '', metadata['desc'])

  # Anything other than the literal string 'false' enables sync.
  metadata['sync'] = metadata['sync'] != 'false'

  metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))

  dataset_path = os.path.join(cfg.DIR_SITE_FINAL_CITATIONS, metadata['slug'] + '.json')
  if os.path.exists(dataset_path):
    metadata['meta'] = load_json(dataset_path)
    if not metadata['meta']:
      print("Bad metadata? {}".format(dataset_path))
  elif 'datasets' in fn:
    print("/!\\ {} does not exist!".format(dataset_path))

  if metadata['slug'] in sidebar:
    sidebar_row = sidebar[metadata['slug']]
    if sidebar_row:
      sidebar_items = []
      for item in sidebar_order:
        key = item['key']
        value = sidebar_row[key]
        if not value:
          continue
        value = value.replace(' - ', ' &ndash; ')
        if key == 'size_gb':
          value += ' GB'
        if key == 'website':
          # link to the site, showing only its short domain as the link text
          value = "<a href='" + value + "' target='_blank' rel='nofollow noopener'>" + domainFromUrl(value) + "</a>"
        sidebar_items.append({ 'value': value, 'title': item['title'], })
      metadata['sidebar'] = sidebar_items

  # Without usable citation data the page gets neither meta nor a sidebar,
  # even if a sidebar row was found above.
  if 'meta' not in metadata or not metadata['meta']:
    metadata['meta'] = {}
    metadata['sidebar'] = []

  # Citation data loaded but no sidebar row: still expose the key so that
  # consumers can rely on it being there.
  metadata.setdefault('sidebar', [])

  if 'image' in metadata and metadata['image']:
    metadata['og_image'] = "https://nyc3.digitaloceanspaces.com/megapixels/v1" + metadata['path'] + metadata['image']
  else:
    metadata['og_image'] = "https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/msceleb/assets/background.jpg"
  metadata['og_title'] = 'MegaPixels: {}'.format(metadata['title'])

  return metadata, valid_sections

def parse_metadata_section(metadata, section):
  """
  Parse the "key: value" lines of a metadata section into a dict.

  Each line containing ': ' is split at the first occurrence. The key is
  stripped of surrounding whitespace and lower-cased, the value is stripped
  of surrounding whitespace, and the pair is stored in metadata (replacing
  any existing entry). Lines without ': ' are ignored.

  Stripping matters because values are compared literally later on: a stray
  trailing space in "status: draft " would otherwise not equal 'draft'.

  metadata -- dict that is updated in place
  section  -- text of the section, one "key: value" pair per line

  Returns None.
  """
  for line in section.split("\n"):
    if ': ' not in line:
      continue
    key, value = line.split(': ', 1)
    metadata[key.strip().lower()] = value.strip()


def read_research_post_index():
  """
  Return the post index for the 'research' content directory
  (see read_post_index for what the index contains).
  """
  return read_post_index(basedir='research')


def read_datasets_index():
  """
  Return the post index for the 'datasets' content directory
  (see read_post_index for what the index contains).
  """
  return read_post_index(basedir='datasets')


def read_post_index(basedir):
  """
  Collect the metadata of every listable post below a content directory.

  Looks at each <basedir>/*/index.md under cfg.DIR_SITE_CONTENT in sorted
  path order and keeps the metadata of every page whose status is neither
  'private' nor 'draft'.

  basedir -- content subdirectory to scan, e.g. 'research' or 'datasets'

  Returns the list of metadata dicts, or a list holding a single placeholder
  entry when no post qualifies.
  """
  pattern = os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md')
  posts = []
  for fn in sorted(glob.glob(pattern)):
    metadata, _sections = read_metadata(fn)
    if metadata is None:
      continue
    if metadata['status'] in ('private', 'draft'):
      continue
    posts.append(metadata)

  if posts:
    return posts

  # nothing to list: hand back a single stand-in entry instead
  return [{
    'title': 'Placeholder',
    'slug': 'placeholder',
    'date': 'Placeholder',
    'url': '/',
  }]