1 files changed, 183 insertions, 164 deletions
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index f739315a..ad4256ad 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -10,9 +10,141 @@ import app.site.s3 as s3
 renderer = mistune.Renderer(escape=False)
 markdown = mistune.Markdown(renderer=renderer)
 
+footnote_count = 0  # module-level counter; parse_markdown resets it per footnote key and bumps it once per reference it rewrites
+
+def parse_markdown(metadata, sections, s3_path, skip_h1=False):
+  """
+  parse page into sections, preprocess the markdown to handle our modifications
+  metadata: page metadata dict (an intro section is added when it has 'desc' and 'subdesc')
+  sections: markdown chunks of the page body; skip_h1 drops chunks starting with '# '
+  returns one html string; the formatted footnotes are appended when a lookup was built
+  """
+  groups = []
+  current_group = []
+  footnotes = []
+  in_stats = False
+  in_footnotes = False
+  ignoring = False
+
+  if 'desc' in metadata and 'subdesc' in metadata:
+    groups.append(intro_section(metadata, s3_path))
+
+  for section in sections:
+    if skip_h1 and section.startswith('# '):
+      continue
+    elif section.strip().startswith('---'):
+      continue
+    elif section.lower().strip().startswith('ignore text'):
+      ignoring = True  # everything after this marker is dropped, except a later footnotes block
+      continue
+    elif section.strip().startswith('### Footnotes'):
+      groups.append(format_section(current_group, s3_path))
+      current_group = []
+      footnotes = []
+      in_footnotes = True  # every following chunk is collected as footnote text
+    elif in_footnotes:
+      footnotes.append(section)
+    elif ignoring:
+      continue
+    elif '### statistics' in section.lower() or '### sidebar' in section.lower():
+      if len(current_group):
+        groups.append(format_section(current_group, s3_path))
+      current_group = []
+      if 'sidebar' not in section.lower():
+        current_group.append(section)  # the statistics heading is kept, the sidebar marker is not
+      in_stats = True
+    elif in_stats and not section.strip().startswith('## ') and 'end sidebar' not in section.lower():
+      current_group.append(section)
+    elif (in_stats and section.strip().startswith('## ')) or 'end sidebar' in section.lower():
+      current_group = [format_section(current_group, s3_path, 'right-sidebar', tag='div')]
+      if 'end sidebar' not in section.lower():
+        current_group.append(section)  # the '## ' heading that closed the sidebar starts the next group
+      in_stats = False
+    elif section.strip().startswith('```'):
+      groups.append(format_section(current_group, s3_path))
+      current_group = [section]
+      if section.strip().endswith('```'):
+        groups.append(format_applet("\n\n".join(current_group), s3_path))
+        current_group = []
+    elif section.strip().endswith('```'):
+      current_group.append(section)
+      groups.append(format_applet("\n\n".join(current_group), s3_path))
+      current_group = []
+    elif section.startswith('+ '):
+      groups.append(format_section(current_group, s3_path))
+      groups.append('<section>' + format_metadata(section) + '</section>')
+      current_group = []
+    elif '![fullwidth:' in section:
+      groups.append(format_section(current_group, s3_path))
+      groups.append(format_section([section], s3_path, type='fullwidth'))
+      current_group = []
+    elif '![wide:' in section:
+      groups.append(format_section(current_group, s3_path))
+      groups.append(format_section([section], s3_path, type='wide'))
+      current_group = []
+    elif '![' in section:
+      groups.append(format_section(current_group, s3_path))
+      groups.append(format_section([section], s3_path, type='images'))
+      current_group = []
+    else:
+      current_group.append(section)
+  groups.append(format_section(current_group, s3_path))
+
+  footnote_txt, footnote_lookup = '', {}
+  if footnotes:
+    footnote_txt, footnote_lookup = format_footnotes(footnotes, s3_path)
+
+  content = "".join(groups)
+
+  if footnote_lookup:
+    for key, index in footnote_lookup.items():
+      global footnote_count
+      footnote_count = 0  # restart the per-key occurrence counter
+      letters = "abcdefghijklmnopqrstuvwxyz"
+      footnote_backlinks = []
+      def footnote_tag(match):
+        global footnote_count
+        footnote_count += 1
+        footnote_backlinks.append('<a href="#{}_{}">{}</a>'.format(key, footnote_count, letters[(footnote_count - 1) % len(letters)]))
+        return '<a class="footnote_shim" name="{}_{}"> </a><a href="#{}" class="footnote" title="Footnote {}">{}</a>'.format(key, footnote_count, key, index, index)
+      key_regex = re.compile(re.escape(key))  # match the key literally, whatever characters it contains
+      content = key_regex.sub(footnote_tag, content)
+      footnote_txt = footnote_txt.replace(">{}_BACKLINKS<".format(index), ">{}<".format("".join(footnote_backlinks)))  # anchored so 1 never matches inside 11
+    content += footnote_txt
+  return content
+
+
+def intro_section(metadata, s3_path):
+  """
+  Build the intro section for datasets
+  metadata: page metadata; 'image' is required, 'desc', 'subdesc', 'color', 'title' and 'caption' are used when present
+  s3_path: prefix prepended to metadata['image'] for the background image url; returns the html string
+  """
+  section = "<section class='intro_section' style='background-image: url({})'>".format(s3_path + metadata['image'])
+  section += "<div class='inner'>"
+
+  if 'desc' in metadata:
+    desc = metadata['desc']
+    if 'color' in metadata and metadata.get('title') and metadata['title'] in desc:  # an empty title would match between every character
+      desc = desc.replace(metadata['title'], "<span style='color: {}'>{}</span>".format(metadata['color'], metadata['title']))
+    section += "<div class='hero_desc'><span>{}</span></div>".format(desc)
+
+  if 'subdesc' in metadata:
+    subdesc = markdown(metadata['subdesc']).replace('<p>', '').replace('</p>', '')  # drop the paragraph wrapper added by the renderer
+    section += "<div class='hero_subdesc'><span>{}</span></div>".format(subdesc)
+
+  section += "</div>"
+  section += "</section>"
+
+  if 'caption' in metadata:
+    section += "<section><div class='image'><div class='caption'>{}</div></div></section>".format(metadata['caption'])
+
+  return section
+
+
 def fix_images(lines, s3_path):
   """
-  do our own tranformation of the markdown around images to handle wide images etc
+  do our own transformation of the markdown around images to handle wide images etc
   lines: markdown lines
   """
   real_lines = []
@@ -22,48 +154,89 @@ def fix_images(lines, s3_path):
       line = line.replace('![', '')
       alt_text, tail = line.split('](', 1)
       url, tail = tail.split(')', 1)
+      tag = ''
       if ':' in alt_text:
-        tail, alt_text = alt_text.split(':', 1)
+        tag, alt_text = alt_text.split(':', 1)
       img_tag = "<img src='{}' alt='{}'>".format(s3_path + url, alt_text.replace("'", ""))
-      if len(alt_text):
+      if 'sideimage' in tag:
+        line = "<div class='sideimage'>{}<div>{}</div></div>".format(img_tag, markdown(tail))
+      elif len(alt_text):
         line = "<div class='image'>{}<div class='caption'>{}</div></div>".format(img_tag, alt_text)
       else:
         line = "<div class='image'>{}</div>".format(img_tag, alt_text)
     real_lines.append(line)
   return "\n".join(real_lines)
 
-def format_section(lines, s3_path, type=''):
+
+def format_section(lines, s3_path, type='', tag='section'):  # tag: html element used as the wrapper
   """
   format a normal markdown section
   """
   if len(lines):
+    lines = fix_meta(lines)  # '+ ' metadata entries are turned into html before the image pass
     lines = fix_images(lines, s3_path)
     if type:
-      return "<section class='{}'>{}</section>".format(type, markdown(lines))
+      return "<{} class='{}'>{}</{}>".format(tag, type, markdown(lines), tag)  # type becomes the wrapper's class
     else:
-      return "<section>" + markdown(lines) + "</section>"
+      return "<{}>{}</{}>".format(tag, markdown(lines), tag)  # no class attribute when type is empty
   return ""
 
+def fix_meta(lines):
+  """
+  Format metadata sections before passing to markdown:
+  entries starting with '+ ' are replaced by format_metadata(entry),
+  everything else is passed through untouched. Returns a new list.
+  """
+  return [
+    format_metadata(entry) if entry.startswith('+ ') else entry
+    for entry in lines
+  ]
+
 def format_metadata(section):
   """
   format a metadata section (+ key: value pairs)
   """
   meta = []
   for line in section.split('\n'):
+    if ': ' not in line:  # a line without the separator cannot be unpacked into key and value below
+      continue
     key, value = line[2:].split(': ', 1)
     meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
-  return "<section><div class='meta'>{}</div></section>".format(''.join(meta)) 
+  return "<div class='meta'>{}</div>".format(''.join(meta))  # bare div: this function no longer adds a <section> wrapper
+
+def format_footnotes(footnotes, s3_path):
+  """
+  Format the footnotes section separately and produce a lookup we can use to update the main site
+  footnotes: markdown chunks; only lines containing both '[^' and ': ' become footnotes (s3_path is not used here)
+  returns (footnote html, {key: 1-based index}); every item carries an '<index>_BACKLINKS' placeholder
+  """
+  footnote_index_lookup = {}
+  footnote_list = []
+  for footnote in '\n'.join(footnotes).split('\n'):
+    if '[^' not in footnote or ': ' not in footnote:  # blank lines, plain text, or no 'key: note' separator
+      continue
+    key, note = footnote.split(': ', 1)
+    index = len(footnote_list) + 1  # numbered in order of appearance, starting at 1
+    footnote_index_lookup[key] = index
+    footnote_list.append('<a name="{}" class="footnote_shim"></a><span class="backlinks">{}_BACKLINKS</span>'.format(key, index) + markdown(note))
+
+  footnote_txt = '<section><ul class="footnotes"><li>' + '</li><li>'.join(footnote_list) + '</li></ul></section>'
+  return footnote_txt, footnote_index_lookup
 
 def format_applet(section, s3_path):
+  """
+  Format the applets, which load javascript modules like the map and CSVs
+  """
   # print(section)
   payload = section.strip('```').strip().strip('```').strip().split('\n')
   applet = {}
-  print(payload)
+  # print(payload)
   if ': ' in payload[0]:
-    command, opt = payload[0].split(': ')
+    command, opt = payload[0].split(': ', 1)
   else:
     command = payload[0]
     opt = None
+  print(command)
   if command == 'python' or command == 'javascript' or command == 'code':
     return format_section([ section ], s3_path)
   if command == '':
@@ -79,47 +252,6 @@ def format_applet(section, s3_path):
     applet['fields'] = payload[1:]
   return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(json.dumps(applet))
 
-def parse_markdown(sections, s3_path, skip_h1=False):
-  """
-  parse page into sections, preprocess the markdown to handle our modifications
-  """
-  groups = []
-  current_group = []
-  for section in sections:
-    if skip_h1 and section.startswith('# '):
-      continue
-    elif section.strip().startswith('```'):
-      groups.append(format_section(current_group, s3_path))
-      current_group = []
-      current_group.append(section)
-      if section.strip().endswith('```'):
-        groups.append(format_applet("\n\n".join(current_group), s3_path))
-        current_group = []
-    elif section.strip().endswith('```'):
-      current_group.append(section)
-      groups.append(format_applet("\n\n".join(current_group), s3_path))
-      current_group = []
-    elif section.startswith('+ '):
-      groups.append(format_section(current_group, s3_path))
-      groups.append(format_metadata(section))
-      current_group = []
-    elif '![fullwidth:' in section:
-      groups.append(format_section(current_group, s3_path))
-      groups.append(format_section([section], s3_path, type='fullwidth'))
-      current_group = []
-    elif '![wide:' in section:
-      groups.append(format_section(current_group, s3_path))
-      groups.append(format_section([section], s3_path, type='wide'))
-      current_group = []
-    elif '![' in section:
-      groups.append(format_section(current_group, s3_path))
-      groups.append(format_section([section], s3_path, type='images'))
-      current_group = []
-    else:
-      current_group.append(section)
-  groups.append(format_section(current_group, s3_path))
-  content = "".join(groups)
-  return content
 
 def parse_research_index(research_posts):
   """
@@ -127,6 +259,7 @@ def parse_research_index(research_posts):
   """
   content = "<div class='research_index'>"
   for post in research_posts:
+    print(post)
     s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
     if 'image' in post:
       post_image = s3_path + post['image']
@@ -140,117 +273,3 @@ def parse_research_index(research_posts):
     content += row
   content += '</div>'
   return content
-
-def read_metadata(fn):
-  """
-  Read in read a markdown file and extract the metadata
-  """
-  with open(fn, "r") as file:
-    data = file.read()
-    data = data.replace("\n ", "\n")
-    if "\n" in data:
-      data = data.replace("\r", "")
-    else:
-      data = data.replace("\r", "\n")
-    sections = data.split("\n\n")
-  return parse_metadata(fn, sections)
-
-default_metadata = {
-  'status': 'published',
-  'title': 'Untitled Page',
-  'desc': '',
-  'slug': '',
-  'published': '2018-12-31',
-  'updated': '2018-12-31',
-  'authors': 'Adam Harvey',
-  'sync': 'true',
-  'tagline': '',
-}
-
-def parse_metadata_section(metadata, section):
-  """
-  parse a metadata key: value pair
-  """
-  for line in section.split("\n"):
-    if ': ' not in line:
-      continue
-    key, value = line.split(': ', 1)
-    metadata[key.lower()] = value
-
-def parse_metadata(fn, sections):
-  """
-  parse the metadata headers in a markdown file
-  (everything before the second ---------)
-  also generates appropriate urls for this page :)
-  """
-  found_meta = False
-  metadata = {}
-  valid_sections = []
-  for section in sections:
-    if not found_meta and ': ' in section:
-      found_meta = True
-      parse_metadata_section(metadata, section)
-      continue
-    if '-----' in section:
-      continue
-    if found_meta:
-      valid_sections.append(section)
-
-  if 'title' not in metadata:
-    print('warning: {} has no title'.format(fn))
-  for key in default_metadata:
-    if key not in metadata:
-      metadata[key] = default_metadata[key]
-
-  basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
-  basename = os.path.basename(fn)
-  if basedir == '/':
-    metadata['path'] = '/'
-    metadata['url'] = '/'
-  elif basename == 'index.md':
-    metadata['path'] = basedir + '/'
-    metadata['url'] = metadata['path']
-  else:
-    metadata['path'] = basedir + '/'
-    metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
-
-  if metadata['status'] == 'published|draft|private':
-    metadata['status'] = 'published'
-
-  metadata['sync'] = metadata['sync'] != 'false'
-
-  metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
-
-  return metadata, valid_sections
-
-def read_research_post_index():
-  """
-  Generate an index of the research (blog) posts
-  """
-  return read_post_index('research')
-
-def read_datasets_index():
-  """
-  Generate an index of the datasets
-  """
-  return read_post_index('datasets')
-
-def read_post_index(basedir):
-  """
-  Generate an index of posts
-  """
-  posts = []
-  for fn in sorted(glob.glob('../site/content/{}/*/index.md'.format(basedir))):
-    metadata, valid_sections = read_metadata(fn)
-    if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft':
-      continue
-    posts.append(metadata)
-  if not len(posts):
-    posts.append({
-      'title': 'Placeholder',
-      'slug': 'placeholder',
-      'date': 'Placeholder',
-      'url': '/',
-    })
-  return posts
-