Diffstat (limited to 'megapixels/app')
-rw-r--r--  megapixels/app/site/builder.py  |  22
-rw-r--r--  megapixels/app/site/loader.py   | 123
-rw-r--r--  megapixels/app/site/parser.py   | 204
3 files changed, 184 insertions, 165 deletions
diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py
index 188fbc25..15055110 100644
--- a/megapixels/app/site/builder.py
+++ b/megapixels/app/site/builder.py
@@ -7,6 +7,7 @@
 from jinja2 import Environment, FileSystemLoader, select_autoescape
 import app.settings.app_cfg as cfg
 import app.site.s3 as s3
+import app.site.loader as loader
 import app.site.parser as parser
 
 env = Environment(
@@ -21,7 +22,7 @@ def build_page(fn, research_posts, datasets):
     - syncs any assets with s3
     - handles certain index pages...
     """
-    metadata, sections = parser.read_metadata(fn)
+    metadata, sections = loader.read_metadata(fn)
 
     if metadata is None:
         print("{} has no metadata".format(fn))
@@ -55,7 +56,7 @@ def build_page(fn, research_posts, datasets):
     if 'index.md' in fn:
         s3.sync_directory(dirname, s3_dir, metadata)
 
-    content = parser.parse_markdown(sections, s3_path, skip_h1=skip_h1)
+    content = parser.parse_markdown(metadata, sections, s3_path, skip_h1=skip_h1)
 
     html = template.render(
         metadata=metadata,
@@ -73,11 +74,11 @@ def build_index(key, research_posts, datasets):
     """
    build the index of research (blog) posts
     """
-    metadata, sections = parser.read_metadata(os.path.join(cfg.DIR_SITE_CONTENT, key, 'index.md'))
+    metadata, sections = loader.read_metadata(os.path.join(cfg.DIR_SITE_CONTENT, key, 'index.md'))
     template = env.get_template("page.html")
     s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, metadata['path'])
-    content = parser.parse_markdown(sections, s3_path, skip_h1=False)
-    content += parser.parse_research_index(research_posts)
+    content = parser.parse_markdown(metadata, sections, s3_path, skip_h1=False)
+    content += loader.parse_research_index(research_posts)
     html = template.render(
         metadata=metadata,
         content=content,
@@ -93,8 +94,8 @@ def build_site():
     """
    build the site! =^)
     """
-    research_posts = parser.read_research_post_index()
-    datasets = parser.read_datasets_index()
+    research_posts = loader.read_research_post_index()
+    datasets = loader.read_datasets_index()
     for fn in glob.iglob(os.path.join(cfg.DIR_SITE_CONTENT, "**/*.md"), recursive=True):
         build_page(fn, research_posts, datasets)
     build_index('research', research_posts, datasets)
@@ -103,7 +104,8 @@ def build_file(fn):
     """
    build just one page from a filename! =^)
     """
-    research_posts = parser.read_research_post_index()
-    datasets = parser.read_datasets_index()
-    fn = os.path.join(cfg.DIR_SITE_CONTENT, fn)
+    research_posts = loader.read_research_post_index()
+    datasets = loader.read_datasets_index()
+    if cfg.DIR_SITE_CONTENT not in fn:
+        fn = os.path.join(cfg.DIR_SITE_CONTENT, fn)
     build_page(fn, research_posts, datasets)
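The last hunk above also changes build_file's behavior: it now accepts a filename either relative to the content directory or one already prefixed with it. A minimal sketch of the new guard, with cfg.DIR_SITE_CONTENT stubbed by an illustrative value:

```python
import os

DIR_SITE_CONTENT = 'site/content'  # stand-in for cfg.DIR_SITE_CONTENT

def resolve(fn):
    # mirrors the new guard in build_file: only prepend the content
    # directory when it is not already present in the path
    if DIR_SITE_CONTENT not in fn:
        fn = os.path.join(DIR_SITE_CONTENT, fn)
    return fn

print(resolve('research/index.md'))               # site/content/research/index.md
print(resolve('site/content/research/index.md'))  # returned unchanged
```

Note the guard is a substring test rather than a proper prefix check, so any path that merely contains the content directory string anywhere is also left unchanged.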
=^) """ - research_posts = parser.read_research_post_index() - datasets = parser.read_datasets_index() - fn = os.path.join(cfg.DIR_SITE_CONTENT, fn) + research_posts = loader.read_research_post_index() + datasets = loader.read_datasets_index() + if cfg.DIR_SITE_CONTENT not in fn: + fn = os.path.join(cfg.DIR_SITE_CONTENT, fn) build_page(fn, research_posts, datasets) diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py new file mode 100644 index 00000000..691efb25 --- /dev/null +++ b/megapixels/app/site/loader.py @@ -0,0 +1,123 @@ +import os +import re +import glob +import simplejson as json + +import app.settings.app_cfg as cfg + +def read_metadata(fn): + """ + Read in read a markdown file and extract the metadata + """ + with open(fn, "r") as file: + data = file.read() + data = data.replace("\n ", "\n") + if "\n" in data: + data = data.replace("\r", "") + else: + data = data.replace("\r", "\n") + sections = data.split("\n\n") + return parse_metadata(fn, sections) + + +default_metadata = { + 'status': 'published', + 'title': 'Untitled Page', + 'desc': '', + 'slug': '', + 'published': '2018-12-31', + 'updated': '2018-12-31', + 'authors': 'Adam Harvey', + 'sync': 'true', + 'tagline': '', +} + +def parse_metadata(fn, sections): + """ + parse the metadata headers in a markdown file + (everything before the second ---------) + also generates appropriate urls for this page :) + """ + found_meta = False + metadata = {} + valid_sections = [] + for section in sections: + if not found_meta and ': ' in section: + found_meta = True + parse_metadata_section(metadata, section) + continue + if '-----' in section: + continue + if found_meta: + valid_sections.append(section) + + if 'title' not in metadata: + print('warning: {} has no title'.format(fn)) + for key in default_metadata: + if key not in metadata: + metadata[key] = default_metadata[key] + + basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, '')) + basename = os.path.basename(fn) + if basedir == '/': + metadata['path'] = '/' + metadata['url'] = '/' + elif basename == 'index.md': + metadata['path'] = basedir + '/' + metadata['url'] = metadata['path'] + else: + metadata['path'] = basedir + '/' + metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' + + if metadata['status'] == 'published|draft|private': + metadata['status'] = 'published' + + metadata['sync'] = metadata['sync'] != 'false' + + metadata['author_html'] = '<br>'.join(metadata['authors'].split(',')) + + return metadata, valid_sections + +def parse_metadata_section(metadata, section): + """ + parse a metadata key: value pair + """ + for line in section.split("\n"): + if ': ' not in line: + continue + key, value = line.split(': ', 1) + metadata[key.lower()] = value + + +def read_research_post_index(): + """ + Generate an index of the research (blog) posts + """ + return read_post_index('research') + + +def read_datasets_index(): + """ + Generate an index of the datasets + """ + return read_post_index('datasets') + + +def read_post_index(basedir): + """ + Generate an index of posts + """ + posts = [] + for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))): + metadata, valid_sections = read_metadata(fn) + if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': + continue + posts.append(metadata) + if not len(posts): + posts.append({ + 'title': 'Placeholder', + 'slug': 'placeholder', + 'date': 'Placeholder', + 'url': '/', + }) + return posts diff --git 
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index d6705214..3792e6f1 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -10,6 +10,49 @@ import app.site.s3 as s3
 renderer = mistune.Renderer(escape=False)
 markdown = mistune.Markdown(renderer=renderer)
 
+def parse_markdown(metadata, sections, s3_path, skip_h1=False):
+    """
+    parse page into sections, preprocess the markdown to handle our modifications
+    """
+    groups = []
+    current_group = []
+    for section in sections:
+        if skip_h1 and section.startswith('# '):
+            continue
+        elif section.strip().startswith('```'):
+            groups.append(format_section(current_group, s3_path))
+            current_group = []
+            current_group.append(section)
+            if section.strip().endswith('```'):
+                groups.append(format_applet("\n\n".join(current_group), s3_path))
+                current_group = []
+        elif section.strip().endswith('```'):
+            current_group.append(section)
+            groups.append(format_applet("\n\n".join(current_group), s3_path))
+            current_group = []
+        elif section.startswith('+ '):
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_metadata(section))
+            current_group = []
+        elif '![fullwidth:' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='fullwidth'))
+            current_group = []
+        elif '![wide:' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='wide'))
+            current_group = []
+        elif '![' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='images'))
+            current_group = []
+        else:
+            current_group.append(section)
+    groups.append(format_section(current_group, s3_path))
+    content = "".join(groups)
+    return content
+
+
 def fix_images(lines, s3_path):
     """
     do our own transformation of the markdown around images to handle wide images etc
@@ -32,6 +75,7 @@ def fix_images(lines, s3_path):
         real_lines.append(line)
     return "\n".join(real_lines)
 
+
 def format_section(lines, s3_path, type=''):
     """
     format a normal markdown section
@@ -44,6 +88,7 @@ def format_section(lines, s3_path, type=''):
         return "<section>" + markdown(lines) + "</section>"
     return ""
 
+
 def format_metadata(section):
     """
     format a metadata section (+ key: value pairs)
@@ -54,7 +99,11 @@ def format_metadata(section):
         meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
     return "<section><div class='meta'>{}</div></section>".format(''.join(meta))
 
+
 def format_applet(section, s3_path):
+    """
+    Format the applets, which load javascript modules like the map and CSVs
+    """
     # print(section)
     payload = section.strip('```').strip().strip('```').strip().split('\n')
     applet = {}
@@ -79,47 +128,6 @@ def format_applet(section, s3_path):
         applet['fields'] = payload[1:]
     return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(json.dumps(applet))
 
-def parse_markdown(sections, s3_path, skip_h1=False):
-    """
-    parse page into sections, preprocess the markdown to handle our modifications
-    """
-    groups = []
-    current_group = []
-    for section in sections:
-        if skip_h1 and section.startswith('# '):
-            continue
-        elif section.strip().startswith('```'):
-            groups.append(format_section(current_group, s3_path))
-            current_group = []
-            current_group.append(section)
-            if section.strip().endswith('```'):
-                groups.append(format_applet("\n\n".join(current_group), s3_path))
-                current_group = []
-        elif section.strip().endswith('```'):
-            current_group.append(section)
-            groups.append(format_applet("\n\n".join(current_group), s3_path))
-            current_group = []
-        elif section.startswith('+ '):
-            groups.append(format_section(current_group, s3_path))
-            groups.append(format_metadata(section))
-            current_group = []
-        elif '![fullwidth:' in section:
-            groups.append(format_section(current_group, s3_path))
-            groups.append(format_section([section], s3_path, type='fullwidth'))
-            current_group = []
-        elif '![wide:' in section:
-            groups.append(format_section(current_group, s3_path))
-            groups.append(format_section([section], s3_path, type='wide'))
-            current_group = []
-        elif '![' in section:
-            groups.append(format_section(current_group, s3_path))
-            groups.append(format_section([section], s3_path, type='images'))
-            current_group = []
-        else:
-            current_group.append(section)
-    groups.append(format_section(current_group, s3_path))
-    content = "".join(groups)
-    return content
 
 def parse_research_index(research_posts):
     """
@@ -141,117 +149,3 @@ def parse_research_index(research_posts):
     content += row
     content += '</div>'
     return content
-
-def read_metadata(fn):
-    """
-    Read in read a markdown file and extract the metadata
-    """
-    with open(fn, "r") as file:
-        data = file.read()
-    data = data.replace("\n ", "\n")
-    if "\n" in data:
-        data = data.replace("\r", "")
-    else:
-        data = data.replace("\r", "\n")
-    sections = data.split("\n\n")
-    return parse_metadata(fn, sections)
-
-default_metadata = {
-    'status': 'published',
-    'title': 'Untitled Page',
-    'desc': '',
-    'slug': '',
-    'published': '2018-12-31',
-    'updated': '2018-12-31',
-    'authors': 'Adam Harvey',
-    'sync': 'true',
-    'tagline': '',
-}
-
-def parse_metadata_section(metadata, section):
-    """
-    parse a metadata key: value pair
-    """
-    for line in section.split("\n"):
-        if ': ' not in line:
-            continue
-        key, value = line.split(': ', 1)
-        metadata[key.lower()] = value
-
-def parse_metadata(fn, sections):
-    """
-    parse the metadata headers in a markdown file
-    (everything before the second ---------)
-    also generates appropriate urls for this page :)
-    """
-    found_meta = False
-    metadata = {}
-    valid_sections = []
-    for section in sections:
-        if not found_meta and ': ' in section:
-            found_meta = True
-            parse_metadata_section(metadata, section)
-            continue
-        if '-----' in section:
-            continue
-        if found_meta:
-            valid_sections.append(section)
-
-    if 'title' not in metadata:
-        print('warning: {} has no title'.format(fn))
-    for key in default_metadata:
-        if key not in metadata:
-            metadata[key] = default_metadata[key]
-
-    basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
-    basename = os.path.basename(fn)
-    if basedir == '/':
-        metadata['path'] = '/'
-        metadata['url'] = '/'
-    elif basename == 'index.md':
-        metadata['path'] = basedir + '/'
-        metadata['url'] = metadata['path']
-    else:
-        metadata['path'] = basedir + '/'
-        metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
-
-    if metadata['status'] == 'published|draft|private':
-        metadata['status'] = 'published'
-
-    metadata['sync'] = metadata['sync'] != 'false'
-
-    metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
-
-    return metadata, valid_sections
-
-def read_research_post_index():
-    """
-    Generate an index of the research (blog) posts
-    """
-    return read_post_index('research')
-
-def read_datasets_index():
-    """
-    Generate an index of the datasets
-    """
-    return read_post_index('datasets')
-
-def read_post_index(basedir):
-    """
-    Generate an index of posts
-    """
-    posts = []
-    for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))):
-        metadata, valid_sections = read_metadata(fn)
-        if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft':
-            continue
-        posts.append(metadata)
-    if not len(posts):
-        posts.append({
-            'title': 'Placeholder',
-            'slug': 'placeholder',
-            'date': 'Placeholder',
-            'url': '/',
-        })
-    return posts
-
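Taken together, the commit splits responsibilities: loader.py reads files and parses metadata, while parser.py renders sections to HTML. A rough sketch of the resulting call chain, mirroring what build_page now does (the filename is illustrative):

```python
import app.settings.app_cfg as cfg
import app.site.loader as loader
import app.site.parser as parser
import app.site.s3 as s3

# loader reads the markdown file and splits the header from the body sections
metadata, sections = loader.read_metadata('site/content/research/example/index.md')

# parser renders the body sections; parse_markdown now also receives metadata,
# although as of this diff the new parameter is not yet used in its body
s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, metadata['path'])
html = parser.parse_markdown(metadata, sections, s3_path, skip_h1=True)
```

Note that build_index now calls loader.parse_research_index, while parse_research_index itself remains defined in parser.py in this diff.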
