1 files changed, 123 insertions, 0 deletions
diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py
new file mode 100644
index 00000000..691efb25
--- /dev/null
+++ b/megapixels/app/site/loader.py
@@ -0,0 +1,123 @@
+import os
+import re
+import glob
+import simplejson as json
+
+import app.settings.app_cfg as cfg
+
+def read_metadata(fn):
+  """
+  Read a markdown file and return (metadata, sections) via parse_metadata.
+  Sections are the blank-line-separated chunks of the normalized text.
+  """
+  # Explicit encoding so non-ASCII content does not depend on the locale.
+  with open(fn, "r", encoding="utf-8") as file:
+    data = file.read()
+  # Normalize line endings first (drop CR next to LF, else CR -> LF) so the
+  # leading-space strip below also applies to lines that ended in a bare CR.
+  data = data.replace("\r", "") if "\n" in data else data.replace("\r", "\n")
+  data = data.replace("\n ", "\n")
+  return parse_metadata(fn, data.split("\n\n"))
+
+
+# Fallback values for any metadata key a page does not define itself.
+default_metadata = dict(
+  status='published',
+  title='Untitled Page',
+  desc='', slug='',
+  published='2018-12-31',
+  updated='2018-12-31',
+  authors='Adam Harvey',
+  sync='true',
+  tagline='',
+)
+
+def parse_metadata(fn, sections):
+  """
+  Parse the metadata header of a markdown file and derive its urls.
+
+  The first section containing ': ' is parsed as metadata; sections before
+  it and any section containing '-----' are dropped. Missing keys are
+  filled from default_metadata. Returns (metadata, valid_sections).
+  """
+  found_meta = False
+  metadata = {}
+  valid_sections = []
+  for section in sections:
+    if not found_meta and ': ' in section:
+      found_meta = True
+      parse_metadata_section(metadata, section)
+    elif found_meta and '-----' not in section:
+      valid_sections.append(section)
+
+  if 'title' not in metadata:
+    print('warning: {} has no title'.format(fn))
+  for key, value in default_metadata.items():
+    metadata.setdefault(key, value)
+
+  # Directory of the file with the site content root removed from the path.
+  basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
+  basename = os.path.basename(fn)
+  metadata['path'] = '/' if basedir == '/' else basedir + '/'
+  if basedir == '/' or basename == 'index.md':
+    metadata['url'] = metadata['path']
+  else:
+    # Drop only the trailing extension; str.replace('.md', '') would also
+    # eat a '.md' in the middle of the name (e.g. 'a.mdx.md' -> 'ax').
+    stem = basename[:-3] if basename.endswith('.md') else basename
+    metadata['url'] = metadata['path'] + stem + '/'
+
+  # NOTE(review): this only matches the literal string below (presumably an
+  # unedited template value); other unknown statuses are kept as-is.
+  if metadata['status'] == 'published|draft|private':
+    metadata['status'] = 'published'
+
+  # 'sync' becomes a bool: anything except the exact string 'false' is True.
+  metadata['sync'] = metadata['sync'] != 'false'
+  metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
+
+  return metadata, valid_sections
+
+def parse_metadata_section(metadata, section):
+  """
+  Parse each "Key: value" line of section into metadata (mutated in place).
+  Keys are lowercased; keys/values are stripped so "draft " == "draft".
+  """
+  for line in section.split("\n"):
+    if ': ' in line:
+      key, value = line.split(': ', 1)
+      metadata[key.strip().lower()] = value.strip()
+
+
+def read_research_post_index():
+  """Return the index of the research (blog) posts."""
+  # Research posts are indexed from the 'research' content subdirectory.
+  section = 'research'
+  return read_post_index(section)
+
+
+def read_datasets_index():
+  """Return the index of the dataset pages."""
+  # Dataset pages are indexed from the 'datasets' content subdirectory.
+  section = 'datasets'
+  return read_post_index(section)
+
+
+def read_post_index(basedir):
+  """
+  Build the list of post metadata for one content subdirectory,
+  skipping private and draft posts; never returns an empty list.
+  """
+  pattern = os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md')
+  hidden = ('private', 'draft')
+  index = []
+  for path in sorted(glob.glob(pattern)):
+    meta, _ = read_metadata(path)
+    if meta is not None and meta['status'] not in hidden:
+      index.append(meta)
+  if len(index) == 0:
+    # Nothing listable: fall back to a single placeholder entry.
+    placeholder = {'title': 'Placeholder', 'slug': 'placeholder',
+                   'date': 'Placeholder', 'url': '/'}
+    index.append(placeholder)
+  return index