From b1e7dc570fe25749a2e1b02c9e859df6588b4660 Mon Sep 17 00:00:00 2001
From: Jules Laplace <julescarbon@gmail.com>
Date: Sat, 15 Dec 2018 22:04:41 +0100
Subject: move builder

---
 megapixels/app/site/README.md  |  21 +++++
 megapixels/app/site/builder.py |  85 +++++++++++++++++++
 megapixels/app/site/parser.py  | 187 +++++++++++++++++++++++++++++++++++++++++
 megapixels/app/site/s3.py      |  60 +++++++++++++
 4 files changed, 353 insertions(+)
 create mode 100644 megapixels/app/site/README.md
 create mode 100644 megapixels/app/site/builder.py
 create mode 100644 megapixels/app/site/parser.py
 create mode 100644 megapixels/app/site/s3.py

(limited to 'megapixels/app/site')

diff --git a/megapixels/app/site/README.md b/megapixels/app/site/README.md
new file mode 100644
index 00000000..1a6d3a1e
--- /dev/null
+++ b/megapixels/app/site/README.md
@@ -0,0 +1,21 @@
+Megapixels Static Site Generator
+================================
+
+The index, blog, about, and other pages are built using this static site generator.
+
+## Metadata
+
+```
+status: published|draft|private
+title: From 1 to 100 Pixels
+desc: High resolution insights from low resolution imagery
+slug: from-1-to-100-pixels
+published: 2018-12-04
+updated: 2018-12-04
+authors: Adam Harvey, Berit Gilma, Matthew Stender
+```
+
+## S3 Assets
+
+Static assets: `v1/site/about/assets/picture.jpg`
+Dataset assets: `v1/datasets/lfw/assets/picture.jpg`
diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py
new file mode 100644
index 00000000..df609f60
--- /dev/null
+++ b/megapixels/app/site/builder.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+
+import os
+import glob
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+import app.settings.app_cfg as cfg
+
+import app.builder.s3 as s3
+import app.builder.parser as parser
+
+# Shared Jinja2 environment: templates load from cfg.DIR_SITE_TEMPLATES and the
+# empty list passed to select_autoescape enables autoescaping for no extension.
+_template_loader = FileSystemLoader(cfg.DIR_SITE_TEMPLATES)
+env = Environment(loader=_template_loader, autoescape=select_autoescape([]))
+
+def build_page(fn, research_posts):
+  """Render one markdown source file into the public site tree.
+
+  fn is the path of the .md file; research_posts is the list of research
+  post metadata dicts handed to every template. Writes
+  <cfg.DIR_SITE_PUBLIC><url>/index.html and returns None. May upload or
+  delete S3 objects through s3.sync_directory.
+  """
+  metadata, sections = parser.read_metadata(fn)
+  if metadata is None:
+    print("{} has no metadata".format(fn))
+    return
+
+  page_url = metadata['url']
+  print(page_url)
+
+  # Template choice: site root, research post (its '# ' section is skipped), or generic page.
+  is_research = page_url != '/' and 'research/' in fn
+  if page_url == '/':
+    template_name = "home.html"
+  elif is_research:
+    template_name = "research.html"
+  else:
+    template_name = "page.html"
+  template = env.get_template(template_name)
+
+  # Files under a 'datasets/' path use the datasets S3 prefix, all others the site prefix.
+  s3_dir = cfg.S3_DATASETS_PATH if 'datasets/' in fn else cfg.S3_SITE_PATH
+  s3_path = s3.make_s3_path(s3_dir, metadata['path'])
+
+  # Files whose path contains 'index.md' trigger an S3 sync of their directory.
+  if 'index.md' in fn:
+    s3.sync_directory(os.path.dirname(fn), s3_dir, metadata)
+
+  html = template.render(
+    metadata=metadata,
+    content=parser.parse_markdown(sections, s3_path, skip_h1=is_research),
+    research_posts=research_posts,
+    latest_research_post=research_posts[-1],
+  )
+
+  output_path = cfg.DIR_SITE_PUBLIC + page_url
+  os.makedirs(output_path, exist_ok=True)
+  with open(os.path.join(output_path, "index.html"), "w") as out:
+    out.write(html)
+
+  print("______")
+
+def build_research_index(research_posts):
+  """Write /research/index.html: the research index.md body followed by one linked row per post."""
+  metadata, sections = parser.read_metadata('../site/content/research/index.md')
+  template = env.get_template("page.html")
+  s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, metadata['path'])
+  content = parser.parse_markdown(sections, s3_path, skip_h1=False)
+  content += parser.parse_research_index(research_posts)
+  html = template.render(
+    metadata=metadata, content=content,
+    research_posts=research_posts, latest_research_post=research_posts[-1],
+  )
+  output_dir = cfg.DIR_SITE_PUBLIC + '/research'
+  os.makedirs(output_dir, exist_ok=True)  # the directory may not exist yet when this runs on its own
+  with open(output_dir + '/index.html', "w") as file:
+    file.write(html)
+
+def build_site():  # builds every .md page under cfg.DIR_SITE_CONTENT, then the research index
+  posts = parser.read_research_post_index()
+  for md_fn in glob.iglob(os.path.join(cfg.DIR_SITE_CONTENT, "**/*.md"), recursive=True):
+    build_page(md_fn, posts)
+  build_research_index(posts)
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
new file mode 100644
index 00000000..add3f386
--- /dev/null
+++ b/megapixels/app/site/parser.py
@@ -0,0 +1,187 @@
+import os
+import re
+import glob
+import mistune
+
+import app.settings.app_cfg as cfg
+import app.builder.s3 as s3
+
+renderer = mistune.Renderer(escape=False)  # escape=False: presumably lets raw HTML in the markdown through — confirm against mistune docs
+markdown = mistune.Markdown(renderer=renderer)  # module-level converter called by format_section
+
+def fix_images(lines, s3_path):
+  """Join sections and turn each markdown image line into an <img> wrapped in div.image.
+  A 'prefix:' in the alt text (e.g. 'wide:') is dropped; the rest becomes alt and caption.
+  Lines containing '![' without a complete '](url)' part are left as they are."""
+  real_lines = []
+  for line in "\n\n".join(lines).split("\n"):
+    stripped = line.replace('![', '')
+    if "![" in line and '](' in stripped and ')' in stripped.split('](', 1)[1]:
+      alt_text, tail = stripped.split('](', 1)
+      url = tail.split(')', 1)[0]
+      if ':' in alt_text:
+        alt_text = alt_text.split(':', 1)[1]
+      img_tag = "<img src='{}' alt='{}'>".format(s3_path + url, alt_text.replace("'", ""))
+      caption = "<div class='caption'>{}</div>".format(alt_text) if alt_text else ""
+      line = "<div class='image'>{}{}</div>".format(img_tag, caption)
+    real_lines.append(line)
+  return "\n".join(real_lines)
+
+def format_section(lines, s3_path, type=''):
+  """Render a group of sections as one <section> of HTML ('' for an empty group); type, if set, becomes its class."""
+  if not len(lines):
+    return ""
+  html = markdown(fix_images(lines, s3_path))
+  if type:
+    return "<section class='{}'>{}</section>".format(type, html)
+  return "<section>" + html + "</section>"
+
+def format_metadata(section):
+  """Turn a block of '+ key: value' lines into a <section> holding one key/value div pair per line."""
+  row = "<div><div class='gray'>{}</div><div>{}</div></div>"
+  # line[2:] drops the first two characters (the '+ ' bullet) before splitting key from value
+  pairs = (line[2:].split(': ', 1) for line in section.split('\n'))
+  return "<section><div class='meta'>{}</div></section>".format(''.join(row.format(key, value) for key, value in pairs))
+
+def format_applet(section):
+  """Return applet HTML for a ``` fenced section, or '' when its command is not recognised."""
+  payload = section.replace('```', '').strip().split('\n')
+  # First payload line is 'command' or 'command: option'; split once so the option may contain ': '.
+  command, _, opt = payload[0].partition(': ')
+  if command == 'load_file':
+    # NOTE(review): the '{}' placeholder is emitted literally and opt is not used yet — confirm intent.
+    return "<section><div class='applet'>{}</div></section>"
+  return ""  # was an implicit None, which the "".join() in parse_markdown cannot accept
+
+
+def parse_markdown(sections, s3_path, skip_h1=False):
+  """Convert a page's sections into one HTML string.
+
+  Runs of plain sections are rendered together. A ``` section (applet), a
+  '+ ' section (metadata list) or a section containing '![' ends the current
+  run and becomes its own block. skip_h1 drops sections starting with '# '.
+  """
+  groups = []
+  current_group = []
+  for section in sections:
+    if skip_h1 and section.startswith('# '):
+      continue
+    if section.startswith('```'):  # was startsWith, which raises AttributeError on str
+      special = format_applet(section)
+    elif section.startswith('+ '):
+      special = format_metadata(section)
+    elif '![wide:' in section:
+      special = format_section([section], s3_path, type='wide')
+    elif '![' in section:
+      special = format_section([section], s3_path, type='images')
+    else:
+      current_group.append(section)
+      continue
+    groups.extend([format_section(current_group, s3_path), special or ""])
+    current_group = []
+  groups.append(format_section(current_group, s3_path))
+  return "".join(groups)
+
+def parse_research_index(research_posts):
+  """Build the HTML listing for the research index page.
+
+  Each post dict must provide 'path', 'title' and 'tagline'; 'image' is
+  optional. Returns a div.research_index with one linked row per post.
+  """
+  # Inline GIF data URI used when a post's metadata has no 'image' entry
+  blank_image = 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='
+  row_html = "<a href='{}'><section class='wide'><img src='{}' alt='Research post' /><section><h1>{}</h1><h2>{}</h2></section></section></a>"
+  rows = []
+  for post in research_posts:
+    s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
+    post_image = s3_path + post['image'] if 'image' in post else blank_image
+    rows.append(row_html.format(post['path'], post_image, post['title'], post['tagline']))
+
+  return "<div class='research_index'>" + "".join(rows) + '</div>'
+
+def read_metadata(fn):
+  """Read the markdown file fn and split it into blank-line-separated sections.
+
+  Returns whatever parse_metadata(fn, sections) returns."""
+  with open(fn, "r") as file:
+    text = file.read().replace("\n ", "\n")  # remove one space directly after each newline
+  # Normalise newlines: strip '\r' when '\n' is present, otherwise treat '\r' as the newline.
+  text = text.replace("\r", "" if "\n" in text else "\n")
+  # Sections are the blocks separated by an empty line.
+  return parse_metadata(fn, text.split("\n\n"))
+
+default_metadata = dict(  # fallback values parse_metadata fills in for missing keys
+  status='published',
+  title='Untitled Page',
+  desc='',
+  slug='',
+  published='2018-12-31',
+  updated='2018-12-31',
+  authors='Adam Harvey',
+  sync='true',  # kept as a string here; parse_metadata turns it into a bool
+  tagline='',
+)
+
+def parse_metadata_section(metadata, section):
+  """Store each 'Key: value' line of section in metadata under the lowercased key; other lines are ignored."""
+  for line in section.split("\n"):
+    key, sep, value = line.partition(': ')
+    if sep:
+      metadata[key.lower()] = value
+
+def parse_metadata(fn, sections):
+  """Split a page's sections into its metadata dict and its content sections.
+
+  The first section containing ': ' is read as the metadata block; sections
+  before it, and any section containing '-----', are discarded. Missing keys
+  are filled from default_metadata, and 'path', 'url', 'sync' (bool) and
+  'author_html' are derived. Returns (metadata, content_sections).
+  """
+  metadata = {}
+  valid_sections = []
+  found_meta = False
+  for section in sections:
+    if not found_meta and ': ' in section:
+      found_meta = True
+      parse_metadata_section(metadata, section)
+    elif found_meta and '-----' not in section:
+      valid_sections.append(section)
+
+  if 'title' not in metadata:
+    print('warning: {} has no title'.format(fn))
+  for key, fallback in default_metadata.items():
+    metadata.setdefault(key, fallback)
+
+  # Site-relative location: remove the content root from fn, then take the directory.
+  basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
+  basename = os.path.basename(fn)
+  metadata['path'] = '/' if basedir == '/' else basedir + '/'
+  if basedir == '/' or basename == 'index.md':
+    metadata['url'] = metadata['path']
+  else:
+    metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
+
+  # An unedited 'published|draft|private' status value counts as published.
+  if metadata['status'] == 'published|draft|private':
+    metadata['status'] = 'published'
+
+  metadata['sync'] = metadata['sync'] != 'false'
+  metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
+  return metadata, valid_sections
+
+def read_research_post_index():
+  """Return metadata dicts of all research posts that are not draft/private, sorted by file path.
+  When none exist, one placeholder post is returned so callers can still index [-1].
+  """
+  posts = []
+  for fn in sorted(glob.glob('../site/content/research/*/index.md')):
+    metadata, _sections = read_metadata(fn)
+    if metadata is None or metadata['status'] in ('private', 'draft'):
+      continue
+    posts.append(metadata)
+  if not posts:
+    # Carries every key parse_research_index reads ('path', 'title', 'tagline'); two were missing.
+    posts.append({'title': 'Placeholder', 'slug': 'placeholder', 'date': 'Placeholder',
+                  'url': '/', 'path': '/', 'tagline': ''})
+  return posts
+
diff --git a/megapixels/app/site/s3.py b/megapixels/app/site/s3.py
new file mode 100644
index 00000000..99726a4d
--- /dev/null
+++ b/megapixels/app/site/s3.py
@@ -0,0 +1,60 @@
+import os
+import glob
+import boto3
+
+def sync_directory(base_fn, s3_path, metadata):
+  """Mirror the local <base_fn>/assets/ directory to S3.
+
+  base_fn is the page's directory, s3_path the key prefix for the site or
+  datasets tree, and metadata the page metadata ('sync' and 'url' are read).
+  Remote objects missing locally are deleted, objects older than the local
+  file are re-uploaded, and new local files are uploaded with a public-read
+  ACL. Connection settings come from the S3_* environment variables.
+  """
+  if not metadata['sync']:
+    return
+
+  # Local asset names still waiting to be matched against a remote object.
+  fns = {}
+  for fn in glob.glob(os.path.join(base_fn, 'assets/*')):
+    fns[os.path.basename(fn)] = True
+
+  bucket = os.getenv('S3_BUCKET')
+  remote_path = s3_path + metadata['url']
+  # List only this page's own assets/ keys. Listing everything below the page
+  # URL also returned the assets of nested pages, which were then deleted
+  # because their names are not in this page's local assets directory.
+  assets_prefix = os.path.join(remote_path, 'assets', '')
+
+  s3_client = boto3.session.Session().client(
+    service_name='s3',
+    aws_access_key_id=os.getenv('S3_KEY'),
+    aws_secret_access_key=os.getenv('S3_SECRET'),
+    endpoint_url=os.getenv('S3_ENDPOINT'),
+    region_name=os.getenv('S3_REGION'),
+  )
+
+  # NOTE(review): list_objects returns a single page of results; directories
+  # with very many assets would need pagination — confirm against boto3 docs.
+  directory = s3_client.list_objects(Bucket=bucket, Prefix=assets_prefix)
+  for obj in directory.get('Contents', []):
+    s3_fn = obj['Key']
+    fn = os.path.basename(s3_fn)
+    local_fn = os.path.join(base_fn, 'assets', fn)
+    if fn not in fns:
+      print("s3 delete {}".format(s3_fn))
+      s3_client.delete_object(Bucket=bucket, Key=s3_fn)
+    else:
+      del fns[fn]
+      if obj['LastModified'].timestamp() < os.path.getmtime(local_fn):
+        print("s3 update {}".format(s3_fn))
+        s3_client.upload_file(local_fn, bucket, s3_fn, ExtraArgs={ 'ACL': 'public-read' })
+
+  for fn in fns:
+    local_fn = os.path.join(base_fn, 'assets', fn)
+    s3_fn = os.path.join(remote_path, 'assets', fn)
+    print("s3 create {}".format(s3_fn))
+    s3_client.upload_file(local_fn, bucket, s3_fn, ExtraArgs={ 'ACL': 'public-read' })
+
+def make_s3_path(s3_dir, metadata_path):  # public URL prefix: <S3_ENDPOINT>/<S3_BUCKET>/<s3_dir><metadata_path>
+  return "/".join([str(os.getenv('S3_ENDPOINT')), str(os.getenv('S3_BUCKET')), "{}{}".format(s3_dir, metadata_path)])
-- 
cgit v1.2.3-70-g09d2