megapixels/app/site/parser.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

import os
import re
import glob
import simplejson as json
import mistune

import app.settings.app_cfg as cfg
import app.site.s3 as s3

# Module-level markdown converter, built once and called by format_section.
# NOTE(review): escape=False presumably lets the raw <div>/<img> HTML that
# fix_images injects pass through unescaped -- confirm against the mistune docs.
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)

def parse_markdown(metadata, sections, s3_path, skip_h1=False):
  """
  Turn a page's markdown sections into one HTML string.

  metadata: not used by this function
  sections: iterable of markdown section strings
  s3_path: prefix handed to the section/applet formatters
  skip_h1: when true, sections starting with '# ' are dropped

  Plain sections are buffered and rendered together. A fenced (```) block,
  a '+ ' metadata section or a section containing an image first flushes the
  buffer and is then rendered on its own.
  """
  # checked in this order so the more specific markers win over plain '!['
  image_markers = (
    ('![fullwidth:', 'fullwidth'),
    ('![wide:', 'wide'),
    ('![', 'images'),
  )
  html = []
  pending = []
  for section in sections:
    if skip_h1 and section.startswith('# '):
      continue

    stripped = section.strip()
    if stripped.startswith('```'):
      # opening fence: flush the prose gathered so far, start a new buffer
      html.append(format_section(pending, s3_path))
      pending = [section]
      if stripped.endswith('```'):
        # the fence opens and closes inside the same section
        html.append(format_applet("\n\n".join(pending), s3_path))
        pending = []
      continue

    if stripped.endswith('```'):
      # closing fence: everything buffered since the opening fence is the applet
      pending.append(section)
      html.append(format_applet("\n\n".join(pending), s3_path))
      pending = []
      continue

    if section.startswith('+ '):
      html.append(format_section(pending, s3_path))
      html.append(format_metadata(section))
      pending = []
      continue

    image_type = None
    for marker, kind in image_markers:
      if marker in section:
        image_type = kind
        break
    if image_type is None:
      pending.append(section)
      continue

    html.append(format_section(pending, s3_path))
    html.append(format_section([section], s3_path, type=image_type))
    pending = []

  # whatever is still buffered after the last section
  html.append(format_section(pending, s3_path))
  return "".join(html)


def fix_images(lines, s3_path):
  """
  Do our own transformation of the markdown around images to handle wide images etc.

  lines: markdown sections; they are joined with blank lines and then
    processed one physical line at a time
  s3_path: prefix prepended to every image URL

  A line holding a well-formed image (`![alt](url)`) is replaced with a
  <div class='image'> wrapper around an <img> tag. Anything in the alt text up
  to the first ':' (e.g. `wide:`) is dropped, and what remains, if non-empty,
  is also emitted as a caption. Text before '![' on the same line ends up in
  the alt text and text after the closing ')' is discarded.

  A line that contains '![' but is not a complete image (no '](' or no
  closing ')') is left untouched instead of raising ValueError.

  Returns the rewritten block as a single string.
  """
  real_lines = []
  block = "\n\n".join(lines)
  for line in block.split("\n"):
    if "![" in line:
      alt_text, opener, rest = line.replace('![', '').partition('](')
      url, closer, _ = rest.partition(')')
      # only rewrite when both delimiters were found; otherwise keep the line
      if opener and closer:
        if ':' in alt_text:
          # drop the layout prefix in front of the caption
          alt_text = alt_text.split(':', 1)[1]
        # an apostrophe would end the single-quoted alt attribute early
        img_tag = "<img src='{}' alt='{}'>".format(s3_path + url, alt_text.replace("'", ""))
        if alt_text:
          line = "<div class='image'>{}<div class='caption'>{}</div></div>".format(img_tag, alt_text)
        else:
          line = "<div class='image'>{}</div>".format(img_tag)
    real_lines.append(line)
  return "\n".join(real_lines)


def format_section(lines, s3_path, type=''):
  """
  Render a group of markdown sections as a single <section> element.

  lines: markdown sections belonging to this group
  s3_path: prefix passed on to fix_images for image URLs
  type: optional value for the section's class attribute

  Returns an empty string when there is nothing to render.
  """
  if not len(lines):
    return ""
  # images are rewritten to HTML first, then the block goes through markdown
  body = markdown(fix_images(lines, s3_path))
  if not type:
    return "<section>" + body + "</section>"
  return "<section class='{}'>{}</section>".format(type, body)


def format_metadata(section):
  """
  Format a metadata section ("+ key: value" lines) as an HTML block.

  section: newline-separated lines. The first two characters of each line
    (the "+ " marker) are dropped and the rest is split on the first ": ".

  Blank lines are skipped, and a line with no ": " separator is emitted as a
  key with an empty value, so a trailing newline or a bare "+ key" line no
  longer raises ValueError.

  Returns a <section> containing one key/value <div> pair per line.
  """
  meta = []
  for line in section.split('\n'):
    if not line.strip():
      continue
    # partition never fails: a missing separator simply yields value == ''
    key, _, value = line[2:].partition(': ')
    meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
  return "<section><div class='meta'>{}</div></section>".format(''.join(meta))


def format_applet(section, s3_path):
  """
  Format the applets, which load javascript modules like the map and CSVs.

  section: a fenced (```) block. Its first line is "command" or
    "command: opt"; any further lines are passed along as fields.
  s3_path: prefix added to the option of a `load_file` command unless the
    option already starts with 'http'

  Blocks whose command is python, javascript or code are rendered as a normal
  markdown section; an empty block yields an empty string. Otherwise returns
  an applet container whose data-payload attribute holds the JSON-encoded
  command, option and fields.
  """
  payload = section.strip('```').strip().strip('```').strip().split('\n')
  # split on the first ': ' only, so the option itself may contain ': '
  command, separator, opt = payload[0].partition(': ')
  if not separator:
    opt = None
  if command in ('python', 'javascript', 'code'):
    return format_section([ section ], s3_path)
  if command == '':
    return ''

  applet = {'command': command}
  if opt:
    applet['opt'] = opt
  # a bare `load_file` has no option to prefix
  if command == 'load_file' and opt is not None and opt[0:4] != 'http':
    applet['opt'] = s3_path + opt
  if len(payload) > 1:
    applet['fields'] = payload[1:]
  # the attribute is single-quoted, so a raw apostrophe would end it early
  encoded = json.dumps(applet).replace("'", "&#39;")
  return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(encoded)


def parse_research_index(research_posts):
  """
  Build the HTML index listing the research pages.

  research_posts: iterable of dicts; 'path', 'title' and 'tagline' are read
    from every post, and 'image' when present

  Each post becomes a link wrapping a wide section with its image, title and
  tagline. Posts without an image get an inline placeholder GIF.
  Returns the rows wrapped in a <div class='research_index'>.
  """
  placeholder_image = 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='
  row_template = "<a href='{}'><section class='wide'><img src='{}' alt='Research post' /><section><h1>{}</h1><h2>{}</h2></section></section></a>"
  rows = []
  for post in research_posts:
    print(post)
    s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
    post_image = s3_path + post['image'] if 'image' in post else placeholder_image
    rows.append(row_template.format(
      post['path'],
      post_image,
      post['title'],
      post['tagline']))
  return "<div class='research_index'>" + "".join(rows) + '</div>'