import os
import re
import glob
import simplejson as json
import mistune
import app.settings.app_cfg as cfg
import app.site.s3 as s3
# module-level mistune renderer/parser shared by all formatting helpers;
# escape=False so raw HTML embedded in the markdown passes through unescaped
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)
def fix_images(lines, s3_path):
    """
    Do our own transformation of the markdown around images: rewrite
    relative image URLs onto the s3 bucket and turn captions into markup.

    lines: list of markdown section strings
    s3_path: URL prefix prepended to relative image paths
    Returns the transformed text as a single newline-joined string.

    NOTE(review): the inline HTML in this function was lost when the file
    was mangled; the <img>/<figure> markup below is a reconstruction —
    confirm against the site templates.
    """
    real_lines = []
    block = "\n\n".join(lines)
    for line in block.split("\n"):
        # markdown image syntax: ![alt](url), optionally with a type
        # prefix in the alt text, e.g. ![wide:caption](pic.jpg)
        if "![" in line and "](" in line:
            _, rest = line.split("![", 1)
            alt_text, tail = rest.split("](", 1)
            url, tail = tail.split(')', 1)
            if ':' in alt_text:
                # drop the "wide:"/"fullwidth:" style prefix, keep caption
                _, alt_text = alt_text.split(':', 1)
            img_tag = "<img src='{}' alt='{}' />".format(
                s3_path + url, alt_text.replace("'", ""))
            if len(alt_text):
                line = "<figure>{}<figcaption>{}</figcaption></figure>".format(
                    img_tag, alt_text)
            else:
                line = img_tag
        real_lines.append(line)
    return "\n".join(real_lines)
def format_section(lines, s3_path, type=''):
    """
    Render a list of markdown sections into one HTML section.

    lines: list of markdown section strings
    s3_path: URL prefix passed through to fix_images
    type: optional CSS class ('wide', 'fullwidth', 'images', ...)
    Returns '' when there are no sections to render.

    NOTE(review): the wrapper markup was lost when this file was mangled
    (the original returned empty format strings); the <section> tags are
    a reconstruction — confirm against the site templates.
    """
    if len(lines):
        lines = fix_images(lines, s3_path)
        if type:
            return "<section class='{}'>{}</section>".format(type, markdown(lines))
        return "<section>{}</section>".format(markdown(lines))
    return ""
def format_metadata(section):
    """
    Format a metadata section ("+ key: value" lines) as an HTML
    definition list.

    NOTE(review): the markup was lost when this file was mangled (the
    original appended empty format strings); the <dl>/<dt>/<dd> tags are
    a reconstruction — confirm against the site templates.
    """
    meta = []
    for line in section.split('\n'):
        # each line looks like "+ Key: value"; strip the "+ " prefix
        key, value = line[2:].split(': ', 1)
        meta.append("<dt>{}</dt><dd>{}</dd>".format(key, value))
    return "<dl class='metadata'>{}</dl>".format(''.join(meta))
def format_applet(section, s3_path):
    """
    Parse a fenced ``` block into an interactive-applet placeholder.

    The first payload line is "command" or "command: opt"; any further
    lines become the applet's field list. Real code fences (python /
    javascript / code) are rendered as a normal markdown section instead.

    NOTE(review): the placeholder markup was lost when this file was
    mangled; the data-payload div is a reconstruction — confirm against
    the front-end JS that consumes it.
    """
    payload = section.strip('```').strip().strip('```').strip().split('\n')
    applet = {}
    # maxsplit=1 so option values containing ': ' don't raise ValueError
    if ': ' in payload[0]:
        command, opt = payload[0].split(': ', 1)
    else:
        command = payload[0]
        opt = None
    # actual code fences pass through as ordinary markdown
    if command in ('python', 'javascript', 'code'):
        return format_section([section], s3_path)
    if command == '':
        return ''
    applet['command'] = command
    if opt:
        applet['opt'] = opt
    if command == 'load_file':
        # relative file paths are served from the s3 bucket; guard against
        # a missing option so a bare "load_file" fence doesn't crash
        if opt and opt[0:4] != 'http':
            applet['opt'] = s3_path + opt
    if len(payload) > 1:
        applet['fields'] = payload[1:]
    return "<div class='applet' data-payload='{}'></div>".format(json.dumps(applet))
def parse_markdown(sections, s3_path, skip_h1=False):
    """
    Split a page into groups of sections and preprocess the markdown to
    handle our custom blocks (applets, metadata, wide/fullwidth images).

    sections: list of markdown section strings (page split on blank lines)
    s3_path: URL prefix for images / applet assets
    skip_h1: drop top-level '# ' heading sections when True
    Returns the concatenated HTML for the whole page.
    """
    groups = []
    pending = []

    def flush_pending():
        # render whatever plain markdown has accumulated so far
        groups.append(format_section(pending, s3_path))
        del pending[:]

    for section in sections:
        trimmed = section.strip()
        if skip_h1 and section.startswith('# '):
            continue
        if trimmed.startswith('```'):
            # opening fence: close out the running group first
            flush_pending()
            pending.append(section)
            if trimmed.endswith('```'):
                # fence opens and closes within the same section
                groups.append(format_applet("\n\n".join(pending), s3_path))
                del pending[:]
        elif trimmed.endswith('```'):
            # closing fence of a multi-section applet block
            pending.append(section)
            groups.append(format_applet("\n\n".join(pending), s3_path))
            del pending[:]
        elif section.startswith('+ '):
            flush_pending()
            groups.append(format_metadata(section))
        elif '![fullwidth:' in section:
            flush_pending()
            groups.append(format_section([section], s3_path, type='fullwidth'))
        elif '![wide:' in section:
            flush_pending()
            groups.append(format_section([section], s3_path, type='wide'))
        elif '![' in section:
            flush_pending()
            groups.append(format_section([section], s3_path, type='images'))
        else:
            pending.append(section)
    flush_pending()
    return "".join(groups)
def parse_research_index(research_posts):
    """
    Generate the HTML index for the research (blog) pages.

    research_posts: list of post metadata dicts (from read_post_index),
    each with 'path', 'title', 'tagline' and an optional 'image'.

    NOTE(review): the row markup was lost when this file was mangled
    (unterminated string literals remained); the tags below are a
    reconstruction — confirm against the site templates.
    """
    content = ""
    for post in research_posts:
        s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
        if 'image' in post:
            post_image = s3_path + post['image']
        else:
            # 1x1 transparent gif placeholder when the post has no image
            post_image = 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='
        content += (
            "<a class='post' href='{}'>"
            "<img src='{}' />"
            "<h3>{}</h3><p>{}</p>"
            "</a>"
        ).format(
            post['path'],
            post_image,
            post['title'],
            post['tagline'])
    content += "<div class='clear'></div>"
    return content
def read_metadata(fn):
    """
    Read a markdown file, normalize its whitespace and line endings, and
    extract metadata + content sections via parse_metadata.
    """
    with open(fn, "r") as fh:
        raw = fh.read()
    # collapse a single leading space after newlines
    raw = raw.replace("\n ", "\n")
    # normalize line endings: CRLF files already contain \n so just drop
    # the \r; bare-\r (old Mac) files need \r converted to \n
    raw = raw.replace("\r", "") if "\n" in raw else raw.replace("\r", "\n")
    return parse_metadata(fn, raw.split("\n\n"))
# fallback values merged into a page's metadata by parse_metadata for any
# key the markdown header does not define
default_metadata = {
    'status': 'published',
    'title': 'Untitled Page',
    'desc': '',
    'slug': '',
    'published': '2018-12-31',
    'updated': '2018-12-31',
    'authors': 'Adam Harvey',
    'sync': 'true',
    'tagline': '',
}
def parse_metadata_section(metadata, section):
    """
    Parse "key: value" lines from *section* into *metadata* (mutated in
    place); keys are lowercased, lines without ': ' are ignored.
    """
    for entry in section.split("\n"):
        if ': ' not in entry:
            continue
        key, _, value = entry.partition(': ')
        metadata[key.lower()] = value
def parse_metadata(fn, sections):
    """
    Parse the metadata headers in a markdown file (everything before the
    second ---------) and generate appropriate urls for this page.

    fn: path of the markdown file (used for url generation and warnings)
    sections: the file contents split on blank lines
    Returns (metadata dict, list of remaining content sections).
    """
    found_meta = False
    metadata = {}
    valid_sections = []
    for section in sections:
        # the first "key: value" section encountered is the metadata block
        if not found_meta and ': ' in section:
            found_meta = True
            parse_metadata_section(metadata, section)
            continue
        # horizontal rules delimit the header; drop them from content
        if '-----' in section:
            continue
        if found_meta:
            valid_sections.append(section)
    if 'title' not in metadata:
        print('warning: {} has no title'.format(fn))
    # fill in any missing keys from the defaults
    for key in default_metadata:
        if key not in metadata:
            metadata[key] = default_metadata[key]
    basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
    basename = os.path.basename(fn)
    if basedir == '/':
        metadata['path'] = '/'
        metadata['url'] = '/'
    elif basename == 'index.md':
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path']
    else:
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
    # an unedited page template still carries the literal placeholder
    # value; treat it as published
    if metadata['status'] == 'published|draft|private':
        metadata['status'] = 'published'
    metadata['sync'] = metadata['sync'] != 'false'
    # NOTE(review): the join separator was lost when this file was mangled
    # (an unterminated string remained); '<br />' is a reconstruction —
    # confirm against the templates.
    metadata['author_html'] = '<br />'.join(metadata['authors'].split(','))
    return metadata, valid_sections
def read_research_post_index():
    """
    Generate an index of the research (blog) posts.

    Thin wrapper over read_post_index for the 'research' content dir.
    """
    return read_post_index('research')
def read_datasets_index():
    """
    Generate an index of the datasets.

    Thin wrapper over read_post_index for the 'datasets' content dir.
    """
    return read_post_index('datasets')
def read_post_index(basedir):
    """
    Generate an index of the published posts under
    ../site/content/<basedir>/, skipping private and draft pages.
    Falls back to a single placeholder entry when nothing is published.
    """
    pattern = '../site/content/{}/*/index.md'.format(basedir)
    posts = []
    for fn in sorted(glob.glob(pattern)):
        metadata, _ = read_metadata(fn)
        if metadata is None:
            continue
        if metadata['status'] == 'private' or metadata['status'] == 'draft':
            continue
        posts.append(metadata)
    if not posts:
        # templates always expect at least one entry to render
        posts.append({
            'title': 'Placeholder',
            'slug': 'placeholder',
            'date': 'Placeholder',
            'url': '/',
        })
    return posts