summaryrefslogtreecommitdiff
path: root/megapixels/app/site/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/app/site/parser.py')
-rw-r--r--megapixels/app/site/parser.py187
1 files changed, 187 insertions, 0 deletions
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
new file mode 100644
index 00000000..add3f386
--- /dev/null
+++ b/megapixels/app/site/parser.py
@@ -0,0 +1,187 @@
+import os
+import re
+import glob
+import mistune
+
+import app.settings.app_cfg as cfg
+import app.builder.s3 as s3
+
# Module-wide mistune renderer and the markdown callable built on top of it.
# The callable is what turns markdown text into HTML for a section.
# NOTE(review): escape=False presumably lets raw HTML embedded in the markdown
# pass through unescaped -- confirm against the installed mistune version.
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)
+
def _image_line_to_html(line, s3_path):
    """Turn one line holding a markdown image into an HTML image block.

    The line is returned unchanged when it does not contain a complete
    ``![alt](url)`` construct, so malformed markup is passed through
    instead of raising.
    """
    stripped = line.replace('![', '')
    alt_text, bracket, rest = stripped.partition('](')
    url, paren, _ = rest.partition(')')
    if not bracket or not paren:
        return line
    if ':' in alt_text:
        # Everything up to the first colon (e.g. the "wide" in "wide:Caption")
        # is dropped; only the text after it is used as alt text and caption.
        alt_text = alt_text.split(':', 1)[1]
    # Single quotes are removed from the attribute value because the
    # attribute itself is delimited with single quotes.
    img_tag = "<img src='{}' alt='{}'>".format(s3_path + url, alt_text.replace("'", ""))
    if alt_text:
        return "<div class='image'>{}<div class='caption'>{}</div></div>".format(img_tag, alt_text)
    return "<div class='image'>{}</div>".format(img_tag)


def fix_images(lines, s3_path):
    """Replace markdown image lines with HTML image blocks.

    Args:
        lines: list of text chunks; they are joined with blank lines and
            then processed one physical line at a time.
        s3_path: prefix prepended to every image URL.

    Returns:
        The joined text, where each line containing ``![`` has been replaced
        by a ``<div class='image'>`` block (with a caption div when the alt
        text is non-empty). Lines without an image, and lines whose image
        markup is incomplete, are kept as they are.
    """
    block = "\n\n".join(lines)
    return "\n".join(
        _image_line_to_html(line, s3_path) if "![" in line else line
        for line in block.split("\n")
    )
+
def format_section(lines, s3_path, type=''):
    """Render a group of markdown chunks as one HTML ``<section>``.

    Args:
        lines: list of markdown chunks belonging to the section.
        s3_path: prefix handed to ``fix_images`` for image URLs.
        type: optional CSS class for the section element.

    Returns:
        The section HTML, or an empty string when ``lines`` is empty.
    """
    if not lines:
        return ""
    html = markdown(fix_images(lines, s3_path))
    if type:
        return "<section class='{}'>{}</section>".format(type, html)
    return "<section>" + html + "</section>"
+
def format_metadata(section):
    """Render a ``+ key: value`` list as an HTML metadata block.

    Args:
        section: text whose lines each start with a two-character list
            marker (the first two characters of every line are dropped)
            followed by ``key: value``.

    Returns:
        A ``<section>`` wrapping one key/value row per non-blank line.
        Blank lines (for example from a trailing newline) are skipped, and
        an entry without ``': '`` is rendered as a key with an empty value
        instead of raising.
    """
    rows = []
    for line in section.split('\n'):
        entry = line[2:]
        if not entry.strip():
            continue
        key, _, value = entry.partition(': ')
        rows.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
    return "<section><div class='meta'>{}</div></section>".format(''.join(rows))
+
def format_applet(section):
    """Render a fenced (triple-backtick) applet section as HTML.

    The first line of the fenced payload is read as ``command`` or
    ``command: option``. Only the ``load_file`` command is recognised.

    Args:
        section: the raw section text including its backtick fences.

    Returns:
        The applet HTML for ``load_file``; an empty string for any other
        command, so the result can always be joined with other HTML.
    """
    payload = section.replace('```', '').strip().split('\n')
    # Only the text before the first ': ' names the command. Whatever follows
    # it (the option) is currently not used by this function.
    command = payload[0].partition(': ')[0]
    if command == 'load_file':
        # NOTE(review): the '{}' placeholder is returned literally and never
        # filled in -- confirm what is meant to be rendered inside the applet.
        return "<section><div class='applet'>{}</div></section>"
    return ""
+
+
def parse_markdown(sections, s3_path, skip_h1=False):
    """Convert a list of markdown sections into one HTML string.

    Consecutive plain sections are grouped and rendered together. A section
    that is a fenced applet, a ``+ `` metadata list, or contains an image
    closes the current group and is rendered on its own.

    Args:
        sections: list of markdown section strings.
        s3_path: prefix used for image URLs.
        skip_h1: when true, sections starting with ``# `` are dropped.

    Returns:
        The concatenated HTML of all rendered groups.
    """
    groups = []
    current_group = []
    for section in sections:
        if skip_h1 and section.startswith('# '):
            continue
        if section.startswith('```'):
            special = format_applet(section)
        elif section.startswith('+ '):
            special = format_metadata(section)
        elif '![wide:' in section:
            special = format_section([section], s3_path, type='wide')
        elif '![' in section:
            special = format_section([section], s3_path, type='images')
        else:
            current_group.append(section)
            continue
        # A special section ends the running group: emit the group first so
        # the output keeps the document order, then the special block.
        groups.append(format_section(current_group, s3_path))
        groups.append(special)
        current_group = []
    groups.append(format_section(current_group, s3_path))
    # Drop empty results (and a None from a formatter that rendered nothing)
    # so the join cannot fail.
    return "".join(group for group in groups if group)
+
def parse_research_index(research_posts):
    """Build the HTML listing of research posts.

    Args:
        research_posts: iterable of post dicts. Each must provide ``path``,
            ``title`` and ``tagline``; ``image`` is optional.

    Returns:
        A ``<div class='research_index'>`` containing one linked row per
        post, in the order given.
    """
    row_template = "<a href='{}'><section class='wide'><img src='{}' alt='Research post' /><section><h1>{}</h1><h2>{}</h2></section></section></a>"
    rows = []
    for post in research_posts:
        s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
        # Posts without an image get an empty src attribute.
        post_image = s3_path + post['image'] if 'image' in post else ''
        rows.append(row_template.format(
            post['path'],
            post_image,
            post['title'],
            post['tagline']))
    return "<div class='research_index'>" + "".join(rows) + '</div>'
+
def read_metadata(fn):
    """Read a content file and split it into metadata and body sections.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's default encoding.

    Args:
        fn: path of the file to read.

    Returns:
        Whatever ``parse_metadata`` returns for the file's sections.
    """
    with open(fn, "r", encoding="utf-8") as file:
        data = file.read()
    # Remove one leading space from every line after the first.
    data = data.replace("\n ", "\n")
    # Normalise any carriage returns that are still present: drop them when
    # the text already has "\n" line breaks, otherwise treat them as the
    # line breaks.
    if "\n" in data:
        data = data.replace("\r", "")
    else:
        data = data.replace("\r", "\n")
    # Sections are separated by blank lines.
    return parse_metadata(fn, data.split("\n\n"))
+
# Fallback values copied into a page's metadata by parse_metadata for every
# key the page does not define itself. All values are strings.
default_metadata = {
    'status': 'published',
    'title': 'Untitled Page',
    'desc': '',
    'slug': '',
    'published': '2018-12-31',
    'updated': '2018-12-31',
    'authors': 'Adam Harvey',
    'sync': 'true',
    'tagline': '',
}
+
def parse_metadata_section(metadata, section):
    """Copy ``key: value`` lines from *section* into *metadata* in place.

    Keys are lower-cased; the value is everything after the first ``': '``.
    Lines without ``': '`` are ignored. Nothing is returned.
    """
    for entry in section.split("\n"):
        name, separator, value = entry.partition(': ')
        if separator:
            metadata[name.lower()] = value
+
def parse_metadata(fn, sections):
    """Separate a page's metadata block from its content sections.

    The first section containing ``': '`` is parsed as metadata. Sections
    before it are discarded, and later sections are kept unless they contain
    ``-----``. Missing keys are filled from ``default_metadata`` and the
    derived keys ``path``, ``url`` and ``author_html`` are added.

    Args:
        fn: path of the source file, used for the warning message and to
            derive ``path`` / ``url``.
        sections: list of section strings from the file.

    Returns:
        A ``(metadata, valid_sections)`` tuple.
    """
    metadata = {}
    valid_sections = []
    found_meta = False
    for section in sections:
        if not found_meta:
            # Nothing is kept until the metadata block has been seen.
            if ': ' in section:
                found_meta = True
                parse_metadata_section(metadata, section)
            continue
        if '-----' not in section:
            valid_sections.append(section)

    if 'title' not in metadata:
        print('warning: {} has no title'.format(fn))
    for key, fallback in default_metadata.items():
        metadata.setdefault(key, fallback)

    # Site-relative directory of the file, with the content root removed.
    basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
    basename = os.path.basename(fn)
    at_root = basedir == '/'
    metadata['path'] = '/' if at_root else basedir + '/'
    if at_root or basename == 'index.md':
        metadata['url'] = metadata['path']
    else:
        metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'

    # NOTE(review): this literal looks like an unedited template placeholder
    # listing the allowed values; it is mapped to 'published' -- confirm.
    if metadata['status'] == 'published|draft|private':
        metadata['status'] = 'published'

    # Anything other than the exact string 'false' counts as enabled.
    metadata['sync'] = metadata['sync'] != 'false'

    metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
    return metadata, valid_sections
+
def read_research_post_index():
    """Collect the metadata of every research post that is not hidden.

    Reads each ``index.md`` under ``../site/content/research/*/`` (relative
    to the current working directory), in sorted path order, and skips
    posts whose status is ``private`` or ``draft``.

    Returns:
        A list of metadata dicts. When no post qualifies, the list holds a
        single placeholder entry. The placeholder carries ``path`` and
        ``tagline`` in addition to ``title`` because ``parse_research_index``
        indexes those keys directly.
    """
    posts = []
    for fn in sorted(glob.glob('../site/content/research/*/index.md')):
        metadata, _ = read_metadata(fn)
        if metadata is None or metadata['status'] in ('private', 'draft'):
            continue
        posts.append(metadata)
    if not posts:
        posts.append({
            'title': 'Placeholder',
            'slug': 'placeholder',
            'date': 'Placeholder',
            'url': '/',
            'path': '/',
            'tagline': '',
        })
    return posts
+