diff options
| author | Adam Harvey <adam@ahprojects.com> | 2018-12-23 01:37:03 +0100 |
|---|---|---|
| committer | Adam Harvey <adam@ahprojects.com> | 2018-12-23 01:37:03 +0100 |
| commit | 4452e02e8b04f3476273574a875bb60cfbb4568b (patch) | |
| tree | 3ffa44f9621b736250a8b94da14a187dc785c2fe /megapixels/app/site/parser.py | |
| parent | 2a65f7a157bd4bace970cef73529867b0e0a374d (diff) | |
| parent | 5340bee951c18910fd764241945f1f136b5a22b4 (diff) | |
.
Diffstat (limited to 'megapixels/app/site/parser.py')
| -rw-r--r-- | megapixels/app/site/parser.py | 238 |
1 file changed, 238 insertions, 0 deletions
import os
import re
import glob
import simplejson as json
import mistune

import app.settings.app_cfg as cfg
import app.site.s3 as s3

# Shared markdown renderer; escape=False because site content embeds raw HTML.
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)


def fix_images(lines, s3_path):
    """Rewrite markdown image syntax into the site's own image HTML.

    lines: list of markdown blocks; they are joined and re-split per line so
        each ``![alt](url)`` can be transformed individually.
    s3_path: URL prefix prepended to relative image URLs.
    Returns the transformed markdown as a single string.
    """
    real_lines = []
    block = "\n\n".join(lines)
    for line in block.split("\n"):
        # NOTE(review): this span was garbled in the scraped diff; the
        # alt_text/tail extraction below is reconstructed from the code that
        # follows it -- confirm against the original file.
        if "![" in line:
            alt_text, tail = line.split("![", 1)[1].split("](", 1)
            url, tail = tail.split(')', 1)
            # "![wide:caption](...)" style prefix: drop the type, keep caption.
            if ':' in alt_text:
                tail, alt_text = alt_text.split(':', 1)
            img_tag = "<img src='{}' alt='{}'>".format(
                s3_path + url, alt_text.replace("'", ""))
            if alt_text:
                line = "<div class='image'>{}<div class='caption'>{}</div></div>".format(
                    img_tag, alt_text)
            else:
                # bug fix: original passed a stray second argument to format()
                line = "<div class='image'>{}</div>".format(img_tag)
        real_lines.append(line)
    return "\n".join(real_lines)


def format_section(lines, s3_path, type=''):
    """Render a list of markdown blocks as one <section> of HTML.

    type: optional CSS class for the section (e.g. 'wide', 'images');
        kept named 'type' (shadowing the builtin) for caller compatibility.
    Returns '' when there is nothing to render.
    """
    if not lines:
        return ""
    text = fix_images(lines, s3_path)
    if type:
        return "<section class='{}'>{}</section>".format(type, markdown(text))
    return "<section>" + markdown(text) + "</section>"


def format_metadata(section):
    """Format a metadata section ('+ key: value' lines) as a <div class='meta'> grid."""
    meta = []
    for line in section.split('\n'):
        # robustness: skip malformed lines instead of raising ValueError
        if ': ' not in line[2:]:
            continue
        # strip the leading '+ ' marker, then split once on ': '
        key, value = line[2:].split(': ', 1)
        meta.append(
            "<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
    return "<section><div class='meta'>{}</div></section>".format(''.join(meta))


def format_applet(section, s3_path):
    """Turn a fenced ``` block into an applet placeholder <section>.

    The first payload line is 'command' or 'command: opt'; any remaining
    lines become the applet's 'fields'. Fences whose command names a code
    language ('python', 'javascript', 'code') are rendered as a normal
    markdown section instead of an applet.
    """
    payload = section.strip('`').strip().strip('`').strip().split('\n')
    applet = {}
    # bug fix: split at most once so an opt containing ': ' does not raise
    if ': ' in payload[0]:
        command, opt = payload[0].split(': ', 1)
    else:
        command = payload[0]
        opt = None
    if command == 'python' or command == 'javascript' or command == 'code':
        return format_section([section], s3_path)

    applet['command'] = command
    if opt:
        applet['opt'] = opt
    # bug fix: guard against a missing opt before slicing/prefixing it
    if command == 'load_file' and opt:
        if opt[0:4] != 'http':
            applet['opt'] = s3_path + opt
    if len(payload) > 1:
        applet['fields'] = payload[1:]
    return ("<section class='applet_container'>"
            "<div class='applet' data-payload='{}'></div></section>").format(json.dumps(applet))


def parse_markdown(sections, s3_path, skip_h1=False):
    """Parse a page's markdown sections into final HTML.

    Groups plain sections together, and breaks out fenced applet blocks,
    '+ key: value' metadata sections, and image sections into their own
    <section> elements.
    skip_h1: drop '# ' headings (used when the title is rendered elsewhere).
    """
    groups = []
    current_group = []
    for section in sections:
        if skip_h1 and section.startswith('# '):
            continue
        elif section.strip().startswith('```'):
            # flush accumulated prose, then start collecting the fence
            groups.append(format_section(current_group, s3_path))
            current_group = [section]
            # fence may open and close within the same section
            if section.strip().endswith('```'):
                groups.append(format_applet("\n\n".join(current_group), s3_path))
                current_group = []
        elif section.strip().endswith('```'):
            # closing fence of a multi-section applet block
            current_group.append(section)
            groups.append(format_applet("\n\n".join(current_group), s3_path))
            current_group = []
        elif section.startswith('+ '):
            groups.append(format_section(current_group, s3_path))
            groups.append(format_metadata(section))
            current_group = []
        elif '![wide:' in section:
            groups.append(format_section(current_group, s3_path))
            groups.append(format_section([section], s3_path, type='wide'))
            current_group = []
        elif '![' in section:
            groups.append(format_section(current_group, s3_path))
            groups.append(format_section([section], s3_path, type='images'))
            current_group = []
        else:
            current_group.append(section)
    groups.append(format_section(current_group, s3_path))
    content = "".join(groups)
    return content


def parse_research_index(research_posts):
    """Generate the HTML index for the research pages.

    research_posts: list of metadata dicts as produced by parse_metadata().
    Posts without an 'image' get a 1x1 transparent GIF placeholder.
    """
    content = "<div class='research_index'>"
    for post in research_posts:
        s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
        if 'image' in post:
            post_image = s3_path + post['image']
        else:
            post_image = ('data:image/gif;base64,'
                          'R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
        row = ("<a href='{}'><section class='wide'>"
               "<img src='{}' alt='Research post' />"
               "<section><h1>{}</h1><h2>{}</h2></section>"
               "</section></a>").format(
            post['path'],
            post_image,
            post['title'],
            post['tagline'])
        content += row
    content += '</div>'
    return content


def read_metadata(fn):
    """Read a markdown file, normalize line endings, and extract its metadata.

    Returns (metadata, valid_sections) via parse_metadata().
    """
    with open(fn, "r") as f:
        data = f.read()
    data = data.replace("\n ", "\n")
    # normalize CRLF/CR-only files to plain LF
    if "\n" in data:
        data = data.replace("\r", "")
    else:
        data = data.replace("\r", "\n")
    sections = data.split("\n\n")
    return parse_metadata(fn, sections)


# Fallback values merged into any page whose header omits a key.
default_metadata = {
    'status': 'published',
    'title': 'Untitled Page',
    'desc': '',
    'slug': '',
    'published': '2018-12-31',
    'updated': '2018-12-31',
    'authors': 'Adam Harvey',
    'sync': 'true',
    'tagline': '',
}


def parse_metadata_section(metadata, section):
    """Parse one header section of 'key: value' lines into *metadata* (keys lowercased)."""
    for line in section.split("\n"):
        if ': ' not in line:
            continue
        key, value = line.split(': ', 1)
        metadata[key.lower()] = value


def parse_metadata(fn, sections):
    """Parse the metadata headers of a markdown file.

    Everything before the '-----' divider is treated as metadata; the rest
    are content sections. Also derives the page's 'path' and 'url' from the
    filename. Returns (metadata, valid_sections).
    """
    found_meta = False
    metadata = {}
    valid_sections = []
    for section in sections:
        if not found_meta and ': ' in section:
            found_meta = True
            parse_metadata_section(metadata, section)
            continue
        if '-----' in section:
            continue
        if found_meta:
            valid_sections.append(section)

    if 'title' not in metadata:
        print('warning: {} has no title'.format(fn))
    for key in default_metadata:
        if key not in metadata:
            metadata[key] = default_metadata[key]

    basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
    basename = os.path.basename(fn)
    if basedir == '/':
        metadata['path'] = '/'
        metadata['url'] = '/'
    elif basename == 'index.md':
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path']
    else:
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'

    # a page template left its placeholder in place; treat it as published
    if metadata['status'] == 'published|draft|private':
        metadata['status'] = 'published'

    # 'sync' becomes a real boolean: anything except the string 'false' is true
    metadata['sync'] = metadata['sync'] != 'false'

    metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))

    return metadata, valid_sections


def read_research_post_index():
    """Generate an index of the research (blog) posts.

    Scans ../site/content/research/*/index.md, skipping private/draft posts.
    Returns a list of metadata dicts, or a single placeholder entry when no
    published posts exist.
    """
    posts = []
    for fn in sorted(glob.glob('../site/content/research/*/index.md')):
        metadata, valid_sections = read_metadata(fn)
        if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft':
            continue
        posts.append(metadata)
    if not posts:
        posts.append({
            'title': 'Placeholder',
            'slug': 'placeholder',
            'date': 'Placeholder',
            'url': '/',
        })
    return posts
