diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-27 20:29:08 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-27 20:29:08 +0100 |
| commit | 67896d3cdde877de940a282bebacd10ca1c56499 (patch) | |
| tree | c523b7b5443c1eb0aa061c2394f8440dba567620 /megapixels/app/site/parser.py | |
| parent | c8e7a10be948c2405d46d8c3caf4a8c6675eee29 (diff) | |
site watcher / loader
Diffstat (limited to 'megapixels/app/site/parser.py')
| -rw-r--r-- | megapixels/app/site/parser.py | 204 |
1 files changed, 49 insertions, 155 deletions
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index d6705214..3792e6f1 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -10,6 +10,49 @@ import app.site.s3 as s3 renderer = mistune.Renderer(escape=False) markdown = mistune.Markdown(renderer=renderer) +def parse_markdown(metadata, sections, s3_path, skip_h1=False): + """ + parse page into sections, preprocess the markdown to handle our modifications + """ + groups = [] + current_group = [] + for section in sections: + if skip_h1 and section.startswith('# '): + continue + elif section.strip().startswith('```'): + groups.append(format_section(current_group, s3_path)) + current_group = [] + current_group.append(section) + if section.strip().endswith('```'): + groups.append(format_applet("\n\n".join(current_group), s3_path)) + current_group = [] + elif section.strip().endswith('```'): + current_group.append(section) + groups.append(format_applet("\n\n".join(current_group), s3_path)) + current_group = [] + elif section.startswith('+ '): + groups.append(format_section(current_group, s3_path)) + groups.append(format_metadata(section)) + current_group = [] + elif '![fullwidth:' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='fullwidth')) + current_group = [] + elif '![wide:' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='wide')) + current_group = [] + elif '![' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='images')) + current_group = [] + else: + current_group.append(section) + groups.append(format_section(current_group, s3_path)) + content = "".join(groups) + return content + + def fix_images(lines, s3_path): """ do our own tranformation of the markdown around images to handle wide images etc @@ -32,6 +75,7 @@ def fix_images(lines, s3_path): real_lines.append(line) return "\n".join(real_lines) + def format_section(lines, s3_path, type=''): """ format a normal markdown section @@ -44,6 +88,7 @@ def format_section(lines, s3_path, type=''): return "<section>" + markdown(lines) + "</section>" return "" + def format_metadata(section): """ format a metadata section (+ key: value pairs) @@ -54,7 +99,11 @@ def format_metadata(section): meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value)) return "<section><div class='meta'>{}</div></section>".format(''.join(meta)) + def format_applet(section, s3_path): + """ + Format the applets, which load javascript modules like the map and CSVs + """ # print(section) payload = section.strip('```').strip().strip('```').strip().split('\n') applet = {} @@ -79,47 +128,6 @@ def format_applet(section, s3_path): applet['fields'] = payload[1:] return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(json.dumps(applet)) -def parse_markdown(sections, s3_path, skip_h1=False): - """ - parse page into sections, preprocess the markdown to handle our modifications - """ - groups = [] - current_group = [] - for section in sections: - if skip_h1 and section.startswith('# '): - continue - elif section.strip().startswith('```'): - groups.append(format_section(current_group, s3_path)) - current_group = [] - current_group.append(section) - if section.strip().endswith('```'): - groups.append(format_applet("\n\n".join(current_group), s3_path)) - current_group = [] - elif section.strip().endswith('```'): - current_group.append(section) - groups.append(format_applet("\n\n".join(current_group), s3_path)) - current_group = [] - elif section.startswith('+ '): - groups.append(format_section(current_group, s3_path)) - groups.append(format_metadata(section)) - current_group = [] - elif '![fullwidth:' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='fullwidth')) - current_group = [] - elif '![wide:' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='wide')) - current_group = [] - elif '![' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='images')) - current_group = [] - else: - current_group.append(section) - groups.append(format_section(current_group, s3_path)) - content = "".join(groups) - return content def parse_research_index(research_posts): """ @@ -141,117 +149,3 @@ def parse_research_index(research_posts): content += row content += '</div>' return content - -def read_metadata(fn): - """ - Read in read a markdown file and extract the metadata - """ - with open(fn, "r") as file: - data = file.read() - data = data.replace("\n ", "\n") - if "\n" in data: - data = data.replace("\r", "") - else: - data = data.replace("\r", "\n") - sections = data.split("\n\n") - return parse_metadata(fn, sections) - -default_metadata = { - 'status': 'published', - 'title': 'Untitled Page', - 'desc': '', - 'slug': '', - 'published': '2018-12-31', - 'updated': '2018-12-31', - 'authors': 'Adam Harvey', - 'sync': 'true', - 'tagline': '', -} - -def parse_metadata_section(metadata, section): - """ - parse a metadata key: value pair - """ - for line in section.split("\n"): - if ': ' not in line: - continue - key, value = line.split(': ', 1) - metadata[key.lower()] = value - -def parse_metadata(fn, sections): - """ - parse the metadata headers in a markdown file - (everything before the second ---------) - also generates appropriate urls for this page :) - """ - found_meta = False - metadata = {} - valid_sections = [] - for section in sections: - if not found_meta and ': ' in section: - found_meta = True - parse_metadata_section(metadata, section) - continue - if '-----' in section: - continue - if found_meta: - valid_sections.append(section) - - if 'title' not in metadata: - print('warning: {} has no title'.format(fn)) - for key in default_metadata: - if key not in metadata: - metadata[key] = default_metadata[key] - - basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, '')) - basename = os.path.basename(fn) - if basedir == '/': - metadata['path'] = '/' - metadata['url'] = '/' - elif basename == 'index.md': - metadata['path'] = basedir + '/' - metadata['url'] = metadata['path'] - else: - metadata['path'] = basedir + '/' - metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' - - if metadata['status'] == 'published|draft|private': - metadata['status'] = 'published' - - metadata['sync'] = metadata['sync'] != 'false' - - metadata['author_html'] = '<br>'.join(metadata['authors'].split(',')) - - return metadata, valid_sections - -def read_research_post_index(): - """ - Generate an index of the research (blog) posts - """ - return read_post_index('research') - -def read_datasets_index(): - """ - Generate an index of the datasets - """ - return read_post_index('datasets') - -def read_post_index(basedir): - """ - Generate an index of posts - """ - posts = [] - for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))): - metadata, valid_sections = read_metadata(fn) - if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': - continue - posts.append(metadata) - if not len(posts): - posts.append({ - 'title': 'Placeholder', - 'slug': 'placeholder', - 'date': 'Placeholder', - 'url': '/', - }) - return posts - |
