summaryrefslogtreecommitdiff
path: root/megapixels/app/site/parser.py
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2019-02-27 20:29:08 +0100
committerJules Laplace <julescarbon@gmail.com>2019-02-27 20:29:08 +0100
commit67896d3cdde877de940a282bebacd10ca1c56499 (patch)
treec523b7b5443c1eb0aa061c2394f8440dba567620 /megapixels/app/site/parser.py
parentc8e7a10be948c2405d46d8c3caf4a8c6675eee29 (diff)
site watcher / loader
Diffstat (limited to 'megapixels/app/site/parser.py')
-rw-r--r--megapixels/app/site/parser.py204
1 files changed, 49 insertions, 155 deletions
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index d6705214..3792e6f1 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -10,6 +10,49 @@ import app.site.s3 as s3
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)
+def parse_markdown(metadata, sections, s3_path, skip_h1=False):
+ """
+ parse page into sections, preprocess the markdown to handle our modifications
+ """
+ groups = []
+ current_group = []
+ for section in sections:
+ if skip_h1 and section.startswith('# '):
+ continue
+ elif section.strip().startswith('```'):
+ groups.append(format_section(current_group, s3_path))
+ current_group = []
+ current_group.append(section)
+ if section.strip().endswith('```'):
+ groups.append(format_applet("\n\n".join(current_group), s3_path))
+ current_group = []
+ elif section.strip().endswith('```'):
+ current_group.append(section)
+ groups.append(format_applet("\n\n".join(current_group), s3_path))
+ current_group = []
+ elif section.startswith('+ '):
+ groups.append(format_section(current_group, s3_path))
+ groups.append(format_metadata(section))
+ current_group = []
+ elif '![fullwidth:' in section:
+ groups.append(format_section(current_group, s3_path))
+ groups.append(format_section([section], s3_path, type='fullwidth'))
+ current_group = []
+ elif '![wide:' in section:
+ groups.append(format_section(current_group, s3_path))
+ groups.append(format_section([section], s3_path, type='wide'))
+ current_group = []
+ elif '![' in section:
+ groups.append(format_section(current_group, s3_path))
+ groups.append(format_section([section], s3_path, type='images'))
+ current_group = []
+ else:
+ current_group.append(section)
+ groups.append(format_section(current_group, s3_path))
+ content = "".join(groups)
+ return content
+
+
def fix_images(lines, s3_path):
"""
do our own transformation of the markdown around images to handle wide images etc
@@ -32,6 +75,7 @@ def fix_images(lines, s3_path):
real_lines.append(line)
return "\n".join(real_lines)
+
def format_section(lines, s3_path, type=''):
"""
format a normal markdown section
@@ -44,6 +88,7 @@ def format_section(lines, s3_path, type=''):
return "<section>" + markdown(lines) + "</section>"
return ""
+
def format_metadata(section):
"""
format a metadata section (+ key: value pairs)
@@ -54,7 +99,11 @@ def format_metadata(section):
meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
return "<section><div class='meta'>{}</div></section>".format(''.join(meta))
+
def format_applet(section, s3_path):
+ """
+ Format the applets, which load javascript modules like the map and CSVs
+ """
# print(section)
payload = section.strip('```').strip().strip('```').strip().split('\n')
applet = {}
@@ -79,47 +128,6 @@ def format_applet(section, s3_path):
applet['fields'] = payload[1:]
return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(json.dumps(applet))
-def parse_markdown(sections, s3_path, skip_h1=False):
- """
- parse page into sections, preprocess the markdown to handle our modifications
- """
- groups = []
- current_group = []
- for section in sections:
- if skip_h1 and section.startswith('# '):
- continue
- elif section.strip().startswith('```'):
- groups.append(format_section(current_group, s3_path))
- current_group = []
- current_group.append(section)
- if section.strip().endswith('```'):
- groups.append(format_applet("\n\n".join(current_group), s3_path))
- current_group = []
- elif section.strip().endswith('```'):
- current_group.append(section)
- groups.append(format_applet("\n\n".join(current_group), s3_path))
- current_group = []
- elif section.startswith('+ '):
- groups.append(format_section(current_group, s3_path))
- groups.append(format_metadata(section))
- current_group = []
- elif '![fullwidth:' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='fullwidth'))
- current_group = []
- elif '![wide:' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='wide'))
- current_group = []
- elif '![' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='images'))
- current_group = []
- else:
- current_group.append(section)
- groups.append(format_section(current_group, s3_path))
- content = "".join(groups)
- return content
def parse_research_index(research_posts):
"""
@@ -141,117 +149,3 @@ def parse_research_index(research_posts):
content += row
content += '</div>'
return content
-
-def read_metadata(fn):
- """
-    Read in a markdown file and extract the metadata
- """
- with open(fn, "r") as file:
- data = file.read()
- data = data.replace("\n ", "\n")
- if "\n" in data:
- data = data.replace("\r", "")
- else:
- data = data.replace("\r", "\n")
- sections = data.split("\n\n")
- return parse_metadata(fn, sections)
-
-default_metadata = {
- 'status': 'published',
- 'title': 'Untitled Page',
- 'desc': '',
- 'slug': '',
- 'published': '2018-12-31',
- 'updated': '2018-12-31',
- 'authors': 'Adam Harvey',
- 'sync': 'true',
- 'tagline': '',
-}
-
-def parse_metadata_section(metadata, section):
- """
- parse a metadata key: value pair
- """
- for line in section.split("\n"):
- if ': ' not in line:
- continue
- key, value = line.split(': ', 1)
- metadata[key.lower()] = value
-
-def parse_metadata(fn, sections):
- """
- parse the metadata headers in a markdown file
- (everything before the second ---------)
- also generates appropriate urls for this page :)
- """
- found_meta = False
- metadata = {}
- valid_sections = []
- for section in sections:
- if not found_meta and ': ' in section:
- found_meta = True
- parse_metadata_section(metadata, section)
- continue
- if '-----' in section:
- continue
- if found_meta:
- valid_sections.append(section)
-
- if 'title' not in metadata:
- print('warning: {} has no title'.format(fn))
- for key in default_metadata:
- if key not in metadata:
- metadata[key] = default_metadata[key]
-
- basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
- basename = os.path.basename(fn)
- if basedir == '/':
- metadata['path'] = '/'
- metadata['url'] = '/'
- elif basename == 'index.md':
- metadata['path'] = basedir + '/'
- metadata['url'] = metadata['path']
- else:
- metadata['path'] = basedir + '/'
- metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
-
- if metadata['status'] == 'published|draft|private':
- metadata['status'] = 'published'
-
- metadata['sync'] = metadata['sync'] != 'false'
-
- metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
-
- return metadata, valid_sections
-
-def read_research_post_index():
- """
- Generate an index of the research (blog) posts
- """
- return read_post_index('research')
-
-def read_datasets_index():
- """
- Generate an index of the datasets
- """
- return read_post_index('datasets')
-
-def read_post_index(basedir):
- """
- Generate an index of posts
- """
- posts = []
- for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))):
- metadata, valid_sections = read_metadata(fn)
- if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft':
- continue
- posts.append(metadata)
- if not len(posts):
- posts.append({
- 'title': 'Placeholder',
- 'slug': 'placeholder',
- 'date': 'Placeholder',
- 'url': '/',
- })
- return posts
-