summary | refs | log | tree | commit | diff
path: root/megapixels/app/site/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/app/site/parser.py')
-rw-r--r-- megapixels/app/site/parser.py 347
1 files changed, 183 insertions, 164 deletions
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index f739315a..ad4256ad 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -10,9 +10,141 @@ import app.site.s3 as s3
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)
+footnote_count = 0
+
+def parse_markdown(metadata, sections, s3_path, skip_h1=False):
+    """
+    parse page into sections, preprocess the markdown to handle our modifications
+    metadata: page metadata; an intro section is emitted first when it has both 'desc' and 'subdesc'
+    sections: the markdown sections of the page, in document order
+    s3_path: base path passed through to the section formatters
+    skip_h1: when true, drop sections that start with '# '
+    returns the page HTML, with the footnotes section (if any) appended last
+    """
+    groups = []
+    current_group = []
+    footnotes = []
+    in_stats = False
+    in_footnotes = False
+    ignoring = False
+
+    if 'desc' in metadata and 'subdesc' in metadata:
+        groups.append(intro_section(metadata, s3_path))
+
+    for section in sections:
+        stripped = section.strip()
+        lowered = section.lower()
+        if skip_h1 and section.startswith('# '):
+            continue
+        elif stripped.startswith('---'):
+            continue
+        elif lowered.strip().startswith('ignore text'):
+            # everything after this marker is dropped, except a footnotes block
+            ignoring = True
+            continue
+        elif stripped.startswith('### Footnotes'):
+            groups.append(format_section(current_group, s3_path))
+            current_group = []
+            footnotes = []
+            in_footnotes = True
+        elif in_footnotes:
+            footnotes.append(section)
+        elif ignoring:
+            continue
+        elif '### statistics' in lowered or '### sidebar' in lowered:
+            if len(current_group):
+                groups.append(format_section(current_group, s3_path))
+            current_group = []
+            # the '### sidebar' marker itself is not rendered; a statistics heading is
+            if 'sidebar' not in lowered:
+                current_group.append(section)
+            in_stats = True
+        elif in_stats and not stripped.startswith('## ') and 'end sidebar' not in lowered:
+            current_group.append(section)
+        elif (in_stats and stripped.startswith('## ')) or 'end sidebar' in lowered:
+            # NOTE(review): 'end sidebar' wraps the current group as a sidebar even when
+            # in_stats is False ("and" binds tighter than "or") - confirm this is intended
+            current_group = [format_section(current_group, s3_path, 'right-sidebar', tag='div')]
+            if 'end sidebar' not in lowered:
+                current_group.append(section)
+            in_stats = False
+        elif stripped.startswith('```'):
+            groups.append(format_section(current_group, s3_path))
+            current_group = [section]
+            # a fenced block may open and close within a single section
+            if stripped.endswith('```'):
+                groups.append(format_applet("\n\n".join(current_group), s3_path))
+                current_group = []
+        elif stripped.endswith('```'):
+            current_group.append(section)
+            groups.append(format_applet("\n\n".join(current_group), s3_path))
+            current_group = []
+        elif section.startswith('+ '):
+            groups.append(format_section(current_group, s3_path))
+            groups.append('<section>' + format_metadata(section) + '</section>')
+            current_group = []
+        elif '![fullwidth:' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='fullwidth'))
+            current_group = []
+        elif '![wide:' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='wide'))
+            current_group = []
+        elif '![' in section:
+            groups.append(format_section(current_group, s3_path))
+            groups.append(format_section([section], s3_path, type='images'))
+            current_group = []
+        else:
+            current_group.append(section)
+    groups.append(format_section(current_group, s3_path))
+
+    content = "".join(groups)
+
+    if len(footnotes):
+        footnote_txt, footnote_lookup = format_footnotes(footnotes, s3_path)
+        if footnote_lookup:
+            content, footnote_txt = _link_footnotes(content, footnote_txt, footnote_lookup)
+            content += footnote_txt
+    return content
+
+
+def _link_footnotes(content, footnote_txt, footnote_lookup):
+    """
+    turn each footnote key found in content into a numbered link and fill in that footnote's backlinks
+    content: rendered page HTML still containing the raw footnote keys
+    footnote_txt: rendered footnotes HTML containing "<index>_BACKLINKS" placeholders
+    footnote_lookup: {key: index}
+    returns the updated (content, footnote_txt)
+    """
+    letters = "abcdefghijklmnopqrstuvwxyz"
+    # highest index first, so that replacing "1_BACKLINKS" can never hit "11_BACKLINKS"
+    for key, index in sorted(footnote_lookup.items(), key=lambda item: item[1], reverse=True):
+        backlinks = []
+
+        def footnote_tag(match):
+            count = len(backlinks) + 1
+            # references beyond the 26th fall back to a number instead of raising IndexError
+            label = letters[count - 1] if count <= len(letters) else str(count)
+            backlinks.append('<a href="#{}_{}">{}</a>'.format(key, count, label))
+            return '<a class="footnote_shim" name="{}_{}"> </a><a href="#{}" class="footnote" title="Footnote {}">{}</a>'.format(key, count, key, index, index)
+
+        # the key is literal text containing regex metacharacters such as '[' and '^'
+        content = re.sub(re.escape(key), footnote_tag, content)
+        footnote_txt = footnote_txt.replace("{}_BACKLINKS".format(index), "".join(backlinks))
+    return content, footnote_txt
+
+
+def intro_section(metadata, s3_path):
+    """
+    Build the intro section for datasets
+    metadata: page metadata; 'image' is required, 'desc', 'subdesc' and 'caption' are used when present,
+              and 'title' is read when 'color' is present
+    s3_path: prefix for the background image url
+    returns the intro section HTML, followed by a caption section when there is a caption
+    """
+    section = "<section class='intro_section' style='background-image: url({})'>".format(s3_path + metadata['image'])
+    section += "<div class='inner'>"
+
+    if 'desc' in metadata:
+        desc = metadata['desc']
+        # colour the page title where it appears inside the description
+        if 'color' in metadata and metadata['title'] in desc:
+            desc = desc.replace(metadata['title'], "<span style='color: {}'>{}</span>".format(metadata['color'], metadata['title']))
+        section += "<div class='hero_desc'><span>{}</span></div>".format(desc)
+
+    if 'subdesc' in metadata:
+        # subdesc is rendered as markdown, then its <p> tags are stripped
+        subdesc = markdown(metadata['subdesc']).replace('<p>', '').replace('</p>', '')
+        section += "<div class='hero_subdesc'><span>{}</span></div>".format(subdesc)
+
+    section += "</div>"
+    section += "</section>"
+
+    if 'caption' in metadata:
+        section += "<section><div class='image'><div class='caption'>{}</div></div></section>".format(metadata['caption'])
+
+    return section
+
+
def fix_images(lines, s3_path):
"""
- do our own tranformation of the markdown around images to handle wide images etc
+ do our own transformation of the markdown around images to handle wide images etc
lines: markdown lines
"""
real_lines = []
@@ -22,48 +154,89 @@ def fix_images(lines, s3_path):
line = line.replace('![', '')
alt_text, tail = line.split('](', 1)
url, tail = tail.split(')', 1)
+ tag = ''
if ':' in alt_text:
- tail, alt_text = alt_text.split(':', 1)
+ tag, alt_text = alt_text.split(':', 1)
img_tag = "<img src='{}' alt='{}'>".format(s3_path + url, alt_text.replace("'", ""))
- if len(alt_text):
+ if 'sideimage' in tag:
+ line = "<div class='sideimage'>{}<div>{}</div></div>".format(img_tag, markdown(tail))
+ elif len(alt_text):
line = "<div class='image'>{}<div class='caption'>{}</div></div>".format(img_tag, alt_text)
else:
line = "<div class='image'>{}</div>".format(img_tag, alt_text)
real_lines.append(line)
return "\n".join(real_lines)
-def format_section(lines, s3_path, type=''):
+
+def format_section(lines, s3_path, type='', tag='section'):
"""
format a normal markdown section
"""
if len(lines):
+ lines = fix_meta(lines)
lines = fix_images(lines, s3_path)
if type:
- return "<section class='{}'>{}</section>".format(type, markdown(lines))
+ return "<{} class='{}'>{}</{}>".format(tag, type, markdown(lines), tag)
else:
- return "<section>" + markdown(lines) + "</section>"
+ return "<{}>{}</{}>".format(tag, markdown(lines), tag)
return ""
+def fix_meta(lines):
+    """
+    Run metadata lines (those starting with "+ ") through format_metadata
+    before the section is passed to markdown; all other lines are kept as they are
+    """
+    return [format_metadata(entry) if entry.startswith('+ ') else entry for entry in lines]
+
def format_metadata(section):
"""
format a metadata section (+ key: value pairs)
"""
meta = []
for line in section.split('\n'):
+ if ': ' not in line:
+ continue
key, value = line[2:].split(': ', 1)
meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value))
- return "<section><div class='meta'>{}</div></section>".format(''.join(meta))
+ return "<div class='meta'>{}</div>".format(''.join(meta))
+
+def format_footnotes(footnotes, s3_path):
+    """
+    Format the footnotes section separately and produce a lookup we can use to update the main site
+    footnotes: raw footnote sections; each footnote is a single "key: text" line whose key contains "[^"
+    s3_path: not used by this function
+    returns (footnotes html, {key: index}), indexes counting from 1 in order of appearance;
+    the html is empty when no footnote definitions were found
+    """
+    footnote_index_lookup = {}
+    footnote_list = []
+    for footnote in '\n'.join(footnotes).split('\n'):
+        # skip blank lines and anything that is not a "key: text" definition
+        # (a line without ': ' would otherwise make the split below raise ValueError)
+        if '[^' not in footnote or ': ' not in footnote:
+            continue
+        key, note = footnote.split(': ', 1)
+        index = len(footnote_list) + 1
+        footnote_index_lookup[key] = index
+        # the "<index>_BACKLINKS" placeholder is filled in later, once the references are known
+        footnote_list.append('<a name="{}" class="footnote_shim"></a><span class="backlinks">{}_BACKLINKS</span>'.format(key, index) + markdown(note))
+
+    if not footnote_list:
+        return '', footnote_index_lookup
+
+    footnote_txt = '<section><ul class="footnotes"><li>' + '</li><li>'.join(footnote_list) + '</li></ul></section>'
+    return footnote_txt, footnote_index_lookup
def format_applet(section, s3_path):
+ """
+ Format the applets, which load javascript modules like the map and CSVs
+ """
# print(section)
payload = section.strip('```').strip().strip('```').strip().split('\n')
applet = {}
- print(payload)
+ # print(payload)
if ': ' in payload[0]:
- command, opt = payload[0].split(': ')
+ command, opt = payload[0].split(': ', 1)
else:
command = payload[0]
opt = None
+ print(command)
if command == 'python' or command == 'javascript' or command == 'code':
return format_section([ section ], s3_path)
if command == '':
@@ -79,47 +252,6 @@ def format_applet(section, s3_path):
applet['fields'] = payload[1:]
return "<section class='applet_container'><div class='applet' data-payload='{}'></div></section>".format(json.dumps(applet))
-def parse_markdown(sections, s3_path, skip_h1=False):
- """
- parse page into sections, preprocess the markdown to handle our modifications
- """
- groups = []
- current_group = []
- for section in sections:
- if skip_h1 and section.startswith('# '):
- continue
- elif section.strip().startswith('```'):
- groups.append(format_section(current_group, s3_path))
- current_group = []
- current_group.append(section)
- if section.strip().endswith('```'):
- groups.append(format_applet("\n\n".join(current_group), s3_path))
- current_group = []
- elif section.strip().endswith('```'):
- current_group.append(section)
- groups.append(format_applet("\n\n".join(current_group), s3_path))
- current_group = []
- elif section.startswith('+ '):
- groups.append(format_section(current_group, s3_path))
- groups.append(format_metadata(section))
- current_group = []
- elif '![fullwidth:' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='fullwidth'))
- current_group = []
- elif '![wide:' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='wide'))
- current_group = []
- elif '![' in section:
- groups.append(format_section(current_group, s3_path))
- groups.append(format_section([section], s3_path, type='images'))
- current_group = []
- else:
- current_group.append(section)
- groups.append(format_section(current_group, s3_path))
- content = "".join(groups)
- return content
def parse_research_index(research_posts):
"""
@@ -127,6 +259,7 @@ def parse_research_index(research_posts):
"""
content = "<div class='research_index'>"
for post in research_posts:
+ print(post)
s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path'])
if 'image' in post:
post_image = s3_path + post['image']
@@ -140,117 +273,3 @@ def parse_research_index(research_posts):
content += row
content += '</div>'
return content
-
-def read_metadata(fn):
- """
- Read in read a markdown file and extract the metadata
- """
- with open(fn, "r") as file:
- data = file.read()
- data = data.replace("\n ", "\n")
- if "\n" in data:
- data = data.replace("\r", "")
- else:
- data = data.replace("\r", "\n")
- sections = data.split("\n\n")
- return parse_metadata(fn, sections)
-
-default_metadata = {
- 'status': 'published',
- 'title': 'Untitled Page',
- 'desc': '',
- 'slug': '',
- 'published': '2018-12-31',
- 'updated': '2018-12-31',
- 'authors': 'Adam Harvey',
- 'sync': 'true',
- 'tagline': '',
-}
-
-def parse_metadata_section(metadata, section):
- """
- parse a metadata key: value pair
- """
- for line in section.split("\n"):
- if ': ' not in line:
- continue
- key, value = line.split(': ', 1)
- metadata[key.lower()] = value
-
-def parse_metadata(fn, sections):
- """
- parse the metadata headers in a markdown file
- (everything before the second ---------)
- also generates appropriate urls for this page :)
- """
- found_meta = False
- metadata = {}
- valid_sections = []
- for section in sections:
- if not found_meta and ': ' in section:
- found_meta = True
- parse_metadata_section(metadata, section)
- continue
- if '-----' in section:
- continue
- if found_meta:
- valid_sections.append(section)
-
- if 'title' not in metadata:
- print('warning: {} has no title'.format(fn))
- for key in default_metadata:
- if key not in metadata:
- metadata[key] = default_metadata[key]
-
- basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
- basename = os.path.basename(fn)
- if basedir == '/':
- metadata['path'] = '/'
- metadata['url'] = '/'
- elif basename == 'index.md':
- metadata['path'] = basedir + '/'
- metadata['url'] = metadata['path']
- else:
- metadata['path'] = basedir + '/'
- metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'
-
- if metadata['status'] == 'published|draft|private':
- metadata['status'] = 'published'
-
- metadata['sync'] = metadata['sync'] != 'false'
-
- metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))
-
- return metadata, valid_sections
-
-def read_research_post_index():
- """
- Generate an index of the research (blog) posts
- """
- return read_post_index('research')
-
-def read_datasets_index():
- """
- Generate an index of the datasets
- """
- return read_post_index('datasets')
-
-def read_post_index(basedir):
- """
- Generate an index of posts
- """
- posts = []
- for fn in sorted(glob.glob('../site/content/{}/*/index.md'.format(basedir))):
- metadata, valid_sections = read_metadata(fn)
- if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft':
- continue
- posts.append(metadata)
- if not len(posts):
- posts.append({
- 'title': 'Placeholder',
- 'slug': 'placeholder',
- 'date': 'Placeholder',
- 'url': '/',
- })
- return posts
-