summaryrefslogtreecommitdiff
path: root/builder/parser.py
blob: ea273556cfb3fc659c01021f8f9a7aa12735ba35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import glob
import mistune
from paths import *

# Shared module-level Markdown pipeline used by the section helpers below.
# escape=False passes raw HTML in the source through unescaped, so this is
# only safe for trusted (author-written) content.
renderer = mistune.Renderer(escape=False)
markdown = mistune.Markdown(renderer=renderer)

def fix_images(lines, s3_path):
  """Rewrite relative Markdown image URLs to absolute S3 URLs.

  Joins the given sections with blank lines, then prefixes ``s3_path`` onto
  the target of every image link (``![...](...)``) whose line does not
  already contain an absolute ``http`` link. Returns the rewritten text as a
  single newline-joined string.
  """
  def absolutize(text_line):
    # Only touch lines with an image markup and no absolute URL on them.
    if "![" in text_line and "](http" not in text_line:
      return text_line.replace('](', '](' + s3_path)
    return text_line

  joined = "\n\n".join(lines)
  return "\n".join(absolutize(text_line) for text_line in joined.split("\n"))

def wide_section(lines, s3_path):
  """Render a list of Markdown sections as a full-width HTML section.

  Bug fix: the parameter was named ``line`` while the body referenced the
  undefined name ``lines``, so every call raised NameError. The parameter is
  now named ``lines`` to match the body and the list argument the caller
  passes (``wide_section([section], s3_path)`` in parse_markdown).
  """
  # Make relative image URLs absolute before rendering to HTML.
  fixed = fix_images(lines, s3_path)
  return "<section class='wide'>" + markdown(fixed) + "</section>"

def normal_section(lines, s3_path):
  """Render Markdown sections as a standard HTML <section>.

  Returns the empty string when there are no sections, so callers can
  flush an empty accumulator without emitting markup.
  """
  if not lines:
    return ""
  # Rewrite relative image URLs, then convert Markdown to HTML.
  return "<section>" + markdown(fix_images(lines, s3_path)) + "</section>"

def parse_markdown(sections, s3_path):
  """Convert a list of Markdown sections into concatenated HTML.

  Sections starting with '# ' (the page title) are skipped — the title is
  rendered elsewhere. A section containing the '![wide]' marker becomes its
  own full-width section; everything between wide markers is grouped into
  one normal section.
  """
  rendered = []
  pending = []
  for section in sections:
    if section.startswith('# '):
      continue  # page title handled outside this function
    if '![wide]' in section:
      # Flush the accumulated normal sections, then emit the wide one.
      rendered.append(normal_section(pending, s3_path))
      rendered.append(wide_section([section], s3_path))
      pending = []
    else:
      pending.append(section)
  # Flush any trailing normal sections.
  rendered.append(normal_section(pending, s3_path))
  return "".join(rendered)

def read_metadata(fn):
  """Read a Markdown file and return (metadata, sections) via parse_metadata.

  Normalizes line endings first: if the file contains '\\n' at all, any '\\r'
  characters are dropped (CRLF -> LF); otherwise '\\r' is treated as the line
  separator (classic-Mac endings) and converted to '\\n'. Leading single
  spaces after a newline are also stripped.
  """
  with open(fn, "r") as handle:
    raw = handle.read()
  raw = raw.replace("\n ", "\n")
  if "\n" in raw:
    raw = raw.replace("\r", "")   # CRLF or stray CR mixed with LF
  else:
    raw = raw.replace("\r", "\n") # CR-only file: CR is the newline
  return parse_metadata(fn, raw.split("\n\n"))

# Fallback values for any metadata key a page's header does not set
# (merged in by parse_metadata below).
default_metadata = {
  'status': 'published',        # one of: published | draft | private
  'title': 'Untitled Page',
  'desc': '',
  'slug': '',
  'published': '2018-12-31',    # ISO date used when the page sets none
  'updated': '2018-12-31',
  'authors': 'Adam Harvey',     # comma-separated list; later joined with <br>
}

def parse_metadata_section(metadata, section):
  """Parse 'Key: value' lines from a header section into the metadata dict.

  Keys are lowercased; only the first ': ' on a line separates key from
  value, so values may themselves contain ': '. Lines without ': ' are
  ignored. Mutates ``metadata`` in place.
  """
  for entry in section.split("\n"):
    key, sep, value = entry.partition(': ')
    if sep:
      metadata[key.lower()] = value

def parse_metadata(fn, sections):
  """Split a file's sections into a metadata dict and the body sections.

  The first section containing ': ' (before any body content) is parsed as
  the metadata header; sections containing '-----' (horizontal rules between
  header and body) are dropped; everything after the header is body. Missing
  metadata keys are filled from default_metadata, and URL/path fields are
  derived from ``fn`` relative to content_path. Returns
  (metadata, body_sections).
  """
  metadata = {}
  body = []
  seen_header = False
  for section in sections:
    if not seen_header and ': ' in section:
      seen_header = True
      parse_metadata_section(metadata, section)
    elif '-----' in section:
      continue  # separator rule between header and body
    elif seen_header:
      body.append(section)

  if 'title' not in metadata:
    print('warning: {} has no title'.format(fn))
  # Backfill any keys the page header did not provide.
  for key, fallback in default_metadata.items():
    metadata.setdefault(key, fallback)

  basename = os.path.basename(fn)
  # Site-relative directory of the source file, with a trailing slash.
  metadata['path'] = os.path.dirname(fn.replace(content_path, '')) + '/'
  if basename == 'index.md':
    metadata['url'] = metadata['path']
  else:
    metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'

  # An unedited template leaves the literal placeholder string behind;
  # treat that as the default status.
  if metadata['status'] == 'published|draft|private':
    metadata['status'] = 'published'
  # Comma-separated authors become line-broken HTML.
  metadata['authors'] = '<br>'.join(metadata['authors'].split(','))
  return metadata, body

def read_research_post_index():
  """Collect metadata for all publishable research posts, sorted by path.

  Scans content_path/research/**/index.md and returns the metadata dicts of
  every post that is neither 'private' nor 'draft'.
  """
  pattern = os.path.join(content_path, 'research/**/index.md')
  posts = []
  for fn in sorted(glob.glob(pattern, recursive=True)):
    metadata, _sections = read_metadata(fn)
    # Skip unreadable pages and anything not meant for the public index.
    if metadata is None or metadata['status'] in ('private', 'draft'):
      continue
    posts.append(metadata)
  return posts