import os
import re
import glob

import app.settings.app_cfg as cfg
from app.utils.file_utils import load_json
import app.utils.sheet_utils as sheet

# lookup of per-dataset sidebar rows, keyed by slug, from the Google Sheet
sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")


def read_metadata(fn):
    """
    Read a markdown file, normalize its line endings, and extract its metadata
    """
    with open(fn, "r") as file:
        data = file.read()
    data = data.replace("\n ", "\n")
    # normalize line endings: \r\n files lose the \r, bare-\r files become \n
    if "\n" in data:
        data = data.replace("\r", "")
    else:
        data = data.replace("\r", "\n")
    sections = data.split("\n\n")
    return parse_metadata(fn, sections)


def domainFromUrl(url):
    """
    Return a display domain for a url, keeping three labels when the
    second-level domain is two letters (e.g. 'example.co.uk')
    """
    domain = url.split('/')[2].split('.')
    if len(domain) > 2 and len(domain[-2]) == 2:
        return ".".join(domain[-3:])
    return ".".join(domain[-2:])


default_metadata = {
    'status': 'published',
    'title': 'Untitled Page',
    'desc': '',
    'slug': '',
    'published': '2018-12-31',
    'updated': '2018-12-31',
    'authors': 'Adam Harvey',
    'sync': 'true',
    'tagline': '',
}

sidebar_order = [
    {'key': 'published', 'title': 'Published'},
    {'key': 'images', 'title': 'Images'},
    {'key': 'videos', 'title': 'Videos'},
    {'key': 'identities', 'title': 'Identities'},
    {'key': 'purpose', 'title': 'Purpose'},
    {'key': 'created_by', 'title': 'Created by'},
    {'key': 'funded_by_short', 'title': 'Funded by'},
    {'key': 'size_gb', 'title': 'Download Size'},
    {'key': 'website', 'title': 'Website'},
]
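# A hedged sketch of the front matter parse_metadata() expects: the file is
# split on blank lines, the first section containing ': ' is treated as the
# metadata block, and '-----' rules are skipped. The field values here are
# hypothetical, not taken from a real page:
#
#   Title: Example Dataset
#   Desc: One-line description used for og:description
#   Slug: example-dataset
#   Status: published
#   Authors: Adam Harvey
#
#   ---------
#
#   First body section...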
def parse_metadata(fn, sections):
    """
    Parse the metadata headers in a markdown file (everything before the
    second '---------' rule)
    - determines the appropriate urls for this page
    - loads the citation metadata (paper), if any
    """
    found_meta = False
    metadata = {}
    valid_sections = []
    for section in sections:
        if not found_meta and ': ' in section:
            found_meta = True
            parse_metadata_section(metadata, section)
            continue
        if '-----' in section:
            continue
        if found_meta:
            valid_sections.append(section)

    if 'title' not in metadata:
        print('warning: {} has no title'.format(fn))

    # fill in any missing fields from the defaults
    for key in default_metadata:
        if key not in metadata:
            metadata[key] = default_metadata[key]

    # derive the site-relative path and url from the file location
    basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, ''))
    basename = os.path.basename(fn)
    if basedir == '/':
        metadata['path'] = '/'
        metadata['url'] = '/'
    elif basename == 'index.md':
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path']
    else:
        metadata['path'] = basedir + '/'
        metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/'

    # a template placeholder left unedited defaults to published
    if metadata['status'] == 'published|draft|private':
        metadata['status'] = 'published'

    metadata['og_desc'] = re.sub('<[^<]+?>', '', metadata['desc'])
    metadata['sync'] = metadata['sync'] != 'false'
    # one author per line in the rendered HTML
    metadata['author_html'] = '<br>'.join(metadata['authors'].split(','))

    # attach citation metadata generated for this dataset, if present
    dataset_path = os.path.join(cfg.DIR_SITE_FINAL_CITATIONS, metadata['slug'] + '.json')
    if os.path.exists(dataset_path):
        metadata['meta'] = load_json(dataset_path)
        if not metadata['meta']:
            print("Bad metadata? {}".format(dataset_path))
    elif 'datasets' in fn:
        print("/!\\ {} does not exist!".format(dataset_path))

    # build the sidebar rows from the Google Sheet lookup, in display order
    if metadata['slug'] in sidebar:
        sidebar_row = sidebar[metadata['slug']]
        if sidebar_row:
            metadata['sidebar'] = []
            for item in sidebar_order:
                key = item['key']
                value = sidebar_row[key]
                if value:
                    value = value.replace(' - ', ' – ')
                    if key == 'size_gb':
                        value += ' GB'
                    if key == 'website':
                        # link the website entry, labeled with its domain
                        value = '<a href="{}">{}</a>'.format(value, domainFromUrl(value))
                    metadata['sidebar'].append({
                        'value': value,
                        'title': item['title'],
                    })

    # without citation metadata, render the page without a sidebar
    if 'meta' not in metadata or not metadata['meta']:
        metadata['meta'] = {}
        metadata['sidebar'] = []

    # open graph tags, with a fallback image for pages without one
    metadata['og_title'] = 'MegaPixels: {}'.format(metadata['title'])
    if 'image' in metadata and metadata['image']:
        metadata['og_image'] = "https://nyc3.digitaloceanspaces.com/megapixels/v1" + metadata['path'] + metadata['image']
    else:
        metadata['og_image'] = "https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/msceleb/assets/background.jpg"

    return metadata, valid_sections


def parse_metadata_section(metadata, section):
    """
    Parse a block of 'key: value' metadata pairs, one per line
    """
    for line in section.split("\n"):
        if ': ' not in line:
            continue
        key, value = line.split(': ', 1)
        metadata[key.lower()] = value


def read_research_post_index():
    """
    Generate an index of the research (blog) posts
    """
    return read_post_index('research')


def read_datasets_index():
    """
    Generate an index of the datasets
    """
    return read_post_index('datasets')


def read_post_index(basedir):
    """
    Generate an index of the published posts under basedir
    """
    posts = []
    for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))):
        metadata, valid_sections = read_metadata(fn)
        if metadata is None or metadata['status'] in ('private', 'draft'):
            continue
        posts.append(metadata)
    if not posts:
        posts.append({
            'title': 'Placeholder',
            'slug': 'placeholder',
            'date': 'Placeholder',
            'url': '/',
        })
    return posts
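

# A minimal usage sketch, assuming cfg.DIR_SITE_CONTENT points at a checkout
# of the site content and the "sidebar" Google Sheet is reachable; not part
# of the build pipeline itself.
if __name__ == '__main__':
    for post in read_datasets_index():
        print('{} -> {}'.format(post.get('title'), post.get('url')))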