author     adamhrv <adam@ahprojects.com>  2019-04-03 23:24:12 +0200
committer  adamhrv <adam@ahprojects.com>  2019-04-03 23:24:12 +0200
commit     7d2043eb24cd171e809d83219e543ce9541ba821 (patch)
tree       c415aae6c682093b18052d622fd201e983e18fd1 /megapixels
parent     2db426422eedc4847618f972b02189f3d49fe03a (diff)
parent     24e4f4af71f1e146f33688822ac3e4242339faa4 (diff)
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels')
-rw-r--r--  megapixels/app/settings/app_cfg.py   |  1
-rw-r--r--  megapixels/app/site/loader.py        | 38
-rw-r--r--  megapixels/app/site/parser.py        | 11
-rw-r--r--  megapixels/app/utils/sheet_utils.py  | 82
4 files changed, 122 insertions, 10 deletions
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index f6d0a7df..1eed1a41 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -169,6 +169,7 @@ DIR_SITE_INCLUDES = "../site/includes"
DIR_SITE_USER_CONTENT = "../site/public/user_content"
DIR_SITE_DATASETS = "../site/datasets/"
DIR_SITE_FINAL_CITATIONS = "../site/datasets/final/"
+GOOGLE_ACCOUNT_CREDS_PATH = os.path.join("../", os.getenv("GOOGLE_ACCOUNT_CREDS_PATH"))
# -----------------------------------------------------------------------------
# Celery
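
Note on the app_cfg.py change: os.getenv("GOOGLE_ACCOUNT_CREDS_PATH") returns None when the variable is unset, which would make os.path.join raise a TypeError at import time. A minimal sketch of a guarded variant (the env var name is the one added above; the explicit guard is an assumption, not part of this commit):

import os

creds_rel = os.getenv("GOOGLE_ACCOUNT_CREDS_PATH")
if creds_rel is None:
    # Assumed guard: fail with a clear message instead of a TypeError from os.path.join
    raise RuntimeError("GOOGLE_ACCOUNT_CREDS_PATH is not set")
GOOGLE_ACCOUNT_CREDS_PATH = os.path.join("../", creds_rel)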
diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py
index 779f68ba..701c78b2 100644
--- a/megapixels/app/site/loader.py
+++ b/megapixels/app/site/loader.py
@@ -5,6 +5,9 @@ import glob
import app.settings.app_cfg as cfg
from app.utils.file_utils import load_json
+import app.utils.sheet_utils as sheet
+
+sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")
def read_metadata(fn):
"""
@@ -20,6 +23,12 @@ def read_metadata(fn):
sections = data.split("\n\n")
return parse_metadata(fn, sections)
+def domainFromUrl(url):
+ domain = url.split('/')[2].split('.')
+ if len(domain) > 2 and len(domain[-2]) == 2:
+ return ".".join(domain[-3:])
+ return ".".join(domain[-2:])
+
default_metadata = {
'status': 'published',
@@ -33,6 +42,18 @@ default_metadata = {
'tagline': '',
}
+sidebar_order = [
+ { 'key': 'published', 'title': 'Published' },
+ { 'key': 'images', 'title': 'Images' },
+ { 'key': 'videos', 'title': 'Videos' },
+ { 'key': 'identities', 'title': 'Identities' },
+ { 'key': 'purpose', 'title': 'Purpose' },
+ { 'key': 'created_by', 'title': 'Created by' },
+ { 'key': 'funded_by_short', 'title': 'Funded by' },
+ { 'key': 'size_gb', 'title': 'Download Size' },
+ { 'key': 'website', 'title': 'Website' },
+]
+
def parse_metadata(fn, sections):
"""
parse the metadata headers in a markdown file
@@ -87,8 +108,25 @@ def parse_metadata(fn, sections):
print("Bad metadata? {}".format(dataset_path))
elif 'datasets' in fn:
print("/!\\ {} does not exist!".format(dataset_path))
+
+ if metadata['slug'] in sidebar:
+ sidebar_row = sidebar[metadata['slug']]
+ if sidebar_row:
+ metadata['sidebar'] = []
+ for item in sidebar_order:
+ key = item['key']
+ value = sidebar_row[key]
+ if value:
+ value = value.replace(' - ', ' &ndash; ')
+ if key == 'size_gb':
+ value += ' GB'
+ if key == 'website':
+ value = "<a href='" + value + "' target='_blank' rel='nofollow noopener'>" + domainFromUrl(value) + "</a>"
+ metadata['sidebar'].append({ 'value': value, 'title': item['title'], })
+
if 'meta' not in metadata or not metadata['meta']: # dude
metadata['meta'] = {}
+ metadata['sidebar'] = []
return metadata, valid_sections
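
The loader.py changes fetch the "sidebar" worksheet once at import time and attach a per-dataset sidebar list to the page metadata; domainFromUrl reduces a website URL to its registered domain for the link text. Shown standalone below with illustrative URLs (not taken from the sheet):

# domainFromUrl as added in loader.py above, with descriptive comments and example calls
def domainFromUrl(url):
    # Take the host segment of the URL and split it into labels.
    domain = url.split('/')[2].split('.')
    # Keep three labels when the second-level label is two letters (e.g. "ac.uk", "co.uk"),
    # otherwise keep the last two labels.
    if len(domain) > 2 and len(domain[-2]) == 2:
        return ".".join(domain[-3:])
    return ".".join(domain[-2:])

print(domainFromUrl("http://www.robots.ox.ac.uk/~vgg/data/"))  # -> ox.ac.uk
print(domainFromUrl("https://example.com/dataset/"))           # -> example.com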
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index 06c45f41..dc2a09f2 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -55,7 +55,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False):
elif '### statistics' in section.lower() or '### sidebar' in section.lower():
if len(current_group):
groups.append(format_section(current_group, s3_path))
- current_group = []
+ current_group = [format_include("{% include 'sidebar.html' %}", metadata)]
if 'sidebar' not in section.lower():
current_group.append(section)
in_stats = True
@@ -267,15 +267,6 @@ def format_include(section, metadata):
include_fn = section.strip().strip('\n').strip().strip('{%').strip().strip('%}').strip()
include_fn = include_fn.strip('include').strip().strip('"').strip().strip("'").strip()
return includes_env.get_template(include_fn).render(metadata=metadata)
- # include_dir = cfg.DIR_SITE_INCLUDES
- # try:
- # includes_env.get_template(fp_html)
- # with open(join(include_dir, fp_html), 'r') as fp:
- # html = fp.read().replace('\n', '')
- # return html
- # except Exception as e:
- # print(f'Error parsing include: {e}')
- # return ''
def format_applet(section, s3_path):
"""
diff --git a/megapixels/app/utils/sheet_utils.py b/megapixels/app/utils/sheet_utils.py
new file mode 100644
index 00000000..85f979c6
--- /dev/null
+++ b/megapixels/app/utils/sheet_utils.py
@@ -0,0 +1,82 @@
+import os
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+from app.settings import app_cfg as cfg
+
+def fetch_spreadsheet():
+ """Open the Google Spreadsheet, which contains the individual worksheets"""
+ scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
+ path = os.path.dirname(os.path.abspath(__file__))
+ credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope)
+ docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
+ client = gspread.authorize(credentials)
+ spreadsheet = client.open_by_key(docid)
+ return spreadsheet
+
+def fetch_worksheet(name):
+ """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
+ spreadsheet = fetch_spreadsheet()
+ return spreadsheet.worksheet(name)
+
+def fetch_google_sheet(name):
+ """Get all the values from a particular worksheet as a list of lists.
+ Returns:
+ :keys - the first row of the document
+ :lines - a list of lists with the rest of the rows"""
+ rows = fetch_worksheet(name).get_all_values()
+ keys = rows[0]
+ lines = rows[1:]
+ return keys, lines
+
+def fetch_google_sheet_objects(name):
+ """Get all the values from a worksheet as a list of dictionaries"""
+ keys, rows = fetch_google_sheet(name)
+ recs = []
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ recs.append(rec)
+ return recs
+
+def fetch_google_lookup(name, item_key='key'):
+ """Get all the values from a worksheet as a dictionary of dictionaries.
+ Specify which field you want to use as the dictionary key."""
+ keys, rows = fetch_google_sheet(name)
+ lookup = {}
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ lookup[rec[item_key]] = rec
+ return lookup
+
+def fetch_verified_paper_lookup():
+ """Fetch a lookup keyed by dataset, where each dataset points to a hash of valid or invalid papers..."""
+ keys, rows = fetch_google_sheet('verifications')
+ verified_lookup = {}
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ if rec['dataset'] not in verified_lookup:
+ verified_lookup[rec['dataset']] = {}
+ if str(rec['uses_dataset']) == '1':
+ verified_lookup[rec['dataset']][rec['paper_id']] = rec
+ return verified_lookup
+
+def update_or_append_worksheet(name, form):
+ """Update a row if it exists, create it if it doesn't"""
+ worksheet = fetch_worksheet(name)
+ keys = worksheet.row_values(1)
+ row = [ form[key] if key in form else '' for key in keys ]
+ try:
+ cell = worksheet.find(form['paper_id'])
+ except:
+ cell = None
+ if cell:
+ for i, item in enumerate(row):
+ worksheet.update_cell(cell.row, i+1, item)
+ else:
+ worksheet.append_row(row)
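
A minimal usage sketch of the new sheet_utils module, mirroring the call added in loader.py (running it requires the service-account JSON referenced by GOOGLE_ACCOUNT_CREDS_PATH and access to the spreadsheet):

import app.utils.sheet_utils as sheet

# Dictionary of dictionaries keyed by the "key" column of the "sidebar" worksheet
sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")
for slug, row in sidebar.items():
    # Each row is a plain dict of column name -> cell value
    print(slug, row.get("website", ""))

# Lookup of dataset -> {paper_id: row} for rows where uses_dataset == "1"
verified = sheet.fetch_verified_paper_lookup()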