| author    | adamhrv <adam@ahprojects.com>                            | 2019-04-03 23:24:12 +0200 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | adamhrv <adam@ahprojects.com>                            | 2019-04-03 23:24:12 +0200 |
| commit    | 7d2043eb24cd171e809d83219e543ce9541ba821 (patch)         |                           |
| tree      | c415aae6c682093b18052d622fd201e983e18fd1 /megapixels/app |                           |
| parent    | 2db426422eedc4847618f972b02189f3d49fe03a (diff)          |                           |
| parent    | 24e4f4af71f1e146f33688822ac3e4242339faa4 (diff)          |                           |
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/app')
| -rw-r--r-- | megapixels/app/settings/app_cfg.py  |  1 |
| -rw-r--r-- | megapixels/app/site/loader.py       | 38 |
| -rw-r--r-- | megapixels/app/site/parser.py       | 11 |
| -rw-r--r-- | megapixels/app/utils/sheet_utils.py | 82 |
4 files changed, 122 insertions, 10 deletions
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index f6d0a7df..1eed1a41 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -169,6 +169,7 @@ DIR_SITE_INCLUDES = "../site/includes"
 DIR_SITE_USER_CONTENT = "../site/public/user_content"
 DIR_SITE_DATASETS = "../site/datasets/"
 DIR_SITE_FINAL_CITATIONS = "../site/datasets/final/"
+GOOGLE_ACCOUNT_CREDS_PATH = os.path.join("../", os.getenv("GOOGLE_ACCOUNT_CREDS_PATH"))
 
 # -----------------------------------------------------------------------------
 # Celery

diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py
index 779f68ba..701c78b2 100644
--- a/megapixels/app/site/loader.py
+++ b/megapixels/app/site/loader.py
@@ -5,6 +5,9 @@ import glob
 
 import app.settings.app_cfg as cfg
 from app.utils.file_utils import load_json
+import app.utils.sheet_utils as sheet
+
+sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")
 
 def read_metadata(fn):
   """
@@ -20,6 +23,12 @@ def read_metadata(fn):
   sections = data.split("\n\n")
   return parse_metadata(fn, sections)
 
+def domainFromUrl(url):
+  domain = url.split('/')[2].split('.')
+  if len(domain) > 2 and len(domain[-2]) == 2:
+    return ".".join(domain[-3:])
+  return ".".join(domain[-2:])
+
 
 default_metadata = {
   'status': 'published',
@@ -33,6 +42,18 @@ default_metadata = {
   'tagline': '',
 }
 
+sidebar_order = [
+  { 'key': 'published', 'title': 'Published' },
+  { 'key': 'images', 'title': 'Images' },
+  { 'key': 'videos', 'title': 'Videos' },
+  { 'key': 'identities', 'title': 'Identities' },
+  { 'key': 'purpose', 'title': 'Purpose' },
+  { 'key': 'created_by', 'title': 'Created by' },
+  { 'key': 'funded_by_short', 'title': 'Funded by' },
+  { 'key': 'size_gb', 'title': 'Download Size' },
+  { 'key': 'website', 'title': 'Website' },
+]
+
 def parse_metadata(fn, sections):
   """
   parse the metadata headers in a markdown file
@@ -87,8 +108,25 @@ def parse_metadata(fn, sections):
         print("Bad metadata? {}".format(dataset_path))
       elif 'datasets' in fn:
         print("/!\\ {} does not exist!".format(dataset_path))
+
+  if metadata['slug'] in sidebar:
+    sidebar_row = sidebar[metadata['slug']]
+    if sidebar_row:
+      metadata['sidebar'] = []
+      for item in sidebar_order:
+        key = item['key']
+        value = sidebar_row[key]
+        if value:
+          value = value.replace(' - ', ' – ')
+          if key == 'size_gb':
+            value += ' GB'
+          if key == 'website':
+            value = "<a href='" + value + "' target='_blank' rel='nofollow noopener'>" + domainFromUrl(value) + "</a>"
+          metadata['sidebar'].append({ 'value': value, 'title': item['title'], })
+
   if 'meta' not in metadata or not metadata['meta']: # dude
     metadata['meta'] = {}
+    metadata['sidebar'] = []
 
   return metadata, valid_sections
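The new `domainFromUrl` helper is a naive registrable-domain extractor: it takes the host portion of a URL and keeps the last two labels, or the last three when the second-to-last label is only two characters long (a rough heuristic for country-code suffixes like `.co.uk`). A minimal sketch of the expected behavior, with hypothetical URLs (the function body is copied from the hunk above):

```python
def domainFromUrl(url):
  # url.split('/')[2] is the host part of scheme://host/path
  domain = url.split('/')[2].split('.')
  if len(domain) > 2 and len(domain[-2]) == 2:
    return ".".join(domain[-3:])   # e.g. example.co.uk
  return ".".join(domain[-2:])     # e.g. example.com

# Hypothetical inputs, for illustration only:
print(domainFromUrl("https://megapixels.cc/datasets/"))        # megapixels.cc
print(domainFromUrl("http://www.example.co.uk/research"))      # example.co.uk
print(domainFromUrl("https://data.vision.example.com/faces"))  # example.com
```

Note the helper assumes a scheme-qualified URL; a bare `example.com` would raise an `IndexError` on `url.split('/')[2]`, so the sheet's `website` column presumably always stores full URLs.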
{}".format(dataset_path)) elif 'datasets' in fn: print("/!\\ {} does not exist!".format(dataset_path)) + + if metadata['slug'] in sidebar: + sidebar_row = sidebar[metadata['slug']] + if sidebar_row: + metadata['sidebar'] = [] + for item in sidebar_order: + key = item['key'] + value = sidebar_row[key] + if value: + value = value.replace(' - ', ' – ') + if key == 'size_gb': + value += ' GB' + if key == 'website': + value = "<a href='" + value + "' target='_blank' rel='nofollow noopener'>" + domainFromUrl(value) + "</a>" + metadata['sidebar'].append({ 'value': value, 'title': item['title'], }) + if 'meta' not in metadata or not metadata['meta']: # dude metadata['meta'] = {} + metadata['sidebar'] = [] return metadata, valid_sections diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index 06c45f41..dc2a09f2 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -55,7 +55,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): elif '### statistics' in section.lower() or '### sidebar' in section.lower(): if len(current_group): groups.append(format_section(current_group, s3_path)) - current_group = [] + current_group = [format_include("{% include 'sidebar.html' %}", metadata)] if 'sidebar' not in section.lower(): current_group.append(section) in_stats = True @@ -267,15 +267,6 @@ def format_include(section, metadata): include_fn = section.strip().strip('\n').strip().strip('{%').strip().strip('%}').strip() include_fn = include_fn.strip('include').strip().strip('"').strip().strip("'").strip() return includes_env.get_template(include_fn).render(metadata=metadata) - # include_dir = cfg.DIR_SITE_INCLUDES - # try: - # includes_env.get_template(fp_html) - # with open(join(include_dir, fp_html), 'r') as fp: - # html = fp.read().replace('\n', '') - # return html - # except Exception as e: - # print(f'Error parsing include: {e}') - # return '' def format_applet(section, s3_path): """ diff --git a/megapixels/app/utils/sheet_utils.py b/megapixels/app/utils/sheet_utils.py new file mode 100644 index 00000000..85f979c6 --- /dev/null +++ b/megapixels/app/utils/sheet_utils.py @@ -0,0 +1,82 @@ +import os +import gspread +from oauth2client.service_account import ServiceAccountCredentials + +from app.settings import app_cfg as cfg + +def fetch_spreadsheet(): + """Open the Google Spreadsheet, which contains the individual worksheets""" + scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'] + path = os.path.dirname(os.path.abspath(__file__)) + credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope) + docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc" + client = gspread.authorize(credentials) + spreadsheet = client.open_by_key(docid) + return spreadsheet + +def fetch_worksheet(name): + """Get a reference to a particular "worksheet" from the Google Spreadsheet""" + spreadsheet = fetch_spreadsheet() + return spreadsheet.worksheet(name) + +def fetch_google_sheet(name): + """Get all the values from a particular worksheet as a list of lists. 
diff --git a/megapixels/app/utils/sheet_utils.py b/megapixels/app/utils/sheet_utils.py
new file mode 100644
index 00000000..85f979c6
--- /dev/null
+++ b/megapixels/app/utils/sheet_utils.py
@@ -0,0 +1,82 @@
+import os
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+from app.settings import app_cfg as cfg
+
+def fetch_spreadsheet():
+  """Open the Google Spreadsheet, which contains the individual worksheets"""
+  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
+  path = os.path.dirname(os.path.abspath(__file__))
+  credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope)
+  docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
+  client = gspread.authorize(credentials)
+  spreadsheet = client.open_by_key(docid)
+  return spreadsheet
+
+def fetch_worksheet(name):
+  """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
+  spreadsheet = fetch_spreadsheet()
+  return spreadsheet.worksheet(name)
+
+def fetch_google_sheet(name):
+  """Get all the values from a particular worksheet as a list of lists.
+  Returns:
+    :keys - the first row of the document
+    :lines - a list of lists with the rest of the rows"""
+  rows = fetch_worksheet(name).get_all_values()
+  keys = rows[0]
+  lines = rows[1:]
+  return keys, lines
+
+def fetch_google_sheet_objects(name):
+  """Get all the values from a worksheet as a list of dictionaries"""
+  keys, rows = fetch_google_sheet(name)
+  recs = []
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    recs.append(rec)
+  return recs
+
+def fetch_google_lookup(name, item_key='key'):
+  """Get all the values from a worksheet as a dictionary of dictionaries.
+  Specify which field you want to use as the dictionary key."""
+  keys, rows = fetch_google_sheet(name)
+  lookup = {}
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    lookup[rec[item_key]] = rec
+  return lookup
+
+def fetch_verified_paper_lookup():
+  """Fetch a lookup keyed by dataset, where each dataset points to a hash of valid or invalid papers..."""
+  keys, rows = fetch_google_sheet('verifications')
+  verified_lookup = {}
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    if rec['dataset'] not in verified_lookup:
+      verified_lookup[rec['dataset']] = {}
+    if str(rec['uses_dataset']) == '1':
+      verified_lookup[rec['dataset']][rec['paper_id']] = rec
+  return verified_lookup
+
+def update_or_append_worksheet(name, form):
+  """Update a row if it exists, create it if it doesn't"""
+  worksheet = fetch_worksheet(name)
+  keys = worksheet.row_values(1)
+  row = [ form[key] if key in form else '' for key in keys ]
+  try:
+    cell = worksheet.find(form['paper_id'])
+  except:
+    cell = None
+  if cell:
+    for i, item in enumerate(row):
+      worksheet.update_cell(cell.row, i+1, item)
+  else:
+    worksheet.append_row(row)
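Taken together, the new module offers three views of the same spreadsheet: raw rows (`fetch_google_sheet`), a list of dicts (`fetch_google_sheet_objects`), and a dict-of-dicts keyed on a chosen column (`fetch_google_lookup`), which is what `loader.py` uses for the sidebar. A hedged usage sketch, assuming credentials resolve via `GOOGLE_ACCOUNT_CREDS_PATH` and that the 'sidebar' worksheet's header row includes a `key` column (the slug below is hypothetical):

```python
import app.utils.sheet_utils as sheet

# Dict-of-dicts keyed by the 'key' column, as loader.py does at import time.
sidebar = sheet.fetch_google_lookup('sidebar', item_key='key')

row = sidebar.get('some-dataset-slug')  # hypothetical slug
if row:
  print(row['images'], row['website'])

# The same rows as an ordered list of dicts:
for rec in sheet.fetch_google_sheet_objects('sidebar'):
  print(rec['key'], rec.get('published', ''))
```

Note that every fetch helper calls `fetch_spreadsheet()` again, re-authorizing and reopening the document each time; that is presumably why `loader.py` calls `fetch_google_lookup` once at module import rather than per page.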
