| field | value | date |
|---|---|---|
| author | jules@lens <julescarbon@gmail.com> | 2019-04-02 20:36:51 +0200 |
| committer | jules@lens <julescarbon@gmail.com> | 2019-04-02 20:36:51 +0200 |
| commit | 1d238346b5609e9454a4917c75631a550b5b43d1 | |
| tree | 8a936e721e78c7b5948b303e6a1686c96b882d51 /megapixels/app | |
| parent | b4b58f2279fb01fa0240006c460c0b5ec95c1126 | |
| parent | f58d41731fc07d94d594d5582aef203564f990ec | |
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'megapixels/app')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 1 |
| -rw-r--r-- | megapixels/app/site/builder.py | 2 |
| -rw-r--r-- | megapixels/app/site/loader.py | 40 |
| -rw-r--r-- | megapixels/app/site/parser.py | 33 |
| -rw-r--r-- | megapixels/app/utils/sheet_utils.py | 82 |
5 files changed, 147 insertions, 11 deletions
```diff
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index f6d0a7df..1eed1a41 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -169,6 +169,7 @@
 DIR_SITE_INCLUDES = "../site/includes"
 DIR_SITE_USER_CONTENT = "../site/public/user_content"
 DIR_SITE_DATASETS = "../site/datasets/"
 DIR_SITE_FINAL_CITATIONS = "../site/datasets/final/"
+GOOGLE_ACCOUNT_CREDS_PATH = os.path.join("../", os.getenv("GOOGLE_ACCOUNT_CREDS_PATH"))
 
 # -----------------------------------------------------------------------------
 # Celery
diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py
index 603d4788..55a85b0f 100644
--- a/megapixels/app/site/builder.py
+++ b/megapixels/app/site/builder.py
@@ -57,7 +57,7 @@ def build_page(fn, research_posts, datasets):
     s3.sync_directory(dirname, s3_dir, metadata)
 
   content = parser.parse_markdown(metadata, sections, s3_path, skip_h1=skip_h1)
-  
+
   html = template.render(
     metadata=metadata,
     content=content,
diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py
index a544333b..701c78b2 100644
--- a/megapixels/app/site/loader.py
+++ b/megapixels/app/site/loader.py
@@ -5,6 +5,9 @@ import glob
 
 import app.settings.app_cfg as cfg
 from app.utils.file_utils import load_json
+import app.utils.sheet_utils as sheet
+
+sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")
 
 def read_metadata(fn):
   """
@@ -20,6 +23,12 @@ def read_metadata(fn):
     sections = data.split("\n\n")
   return parse_metadata(fn, sections)
 
+def domainFromUrl(url):
+  domain = url.split('/')[2].split('.')
+  if len(domain) > 2 and len(domain[-2]) == 2:
+    return ".".join(domain[-3:])
+  return ".".join(domain[-2:])
+
 
 default_metadata = {
   'status': 'published',
@@ -33,6 +42,18 @@ default_metadata = {
   'tagline': '',
 }
 
+sidebar_order = [
+  { 'key': 'published', 'title': 'Published' },
+  { 'key': 'images', 'title': 'Images' },
+  { 'key': 'videos', 'title': 'Videos' },
+  { 'key': 'identities', 'title': 'Identities' },
+  { 'key': 'purpose', 'title': 'Purpose' },
+  { 'key': 'created_by', 'title': 'Created by' },
+  { 'key': 'funded_by_short', 'title': 'Funded by' },
+  { 'key': 'size_gb', 'title': 'Download Size' },
+  { 'key': 'website', 'title': 'Website' },
+]
+
 def parse_metadata(fn, sections):
   """
   parse the metadata headers in a markdown file
@@ -85,8 +106,27 @@ def parse_metadata(fn, sections):
       metadata['meta'] = load_json(dataset_path)
       if not metadata['meta']:
         print("Bad metadata? {}".format(dataset_path))
+    elif 'datasets' in fn:
+      print("/!\\ {} does not exist!".format(dataset_path))
+
+  if metadata['slug'] in sidebar:
+    sidebar_row = sidebar[metadata['slug']]
+    if sidebar_row:
+      metadata['sidebar'] = []
+      for item in sidebar_order:
+        key = item['key']
+        value = sidebar_row[key]
+        if value:
+          value = value.replace(' - ', ' – ')
+          if key == 'size_gb':
+            value += ' GB'
+          if key == 'website':
+            value = "<a href='" + value + "' target='_blank' rel='nofollow noopener'>" + domainFromUrl(value) + "</a>"
+          metadata['sidebar'].append({ 'value': value, 'title': item['title'], })
+
   if 'meta' not in metadata or not metadata['meta']: # dude
     metadata['meta'] = {}
+    metadata['sidebar'] = []
 
   return metadata, valid_sections
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index 00470e4b..dc2a09f2 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -27,6 +27,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False):
   current_group = []
   footnotes = []
   in_stats = False
+  in_columns = False
   in_footnotes = False
   ignoring = False
 
@@ -50,10 +51,11 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False):
       footnotes.append(section)
     elif ignoring:
       continue
+
     elif '### statistics' in section.lower() or '### sidebar' in section.lower():
       if len(current_group):
         groups.append(format_section(current_group, s3_path))
-      current_group = []
+      current_group = [format_include("{% include 'sidebar.html' %}", metadata)]
       if 'sidebar' not in section.lower():
         current_group.append(section)
       in_stats = True
@@ -64,6 +66,26 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False):
       if 'end sidebar' not in section.lower():
         current_group.append(section)
       in_stats = False
+
+    elif '=== columns' in section.lower():
+      if len(current_group):
+        groups.append(format_section(current_group, s3_path))
+      current_group = []
+      in_columns = True
+      column_partz = section.split(' ')
+      if len(column_partz) == 3:
+        column_count = column_partz[2]
+      else:
+        column_count = "N"
+      groups.append("<section><div class='columns columns-{}'>".format(column_count))
+    elif in_columns is True and '===' in section:
+      groups.append(format_section(current_group, s3_path, type='column', tag='div'))
+      current_group = []
+      if 'end columns' in section:
+        groups.append("</div></section>")
+        in_columns = False
+        current_group = []
+
     elif section.strip().startswith('{% include'):
       groups.append(format_section(current_group, s3_path))
       current_group = []
@@ -245,15 +267,6 @@ def format_include(section, metadata):
   include_fn = section.strip().strip('\n').strip().strip('{%').strip().strip('%}').strip()
   include_fn = include_fn.strip('include').strip().strip('"').strip().strip("'").strip()
   return includes_env.get_template(include_fn).render(metadata=metadata)
-  # include_dir = cfg.DIR_SITE_INCLUDES
-  # try:
-  #   includes_env.get_template(fp_html)
-  #   with open(join(include_dir, fp_html), 'r') as fp:
-  #     html = fp.read().replace('\n', '')
-  #   return html
-  # except Exception as e:
-  #   print(f'Error parsing include: {e}')
-  #   return ''
 
 def format_applet(section, s3_path):
   """
diff --git a/megapixels/app/utils/sheet_utils.py b/megapixels/app/utils/sheet_utils.py
new file mode 100644
index 00000000..85f979c6
--- /dev/null
+++ b/megapixels/app/utils/sheet_utils.py
@@ -0,0 +1,82 @@
+import os
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+from app.settings import app_cfg as cfg
+
+def fetch_spreadsheet():
+  """Open the Google Spreadsheet, which contains the individual worksheets"""
+  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
+  path = os.path.dirname(os.path.abspath(__file__))
+  credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope)
+  docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
+  client = gspread.authorize(credentials)
+  spreadsheet = client.open_by_key(docid)
+  return spreadsheet
+
+def fetch_worksheet(name):
+  """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
+  spreadsheet = fetch_spreadsheet()
+  return spreadsheet.worksheet(name)
+
+def fetch_google_sheet(name):
+  """Get all the values from a particular worksheet as a list of lists.
+  Returns:
+    :keys - the first row of the document
+    :lines - a list of lists with the rest of the rows"""
+  rows = fetch_worksheet(name).get_all_values()
+  keys = rows[0]
+  lines = rows[1:]
+  return keys, lines
+
+def fetch_google_sheet_objects(name):
+  """Get all the values from a worksheet as a list of dictionaries"""
+  keys, rows = fetch_google_sheet(name)
+  recs = []
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    recs.append(rec)
+  return recs
+
+def fetch_google_lookup(name, item_key='key'):
+  """Get all the values from a worksheet as a dictionary of dictionaries.
+  Specify which field you want to use as the dictionary key."""
+  keys, rows = fetch_google_sheet(name)
+  lookup = {}
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    lookup[rec[item_key]] = rec
+  return lookup
+
+def fetch_verified_paper_lookup():
+  """Fetch a lookup keyed by dataset, where each dataset points to a hash of valid or invalid papers..."""
+  keys, rows = fetch_google_sheet('verifications')
+  verified_lookup = {}
+  for row in rows:
+    rec = {}
+    for index, key in enumerate(keys):
+      rec[key] = row[index]
+    if rec['dataset'] not in verified_lookup:
+      verified_lookup[rec['dataset']] = {}
+    if str(rec['uses_dataset']) == '1':
+      verified_lookup[rec['dataset']][rec['paper_id']] = rec
+  return verified_lookup
+
+def update_or_append_worksheet(name, form):
+  """Update a row if it exists, create it if it doesn't"""
+  worksheet = fetch_worksheet(name)
+  keys = worksheet.row_values(1)
+  row = [ form[key] if key in form else '' for key in keys ]
+  try:
+    cell = worksheet.find(form['paper_id'])
+  except:
+    cell = None
+  if cell:
+    for i, item in enumerate(row):
+      worksheet.update_cell(cell.row, i+1, item)
+  else:
+    worksheet.append_row(row)
```
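A note on the new `domainFromUrl()` helper in loader.py: it takes the hostname (the third `/`-separated piece of an absolute URL) and keeps the last two dot-separated labels, or the last three when the second-to-last label is only two characters, which catches ccTLD second levels like `co.uk` or `ac.uk`. A couple of illustrative calls (the URLs are made up):

```python
# Behavior of loader.py's domainFromUrl(), with hypothetical inputs.
domainFromUrl("http://example.com/dataset/")
# -> 'example.com'   (two labels, keep both)

domainFromUrl("https://data.example.ac.uk/faces/")
# -> 'example.ac.uk' (domain[-2] == 'ac' is two chars, so keep three labels)

# Caveat: a scheme-less string like "example.com" has no third '/'-separated
# piece, so url.split('/')[2] raises IndexError.
```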
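The new `=== columns` branches in parser.py imply a content syntax roughly like the sketch below, reconstructed from the string checks in the diff; the exact markers used in the real markdown files are an assumption. Sections reach the parser already split on blank lines:

```python
# Hypothetical page source exercising the new column markers.
page = """=== columns 2

First column text.

===

Second column text.

=== end columns"""

sections = page.split("\n\n")
# ['=== columns 2', 'First column text.', '===', 'Second column text.', '=== end columns']
#
# '=== columns 2' emits "<section><div class='columns columns-2'>" (the count
# falls back to "N" when omitted); each later section containing '===' flushes
# the accumulated group as one column <div>; 'end columns' additionally closes
# the wrapper with "</div></section>".
```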
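For context on the new module, a minimal sketch of how the `sheet_utils` lookup feeds the sidebar in loader.py; the slug is hypothetical, while the column names come from `sidebar_order` in the diff:

```python
import app.utils.sheet_utils as sheet

# fetch_google_lookup() returns one dict per spreadsheet row, keyed by that
# row's 'key' cell; the worksheet's header row supplies the field names.
sidebar = sheet.fetch_google_lookup("sidebar", item_key="key")

row = sidebar.get("some-dataset-slug")  # hypothetical slug
if row:
  print(row["images"], row["website"])  # columns listed in sidebar_order
```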
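Finally, a sketch of `update_or_append_worksheet()` in use; the worksheet name 'verifications' appears elsewhere in the diff, but the form contents here are hypothetical. gspread's `worksheet.find()` raises when no matching cell exists, which is what the bare `try/except` absorbs before falling back to `append_row()`:

```python
from app.utils.sheet_utils import update_or_append_worksheet

# Hypothetical record; keys are matched against the worksheet's header row,
# and any header without a matching key becomes an empty cell.
form = {"paper_id": "paper-0001", "dataset": "some-dataset", "uses_dataset": "1"}
update_or_append_worksheet("verifications", form)
```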
