From 1d3e450b5204baf72765f2a319adc8146ba460ad Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 25 Feb 2019 16:15:28 +0100
Subject: check ft field

---
 scraper/s2-final-report.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 4b74750a..fb7a78dc 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -19,7 +19,7 @@ def s2_final_report():
     megapixels = load_megapixels_lookup()
     items = []
     for key, item in megapixels.items():
-        if item['dataset']['ft_share'] == '1':
+        if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
             items.append((item,))
     parallelize(process_paper, items)
 
@@ -184,6 +184,7 @@ def load_megapixels_lookup():
             lookup[paper_key]['dataset'] = dataset_lookup[paper_key]
         else:
            print("not in datasets lookup:", paper_key)
+            lookup[paper_key]['dataset'] = {}
     # recs.append(rec)
     return lookup
 
--
cgit v1.2.3-70-g09d2

From 308465b19fb57626b8042878a4b6a6a49fb28879 Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 25 Feb 2019 16:16:21 +0100
Subject: comment

---
 scraper/s2-final-report.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index fb7a78dc..8fee98f6 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -185,7 +185,6 @@ def load_megapixels_lookup():
         else:
             print("not in datasets lookup:", paper_key)
             lookup[paper_key]['dataset'] = {}
-    # recs.append(rec)
     return lookup
 
 if __name__ == '__main__':
--
cgit v1.2.3-70-g09d2

From 2d3963d9a6f39786dd07717f05392ae74e6bb685 Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 25 Feb 2019 16:16:36 +0100
Subject: stats

---
 scraper/s2-final-report.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 8fee98f6..197d5642 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -42,7 +42,6 @@ def process_paper(row):
         json.dump({
             'id': papers[0]['paper_id'],
             'dataset': row['dataset'],
-            'statistics': row['statistics'],
             'paper': papers[0],
             'addresses': address_list,
             'additional_papers': papers[1:],
--
cgit v1.2.3-70-g09d2
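A note on what the three patches above converge on: once load_megapixels_lookup() falls back to an empty dict for papers missing from the dataset lookup, a direct subscript like item['dataset']['ft_share'] raises KeyError, so the filter in s2_final_report() has to test for the key first. Below is a minimal sketch of that guard using made-up records; only the key names and the 'Y' flag convention are taken from the diffs.

# Hypothetical records shaped like the lookup load_megapixels_lookup() builds.
megapixels = {
    'dataset-a': {'dataset': {'ft_share': 'Y'}},  # shared; passes the filter
    'dataset-b': {'dataset': {'ft_share': 'N'}},  # present but not shared
    'dataset-c': {'dataset': {}},                 # hit the "not in datasets lookup" branch
}

items = []
for key, item in megapixels.items():
    # Equivalent to the patched guard: dict.get returns None instead of
    # raising KeyError when the empty default dict has no 'ft_share' key.
    if item['dataset'].get('ft_share') == 'Y':
        items.append((item,))

assert len(items) == 1  # only 'dataset-a' survives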
From bc240ba2a4b5e30710d37af88eccd905209fc263 Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 25 Feb 2019 17:07:47 +0100
Subject: update final report

---
 client/map/index.js           | 17 +++--------------
 scraper/s2-citation-report.py |  2 +-
 scraper/s2-final-report.py    | 27 +++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/client/map/index.js b/client/map/index.js
index e8543c77..b744c8e2 100644
--- a/client/map/index.js
+++ b/client/map/index.js
@@ -1,17 +1,6 @@
 import L from 'leaflet'
 import './leaflet.bezier'
 
-function getCitations(dataset) {
-  // console.log(dataset.citations)
-  return dataset.citations.map(c => ({
-    title: c[0],
-    location: c[2],
-    lat: c[5],
-    lng: c[6],
-    type: c[7],
-  }))
-}
-
 const arcStyle = {
   color: 'rgb(245, 246, 150)',
   fillColor: 'rgb(245, 246, 150)',
@@ -51,7 +40,7 @@ export default function append(el, payload) {
   const { data } = payload
   let { paper, address } = data
   let source = [0, 0]
-  const citations = getCitations(data)
+  const { citations } = data
 
   let map = L.map(el).setView([25, 0], 2)
   L.tileLayer('https://api.tiles.mapbox.com/v4/{id}/{z}/{x}/{y}.png?access_token={accessToken}', {
@@ -65,13 +54,13 @@ export default function append(el, payload) {
   }).addTo(map)
 
   if (address) {
-    source = address.slice(3, 5).map(n => parseFloat(n))
+    source = [address.lat, address.lng]
   }
 
   citations.forEach(point => {
     const latlng = [point.lat, point.lng]
     if (Number.isNaN(latlng[0]) || Number.isNaN(latlng[1])) return
-    addMarker(map, latlng, point.title, point.location)
+    addMarker(map, latlng, point.title, point.address)
     addArc(map, source, latlng)
   })
 
diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py
index 91e433a6..78bb98bb 100644
--- a/scraper/s2-citation-report.py
+++ b/scraper/s2-citation-report.py
@@ -35,7 +35,7 @@ def s2_citation_report():
 
     write_master_report('{}/{}'.format(DIR_PUBLIC_CITATIONS, "datasets.csv"), papers)
 
-    sts = subprocess.call([
+    subprocess.call([
         "s3cmd", "put", "-P", "--recursive",
         DIR_PUBLIC_CITATIONS + '/',
         "s3://megapixels/v1/citations/",
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 197d5642..878640ac 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -8,7 +8,8 @@ import click
 import subprocess
 from util import *
 
-DIR_PUBLIC_CITATIONS = "../site/datasets/final"
+DIR_PUBLIC_CITATIONS = "../site/datasets/public"
+DIR_FINAL_CITATIONS = "../site/datasets/final"
 DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
 
 addresses = AddressBook()
@@ -22,6 +23,11 @@ def s2_final_report():
         if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
             items.append((item,))
     parallelize(process_paper, items)
+    subprocess.call([
+        "s3cmd", "put", "-P", "--recursive",
+        DIR_PUBLIC_CITATIONS + '/',
+        "s3://megapixels/v1/citations/",
+    ])
 
 def process_paper(row):
     aggregate_citations = {}
@@ -38,11 +44,12 @@ def process_paper(row):
             address_list.append(res['address'])
     if not len(papers):
         return
-    with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+    paper = papers[0]
+    with open('{}/{}.json'.format(DIR_FINAL_CITATIONS, row['key']), 'w') as f:
         json.dump({
-            'id': papers[0]['paper_id'],
+            'id': paper['paper_id'],
             'dataset': row['dataset'],
-            'paper': papers[0],
+            'paper': paper,
             'addresses': address_list,
             'additional_papers': papers[1:],
             'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
@@ -52,6 +59,18 @@ def process_paper(row):
             'id': papers[0]['paper_id'],
             'citations': [unknown_citations[key] for key in unknown_citations.keys()],
         }, f)
+    with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+        json.dump({
+            'id': paper['paper_id'],
+            'paper': {
+                'key': row['key'],
+                'name': row['name'],
+                'title': paper['title'],
+                'year': paper['year'],
+            },
+            'address': address_list[0],
+            'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
+        }, f)
 
 def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations):
     res = {
--
cgit v1.2.3-70-g09d2

From f7a021e069dacef6908ba9c94b087ce82b34e83c Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 25 Feb 2019 17:09:47 +0100
Subject: k

---
 scraper/s2-final-report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 878640ac..f4043404 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -8,7 +8,7 @@ import click
 import subprocess
 from util import *
 
-DIR_PUBLIC_CITATIONS = "../site/datasets/public"
+DIR_PUBLIC_CITATIONS = "../site/datasets/citations"
 DIR_FINAL_CITATIONS = "../site/datasets/final"
 DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
 
--
cgit v1.2.3-70-g09d2
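The two patches above split process_paper() output into final, unknown, and public reports, then push the public directory to S3 by shelling out to s3cmd. A standalone sketch of that upload step follows; the flags, local path, and bucket prefix are copied from the diffs, while the exit-code check is an illustrative addition (the scraper itself discards subprocess.call's return value).

import subprocess

DIR_PUBLIC_CITATIONS = "../site/datasets/citations"

# "put -P --recursive" uploads the directory tree with a public-read ACL,
# mirroring it under the destination prefix.
ret = subprocess.call([
    "s3cmd", "put", "-P", "--recursive",
    DIR_PUBLIC_CITATIONS + '/',
    "s3://megapixels/v1/citations/",
])
if ret != 0:
    raise RuntimeError("s3cmd exited with code {}".format(ret))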
"../site/datasets/unknown" -- cgit v1.2.3-70-g09d2 From 2b6a40134d9dd4edf40881f6a86c463f522a509f Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Mon, 25 Feb 2019 17:10:28 +0100 Subject: k --- scraper/s2-final-report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scraper/s2-final-report.py') diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index f4043404..ec4ad25b 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -68,7 +68,7 @@ def process_paper(row): 'title': paper['title'], 'year': paper['year'], }, - 'address': address_list[0], + 'address': address_list[0] if len(address_list) else {}, 'citations': [aggregate_citations[key] for key in aggregate_citations.keys()], }, f) -- cgit v1.2.3-70-g09d2