summary | refs | log | tree | commit | diff
path: root/scraper/s2-final-report.py
diff options
context:
space:
mode:
author: Adam Harvey <adam@ahprojects.com> 2019-02-19 23:14:02 +0100
committer: Adam Harvey <adam@ahprojects.com> 2019-02-19 23:14:02 +0100
commit: 2c469720811145abb07e5e59281f917eb8b1cc67 (patch)
tree: 5651671371b44929e464e6ec671856f1e84d5200 /scraper/s2-final-report.py
parent: fe0dee2f8c8a7127d1ac2f01c5989f5011a2ee8a (diff)
parent: 768757fe47d55b62c1d3ef87c982332e0292393e (diff)
..
Diffstat (limited to 'scraper/s2-final-report.py')
-rw-r--r-- scraper/s2-final-report.py 11
1 files changed, 6 insertions, 5 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index eff9a736..2ebd516c 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -12,7 +12,7 @@ DIR_PUBLIC_CITATIONS = "../site/datasets/final"
DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
addresses = AddressBook()
-paper_location_lookup = fetch_google_lookup('paper_locations')
+paper_location_lookup = fetch_google_lookup('paper_locations', item_key='paper_id')
@click.command()
def s2_final_report():
@@ -27,14 +27,15 @@ def process_paper(row):
aggregate_citations = {}
unknown_citations = {}
address = None
+ address_list = []
papers = []
- print(row['paper_ids'])
+ # print(row['paper_ids'])
for paper_id in row['paper_ids']:
res = process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations)
if res:
papers.append(res)
if res['address']:
- address = res['address']
+ address_list.append(res['address'])
if not len(papers):
return
with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
@@ -43,7 +44,7 @@ def process_paper(row):
'dataset': row['dataset'],
'statistics': row['statistics'],
'paper': papers[0],
- 'address': address,
+ 'addresses': address_list,
'additional_papers': papers[1:],
'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
}, f)
@@ -94,7 +95,7 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
paper_institutions = load_institutions(paper.paper_id, paper_location_lookup)
paper_address = None
for inst in sorted(paper_institutions, key=operator.itemgetter(1)):
- # print(inst[1])
+ #print(inst[1])
institution = inst[1]
if paper_address is None:
paper_address = addresses.findObject(institution)