diff options
| author | Adam Harvey <adam@ahprojects.com> | 2019-02-19 23:14:02 +0100 |
|---|---|---|
| committer | Adam Harvey <adam@ahprojects.com> | 2019-02-19 23:14:02 +0100 |
| commit | 2c469720811145abb07e5e59281f917eb8b1cc67 (patch) | |
| tree | 5651671371b44929e464e6ec671856f1e84d5200 /scraper/s2-final-report.py | |
| parent | fe0dee2f8c8a7127d1ac2f01c5989f5011a2ee8a (diff) | |
| parent | 768757fe47d55b62c1d3ef87c982332e0292393e (diff) | |
..
Diffstat (limited to 'scraper/s2-final-report.py')
| -rw-r--r-- | scraper/s2-final-report.py | 11 |
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index eff9a736..2ebd516c 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -12,7 +12,7 @@ DIR_PUBLIC_CITATIONS = "../site/datasets/final"
 DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
 
 addresses = AddressBook()
-paper_location_lookup = fetch_google_lookup('paper_locations')
+paper_location_lookup = fetch_google_lookup('paper_locations', item_key='paper_id')
 
 @click.command()
 def s2_final_report():
@@ -27,14 +27,15 @@ def process_paper(row):
   aggregate_citations = {}
   unknown_citations = {}
   address = None
+  address_list = []
   papers = []
-  print(row['paper_ids'])
+  # print(row['paper_ids'])
   for paper_id in row['paper_ids']:
     res = process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations)
     if res:
       papers.append(res)
       if res['address']:
-        address = res['address']
+        address_list.append(res['address'])
   if not len(papers):
     return
   with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
@@ -43,7 +44,7 @@ def process_paper(row):
       'dataset': row['dataset'],
       'statistics': row['statistics'],
       'paper': papers[0],
-      'address': address,
+      'addresses': address_list,
       'additional_papers': papers[1:],
       'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
     }, f)
@@ -94,7 +95,7 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
   paper_institutions = load_institutions(paper.paper_id, paper_location_lookup)
   paper_address = None
   for inst in sorted(paper_institutions, key=operator.itemgetter(1)):
-    # print(inst[1])
+    #print(inst[1])
     institution = inst[1]
     if paper_address is None:
       paper_address = addresses.findObject(institution)
```
