From 8e26cbff5171fb204082e1b6778d17f786c1eb16 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Fri, 8 Feb 2019 23:19:04 +0100 Subject: reports of which paper titles matched --- scraper/s2-final-report.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'scraper/s2-final-report.py') diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index a958e4bc..f81d1835 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -44,16 +44,16 @@ def process_single_paper(row, addresses, aggregate_citations): 'paper_id': '', 'key': '', 'title': '', - 'journal': '', + # 'journal': '', 'year': '', + 'pdf': '', 'address': '', - 'pdf_link': '', - 'citation_count': 0, - 'citations_geocoded': 0, - 'citations_unknown': 0, - 'citations_empty': 0, - 'citations_pdf': 0, - 'citations_doi': 0, + # 'citation_count': 0, + # 'citations_geocoded': 0, + # 'citations_unknown': 0, + # 'citations_empty': 0, + # 'citations_pdf': 0, + # 'citations_doi': 0, } geocoded_citations = [] @@ -78,12 +78,9 @@ def process_single_paper(row, addresses, aggregate_citations): res['name'] = row['name'] res['paper_id'] = paper.paper_id res['title'] = paper.title - res['journal'] = paper.journal - res['report_link'] = 'papers/{}.html'.format(paper.paper_id) - pdf_link = paper.pdf_link - if type(pdf_link) == dict and 'url' in pdf_link: - pdf_link = pdf_link['url'] - res['pdf_link'] = pdf_link + # res['journal'] = paper.journal + res['year'] = paper.year + res['pdf'] = paper.pdf_link # res['authors'] = ', '.join(paper.authors) # res['citations'] = [] @@ -143,15 +140,12 @@ def process_single_paper(row, addresses, aggregate_citations): geocoded_addresses.append(next_address) if address: if citationId not in aggregate_citations: - pdf_link = citation.pdf_link - if type(pdf_link) == dict and 'url' in pdf_link: - pdf_link = pdf_link['url'] aggregate_citations[citationId] = { 'id': citationId, 'title': citation.title, 'addresses': geocoded_addresses, 'year': citation.year, - 'pdf': pdf_link, + 'pdf': citation.pdf_link, } # res['citation_count'] = len(data['citations']) -- cgit v1.2.3-70-g09d2