summary | refs | log | tree | commit | diff
path: root/scraper/s2-final-report.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-08 23:19:04 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-08 23:19:04 +0100
commit: 8e26cbff5171fb204082e1b6778d17f786c1eb16 (patch)
tree: f8420a6268d1c624572091881f0b02cf17d0b695 /scraper/s2-final-report.py
parent: 6059ce2eb68a931a4cbb12049c202c3299e4966b (diff)
reports of which paper titles matched
Diffstat (limited to 'scraper/s2-final-report.py')
-rw-r--r-- scraper/s2-final-report.py 30
1 files changed, 12 insertions, 18 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index a958e4bc..f81d1835 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -44,16 +44,16 @@ def process_single_paper(row, addresses, aggregate_citations):
'paper_id': '',
'key': '',
'title': '',
- 'journal': '',
+ # 'journal': '',
'year': '',
+ 'pdf': '',
'address': '',
- 'pdf_link': '',
- 'citation_count': 0,
- 'citations_geocoded': 0,
- 'citations_unknown': 0,
- 'citations_empty': 0,
- 'citations_pdf': 0,
- 'citations_doi': 0,
+ # 'citation_count': 0,
+ # 'citations_geocoded': 0,
+ # 'citations_unknown': 0,
+ # 'citations_empty': 0,
+ # 'citations_pdf': 0,
+ # 'citations_doi': 0,
}
geocoded_citations = []
@@ -78,12 +78,9 @@ def process_single_paper(row, addresses, aggregate_citations):
res['name'] = row['name']
res['paper_id'] = paper.paper_id
res['title'] = paper.title
- res['journal'] = paper.journal
- res['report_link'] = 'papers/{}.html'.format(paper.paper_id)
- pdf_link = paper.pdf_link
- if type(pdf_link) == dict and 'url' in pdf_link:
- pdf_link = pdf_link['url']
- res['pdf_link'] = pdf_link
+ # res['journal'] = paper.journal
+ res['year'] = paper.year
+ res['pdf'] = paper.pdf_link
# res['authors'] = ', '.join(paper.authors)
# res['citations'] = []
@@ -143,15 +140,12 @@ def process_single_paper(row, addresses, aggregate_citations):
geocoded_addresses.append(next_address)
if address:
if citationId not in aggregate_citations:
- pdf_link = citation.pdf_link
- if type(pdf_link) == dict and 'url' in pdf_link:
- pdf_link = pdf_link['url']
aggregate_citations[citationId] = {
'id': citationId,
'title': citation.title,
'addresses': geocoded_addresses,
'year': citation.year,
- 'pdf': pdf_link,
+ 'pdf': citation.pdf_link,
}
# res['citation_count'] = len(data['citations'])