summary refs log tree commit diff
path: root/scraper/s2-final-report.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/s2-final-report.py')
-rw-r--r-- scraper/s2-final-report.py 30
1 files changed, 12 insertions, 18 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index a958e4bc..f81d1835 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -44,16 +44,16 @@ def process_single_paper(row, addresses, aggregate_citations):
'paper_id': '',
'key': '',
'title': '',
- 'journal': '',
+ # 'journal': '',
'year': '',
+ 'pdf': '',
'address': '',
- 'pdf_link': '',
- 'citation_count': 0,
- 'citations_geocoded': 0,
- 'citations_unknown': 0,
- 'citations_empty': 0,
- 'citations_pdf': 0,
- 'citations_doi': 0,
+ # 'citation_count': 0,
+ # 'citations_geocoded': 0,
+ # 'citations_unknown': 0,
+ # 'citations_empty': 0,
+ # 'citations_pdf': 0,
+ # 'citations_doi': 0,
}
geocoded_citations = []
@@ -78,12 +78,9 @@ def process_single_paper(row, addresses, aggregate_citations):
res['name'] = row['name']
res['paper_id'] = paper.paper_id
res['title'] = paper.title
- res['journal'] = paper.journal
- res['report_link'] = 'papers/{}.html'.format(paper.paper_id)
- pdf_link = paper.pdf_link
- if type(pdf_link) == dict and 'url' in pdf_link:
- pdf_link = pdf_link['url']
- res['pdf_link'] = pdf_link
+ # res['journal'] = paper.journal
+ res['year'] = paper.year
+ res['pdf'] = paper.pdf_link
# res['authors'] = ', '.join(paper.authors)
# res['citations'] = []
@@ -143,15 +140,12 @@ def process_single_paper(row, addresses, aggregate_citations):
geocoded_addresses.append(next_address)
if address:
if citationId not in aggregate_citations:
- pdf_link = citation.pdf_link
- if type(pdf_link) == dict and 'url' in pdf_link:
- pdf_link = pdf_link['url']
aggregate_citations[citationId] = {
'id': citationId,
'title': citation.title,
'addresses': geocoded_addresses,
'year': citation.year,
- 'pdf': pdf_link,
+ 'pdf': citation.pdf_link,
}
# res['citation_count'] = len(data['citations'])