diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-08 23:19:04 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-08 23:19:04 +0100 |
| commit | 8e26cbff5171fb204082e1b6778d17f786c1eb16 (patch) | |
| tree | f8420a6268d1c624572091881f0b02cf17d0b695 /scraper/s2-final-report.py | |
| parent | 6059ce2eb68a931a4cbb12049c202c3299e4966b (diff) | |
reports of which paper titles matched
Diffstat (limited to 'scraper/s2-final-report.py')
| -rw-r--r-- | scraper/s2-final-report.py | 30 |
1 file changed, 12 insertions, 18 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index a958e4bc..f81d1835 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -44,16 +44,16 @@ def process_single_paper(row, addresses, aggregate_citations): 'paper_id': '', 'key': '', 'title': '', - 'journal': '', + # 'journal': '', 'year': '', + 'pdf': '', 'address': '', - 'pdf_link': '', - 'citation_count': 0, - 'citations_geocoded': 0, - 'citations_unknown': 0, - 'citations_empty': 0, - 'citations_pdf': 0, - 'citations_doi': 0, + # 'citation_count': 0, + # 'citations_geocoded': 0, + # 'citations_unknown': 0, + # 'citations_empty': 0, + # 'citations_pdf': 0, + # 'citations_doi': 0, } geocoded_citations = [] @@ -78,12 +78,9 @@ def process_single_paper(row, addresses, aggregate_citations): res['name'] = row['name'] res['paper_id'] = paper.paper_id res['title'] = paper.title - res['journal'] = paper.journal - res['report_link'] = 'papers/{}.html'.format(paper.paper_id) - pdf_link = paper.pdf_link - if type(pdf_link) == dict and 'url' in pdf_link: - pdf_link = pdf_link['url'] - res['pdf_link'] = pdf_link + # res['journal'] = paper.journal + res['year'] = paper.year + res['pdf'] = paper.pdf_link # res['authors'] = ', '.join(paper.authors) # res['citations'] = [] @@ -143,15 +140,12 @@ def process_single_paper(row, addresses, aggregate_citations): geocoded_addresses.append(next_address) if address: if citationId not in aggregate_citations: - pdf_link = citation.pdf_link - if type(pdf_link) == dict and 'url' in pdf_link: - pdf_link = pdf_link['url'] aggregate_citations[citationId] = { 'id': citationId, 'title': citation.title, 'addresses': geocoded_addresses, 'year': citation.year, - 'pdf': pdf_link, + 'pdf': citation.pdf_link, } # res['citation_count'] = len(data['citations']) |
