diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-16 16:09:48 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-16 16:09:48 +0100 |
| commit | fd1dff17a90afdd2478a98856ee3ad509c5c4900 (patch) | |
| tree | 36c259aca509bf84783c810fd4b1c490b602a464 | |
| parent | b8811569b5c3962695c91befdbed0d75fb490ebe (diff) | |
store many pdfs
| -rw-r--r-- | megapixels/commands/datasets/citations_to_csv.py | 4 |
| -rw-r--r-- | scraper/client/paper/paper.address.js | 1 |
| -rw-r--r-- | scraper/s2-final-report.py | 4 |
3 files changed, 6 insertions, 3 deletions
```diff
diff --git a/megapixels/commands/datasets/citations_to_csv.py b/megapixels/commands/datasets/citations_to_csv.py
index e54d0dac..253d15fc 100644
--- a/megapixels/commands/datasets/citations_to_csv.py
+++ b/megapixels/commands/datasets/citations_to_csv.py
@@ -94,6 +94,8 @@ def get_orig_paper(json_data):
 addresses = p.get('address','')
 if addresses:
 for a in addresses:
+ if type(a) == str or a is None:
+ continue
 paper = Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year, p['pdf'], a['address'], a['type'], a['lat'], a['lng'])
@@ -101,4 +103,4 @@ def get_orig_paper(json_data):
 else:
 paper = Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year, p['pdf'])
 papers.append(paper)
- return papers
\ No newline at end of file
+ return papers
diff --git a/scraper/client/paper/paper.address.js b/scraper/client/paper/paper.address.js
index 9256d4ad..c571d8e2 100644
--- a/scraper/client/paper/paper.address.js
+++ b/scraper/client/paper/paper.address.js
@@ -142,6 +142,7 @@ class PaperAddress extends Component {
 if (!citation) {
 return <div>Citation not found in this paper</div>
 }
+ console.log(citation)
 return (
 <div className='form'>
 <h3>{citation.title}</h3>
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 451c1f78..f08bc748 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -152,14 +152,14 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
 'title': citation.title,
 'addresses': geocoded_addresses,
 'year': citation.year,
- 'pdf': citation.pdf_link,
+ 'pdf': citation.pdf_links(),
 }
 else:
 unknown_citations[citationId] = {
 'id': citationId,
 'title': citation.title,
 'year': citation.year,
- 'pdf': citation.pdf_link,
+ 'pdf': citation.pdf_links(),
 }
 return res
```
