diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-06-05 16:39:00 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-06-05 16:39:00 +0200 |
| commit | 76c058b87f94fb1ed7b37869a8082c25c7ab37de (patch) | |
| tree | 3297ffda57fd834e8c32bdc95749bece5d18dbd8 /scraper/s2-doi-report.py | |
| parent | 5e8e3bd6d26fc34f2bf2fcedb50398454b729e0b (diff) | |
| parent | c6e85e5545fb94be31342d54c151012d883536b9 (diff) | |
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'scraper/s2-doi-report.py')
| -rw-r--r-- | scraper/s2-doi-report.py | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index c715b647..7d142a14 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from urllib.parse import unquote
 from importlib import import_module
 doi = import_module('s2-fetch-doi')
+raw_paper = import_module('s2-raw-papers')
 
 DOI_DIR = 'datasets/s2/doi'
 
@@ -39,7 +40,11 @@ def doi_report():
     paper_id = url_info['paper_id']
     paper = load_paper(paper_id)
     if paper is None:
-      continue
+      raw_paper.fetch_raw_paper(paper_id, freshen=False)
+      paper = load_paper(paper_id)
+      if paper is None:
+        print("Problem fetching raw paper {}".format(paper_id))
+        continue
     if paper.data is None:
       continue
     doi_fn = fn.replace('.url', '.doi')
```
