summary | refs | log | tree | commit | diff
path: root/scraper/s2-doi-report.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-06-05 16:39:00 +0200
committer Jules Laplace <julescarbon@gmail.com> 2019-06-05 16:39:00 +0200
commit 76c058b87f94fb1ed7b37869a8082c25c7ab37de (patch)
tree 3297ffda57fd834e8c32bdc95749bece5d18dbd8 /scraper/s2-doi-report.py
parent 5e8e3bd6d26fc34f2bf2fcedb50398454b729e0b (diff)
parent c6e85e5545fb94be31342d54c151012d883536b9 (diff)
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'scraper/s2-doi-report.py')
-rw-r--r-- scraper/s2-doi-report.py 7
1 files changed, 6 insertions, 1 deletions
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index c715b647..7d142a14 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
from urllib.parse import unquote
from importlib import import_module
doi = import_module('s2-fetch-doi')
+raw_paper = import_module('s2-raw-papers')
DOI_DIR = 'datasets/s2/doi'
@@ -39,7 +40,11 @@ def doi_report():
paper_id = url_info['paper_id']
paper = load_paper(paper_id)
if paper is None:
- continue
+ raw_paper.fetch_raw_paper(paper_id, freshen=False)
+ paper = load_paper(paper_id)
+ if paper is None:
+ print("Problem fetching raw paper {}".format(paper_id))
+ continue
if paper.data is None:
continue
doi_fn = fn.replace('.url', '.doi')