diff options
Diffstat (limited to 'scraper')
-rw-r--r--  scraper/s2-papers.py      | 4 ++++
-rw-r--r--  scraper/s2-raw-papers.py  | 4 ++--
2 files changed, 6 insertions, 2 deletions
diff --git a/scraper/s2-papers.py b/scraper/s2-papers.py
index 40489e75..92bf8ebf 100644
--- a/scraper/s2-papers.py
+++ b/scraper/s2-papers.py
@@ -9,6 +9,8 @@ import operator
 import click
 from s2 import SemanticScholarAPI
 from util import *
+from urllib.parse import unquote
+raw_papers_api = import_module('s2-raw-papers')
 
 s2 = SemanticScholarAPI()
 
@@ -34,6 +36,8 @@ def fetch_papers(freshen):
     paper = fetch_paper(s2, paper_id, freshen)
     if paper is None:
       continue
+    if freshen:
+      raw_papers_api.fetch_raw_paper(paper_id, freshen)
     db_paper = load_paper(paper_id)
     pdf_link = db_paper.pdf_link if db_paper else ""
 
diff --git a/scraper/s2-raw-papers.py b/scraper/s2-raw-papers.py
index 612c8099..8881cda0 100644
--- a/scraper/s2-raw-papers.py
+++ b/scraper/s2-raw-papers.py
@@ -18,10 +18,10 @@ def fetch_raw_papers(fn):
   lines = read_csv(fn, keys=False)
   parallelize(fetch_raw_paper, lines)
 
-def fetch_raw_paper(paper_id):
+def fetch_raw_paper(paper_id, freshen=False):
   os.makedirs(make_raw_paper_path(paper_id), exist_ok=True)
   paper_fn = make_raw_paper_fn(paper_id)
-  if os.path.exists(paper_fn):
+  if os.path.exists(paper_fn) and not freshen:
     paper = read_json(paper_fn)
   else:
     paper = s2.raw_paper(paper_id)
