summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/s2-papers.py | 4
-rw-r--r-- scraper/s2-raw-papers.py | 4
2 files changed, 6 insertions, 2 deletions
diff --git a/scraper/s2-papers.py b/scraper/s2-papers.py
index 40489e75..92bf8ebf 100644
--- a/scraper/s2-papers.py
+++ b/scraper/s2-papers.py
@@ -9,6 +9,8 @@ import operator
import click
from s2 import SemanticScholarAPI
from util import *
+from urllib.parse import unquote
+raw_papers_api = import_module('s2-raw-papers')
s2 = SemanticScholarAPI()
@@ -34,6 +36,8 @@ def fetch_papers(freshen):
paper = fetch_paper(s2, paper_id, freshen)
if paper is None:
continue
+ if freshen:
+ raw_papers_api.fetch_raw_paper(paper_id, freshen)
db_paper = load_paper(paper_id)
pdf_link = db_paper.pdf_link if db_paper else ""
diff --git a/scraper/s2-raw-papers.py b/scraper/s2-raw-papers.py
index 612c8099..8881cda0 100644
--- a/scraper/s2-raw-papers.py
+++ b/scraper/s2-raw-papers.py
@@ -18,10 +18,10 @@ def fetch_raw_papers(fn):
lines = read_csv(fn, keys=False)
parallelize(fetch_raw_paper, lines)
-def fetch_raw_paper(paper_id):
+def fetch_raw_paper(paper_id, freshen=False):
os.makedirs(make_raw_paper_path(paper_id), exist_ok=True)
paper_fn = make_raw_paper_fn(paper_id)
- if os.path.exists(paper_fn):
+ if os.path.exists(paper_fn) and not freshen:
paper = read_json(paper_fn)
else:
paper = s2.raw_paper(paper_id)