summary | refs | log | tree | commit | diff
path: root/scraper/util.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-03-08 21:02:58 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-03-08 21:02:58 +0100
commit: 8f0d59a5f44c71aeb4eecf60cb323d2fe0306a3e (patch)
tree: 397f8b4b738217fd87f3460b44e06c1fb21d03ac /scraper/util.py
parent: 1b086936a927aed44e505b12239c78fefa1e058c (diff)
possibly freshen raw papers
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/scraper/util.py b/scraper/util.py
index fdbc0534..830dbe8b 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -386,10 +386,10 @@ def parallelize(func, rows):
with Pool(processes=processCount) as pool:
pool.starmap(func, rows, chunksize)
-def fetch_paper(s2, paper_id):
+def fetch_paper(s2, paper_id, freshen=False):
os.makedirs('./datasets/s2/papers/{}/{}'.format(paper_id[0:2], paper_id), exist_ok=True)
paper_fn = './datasets/s2/papers/{}/{}/paper.json'.format(paper_id[0:2], paper_id)
- if os.path.exists(paper_fn):
+ if os.path.exists(paper_fn) and not freshen:
return read_json(paper_fn)
print(paper_id)
paper = s2.paper(paper_id)