From 8f0d59a5f44c71aeb4eecf60cb323d2fe0306a3e Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Fri, 8 Mar 2019 21:02:58 +0100 Subject: possibly freshen raw papers --- scraper/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scraper/util.py') diff --git a/scraper/util.py b/scraper/util.py index fdbc0534..830dbe8b 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -386,10 +386,10 @@ def parallelize(func, rows): with Pool(processes=processCount) as pool: pool.starmap(func, rows, chunksize) -def fetch_paper(s2, paper_id): +def fetch_paper(s2, paper_id, freshen=False): os.makedirs('./datasets/s2/papers/{}/{}'.format(paper_id[0:2], paper_id), exist_ok=True) paper_fn = './datasets/s2/papers/{}/{}/paper.json'.format(paper_id[0:2], paper_id) - if os.path.exists(paper_fn): + if os.path.exists(paper_fn) and not freshen: return read_json(paper_fn) print(paper_id) paper = s2.paper(paper_id) -- cgit v1.2.3-70-g09d2