From 178381871ce0b8b2be7946320f90b0568ba58911 Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Mon, 11 Feb 2019 13:50:14 +0100
Subject: update

---
 scraper/util.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'scraper/util.py')

diff --git a/scraper/util.py b/scraper/util.py
index d3f4e751..788caa3f 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -303,6 +303,20 @@ class AddressBook (object):
       }
     return None
 
+
+def load_institutions(paperId):
+  if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
+    return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
+  elif os.path.exists(file_path('doi', paperId, 'institutions.json')):
+    return read_json(file_path('doi', paperId, 'institutions.json'))['institutions']
+  else:
+    return []
+
+def data_path(key, paper_id):
+  return 'datasets/s2/{}/{}/{}'.format(key, paper_id[0:2], paper_id)
+def file_path(key, paper_id, fn):
+  return os.path.join(data_path(key, paper_id), fn)
+
 def parallelize(func, rows):
   print("Fetching {} items".format(len(rows)))
   chunksize = 3
-- 
cgit v1.2.3-70-g09d2