summaryrefslogtreecommitdiff
path: root/scraper/util.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-02-11 13:50:14 +0100
committer Jules Laplace <julescarbon@gmail.com> 2019-02-11 13:50:14 +0100
commit 178381871ce0b8b2be7946320f90b0568ba58911 (patch)
tree edb25065c07444474391d9e59ca223293988cbff /scraper/util.py
parent 730a32a51cac1d1b70fdade93d0986b8b4e1ac69 (diff)
update
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/scraper/util.py b/scraper/util.py
index d3f4e751..788caa3f 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -303,6 +303,20 @@ class AddressBook (object):
}
return None
+
def load_institutions(paperId):
    """Return the 'institutions' entry stored for a paper, or [] if none.

    Looks for an ``institutions.json`` file under the 'pdf' data directory
    first and the 'doi' data directory second (paths come from
    ``file_path``). The first file that exists is loaded with ``read_json``
    and its 'institutions' value is returned. If neither file exists, an
    empty list is returned.
    """
    # Order matters: a 'pdf' file takes precedence over a 'doi' file.
    for source in ('pdf', 'doi'):
        candidate = file_path(source, paperId, 'institutions.json')
        if os.path.exists(candidate):
            return read_json(candidate)['institutions']
    return []
+
def data_path(key, paper_id):
    """Return the relative data directory for a paper.

    The result has the form ``datasets/s2/<key>/<prefix>/<paper_id>``,
    where ``<prefix>`` is the first two characters of ``paper_id``.
    """
    shard = paper_id[:2]
    return 'datasets/s2/{}/{}/{}'.format(key, shard, paper_id)
def file_path(key, paper_id, fn):
    """Return the path of file ``fn`` inside the paper's data directory.

    The directory is obtained from ``data_path(key, paper_id)`` and joined
    with ``fn`` using ``os.path.join``.
    """
    directory = data_path(key, paper_id)
    return os.path.join(directory, fn)
+
def parallelize(func, rows):
print("Fetching {} items".format(len(rows)))
chunksize = 3