Add load_institutions, data_path and file_path helpers to scraper/util.py

author: Jules Laplace <julescarbon@gmail.com> 2019-02-11 13:50:14 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-11 13:50:14 +0100
commit: 178381871ce0b8b2be7946320f90b0568ba58911 (patch)
tree: edb25065c07444474391d9e59ca223293988cbff /scraper/util.py
parent: 730a32a51cac1d1b70fdade93d0986b8b4e1ac69 (diff)
1 files changed, 14 insertions, 0 deletions
diff --git a/scraper/util.py b/scraper/util.py
index d3f4e751..788caa3f 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -303,6 +303,20 @@ class AddressBook (object):
       }
     return None
 
+
+def load_institutions(paperId):
+  if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
+    return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
+  elif os.path.exists(file_path('doi', paperId, 'institutions.json')):
+    return read_json(file_path('doi', paperId, 'institutions.json'))['institutions']
+  else:
+    return []
+
+def data_path(key, paper_id):
+  return 'datasets/s2/{}/{}/{}'.format(key, paper_id[0:2], paper_id)
+def file_path(key, paper_id, fn):
+  return os.path.join(data_path(key, paper_id), fn)
+
 def parallelize(func, rows):
   print("Fetching {} items".format(len(rows)))
   chunksize = 3
author	Jules Laplace <julescarbon@gmail.com>	2019-02-11 13:50:14 +0100
committer	Jules Laplace <julescarbon@gmail.com>	2019-02-11 13:50:14 +0100
commit	178381871ce0b8b2be7946320f90b0568ba58911 (patch)
tree	edb25065c07444474391d9e59ca223293988cbff /scraper/util.py
parent	730a32a51cac1d1b70fdade93d0986b8b4e1ac69 (diff)