summary refs log tree commit diff
path: root/scraper/util.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-02-13 16:46:10 +0100
committer Jules Laplace <julescarbon@gmail.com> 2019-02-13 16:46:10 +0100
commit d0dc5cd83f1c436185d247600c3c5be9360bf1ca (patch)
tree 92db65b2a525b6512fd7f5349da561c476fe997e /scraper/util.py
parent 1563d1da307a78ddc388483fd95a68a511e18048 (diff)
displaying more info about the papers
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 26
1 files changed, 25 insertions, 1 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 9b47510a..6c671cec 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -319,8 +319,12 @@ def file_path(key, paper_id, fn):
def parallelize(func, rows):
    """Apply ``func`` to every argument tuple in ``rows`` via a worker pool.

    Each element of ``rows`` is unpacked into positional arguments for
    ``func`` (``starmap`` semantics).  The results are discarded and nothing
    is returned, so ``func`` is only useful for its side effects.

    NOTE(review): ``Pool`` is imported outside this block; presumably it is
    ``multiprocessing.Pool`` -- confirm against the file's imports.
    """
    print("Fetching {} items".format(len(rows)))
    # os.sched_getaffinity does not exist on every platform; when it is
    # missing, fall back to a fixed pool of four workers.
    affinity = getattr(os, 'sched_getaffinity', None)
    if affinity is None:
        worker_count = 4
    else:
        worker_count = len(affinity(0))
    with Pool(processes=worker_count) as pool:
        # Rows are handed to the workers in batches of three.
        pool.starmap(func, rows, chunksize=3)
def fetch_paper(s2, paper_id):
@@ -359,3 +363,23 @@ def fetch_google_sheet(name="institutions"):
keys = rows[0]
lines = rows[1:]
return keys, lines
+
def fetch_google_sheet_objects(name):
    """Return the data rows of sheet ``name`` as a list of dicts.

    The first row returned by ``fetch_google_sheet`` supplies the keys;
    every following row becomes one dict mapping each key to the cell at
    the same position.  Cells beyond the last key are ignored.  A row with
    fewer cells than there are keys fails on the positional lookup
    (``IndexError`` if rows are lists -- TODO confirm the row type).
    """
    header, body = fetch_google_sheet(name)
    return [
        {column: cells[pos] for pos, column in enumerate(header)}
        for cells in body
    ]
+
def fetch_google_lookup(name, item_key='key'):
    """Return the data rows of sheet ``name`` as a dict of dicts.

    Each data row is turned into a dict keyed by the sheet's header row,
    then stored under the value found in its ``item_key`` column.  When
    several rows share the same ``item_key`` value, the last one wins.
    Raises ``KeyError`` if ``item_key`` is not one of the header names.
    """
    header, body = fetch_google_sheet(name)
    table = {}
    for cells in body:
        entry = {column: cells[pos] for pos, column in enumerate(header)}
        table[entry[item_key]] = entry
    return table