diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-13 16:46:10 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-13 16:46:10 +0100 |
| commit | d0dc5cd83f1c436185d247600c3c5be9360bf1ca (patch) | |
| tree | 92db65b2a525b6512fd7f5349da561c476fe997e /scraper/util.py | |
| parent | 1563d1da307a78ddc388483fd95a68a511e18048 (diff) | |
displaying more info about the papers
Diffstat (limited to 'scraper/util.py')
| -rw-r--r-- | scraper/util.py | 26 |
1 file changed, 25 insertions, 1 deletion
diff --git a/scraper/util.py b/scraper/util.py index 9b47510a..6c671cec 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -319,8 +319,12 @@ def file_path(key, paper_id, fn): def parallelize(func, rows): print("Fetching {} items".format(len(rows))) + if hasattr(os, 'sched_getaffinity'): + processCount = len(os.sched_getaffinity(0)) + else: + processCount = 4 chunksize = 3 - with Pool(processes=len(os.sched_getaffinity(0))) as pool: + with Pool(processes=processCount) as pool: pool.starmap(func, rows, chunksize) def fetch_paper(s2, paper_id): @@ -359,3 +363,23 @@ def fetch_google_sheet(name="institutions"): keys = rows[0] lines = rows[1:] return keys, lines + +def fetch_google_sheet_objects(name): + keys, rows = fetch_google_sheet(name) + recs = [] + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + recs.append(rec) + return recs + +def fetch_google_lookup(name, item_key='key'): + keys, rows = fetch_google_sheet(name) + lookup = {} + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + lookup[rec[item_key]] = rec + return lookup |
