summary | refs | log | tree | commit | diff
path: root/scraper/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 7b55afae..d3f4e751 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -4,6 +4,7 @@ import csv
import string
import codecs
import gspread
+from multiprocessing import Pool
import simplejson as json
from oauth2client.service_account import ServiceAccountCredentials
@@ -302,6 +303,12 @@ class AddressBook (object):
}
return None
+def parallelize(func, rows):
+    """Run ``func`` over ``rows`` using a pool of worker processes.
+
+    Each element of ``rows`` is an argument tuple that is unpacked into one
+    call, i.e. ``func(*row)`` (``Pool.starmap`` semantics).
+
+    Args:
+        func: Callable invoked once per row. It is dispatched to worker
+            processes, so it must be picklable (e.g. a module-level function).
+        rows: Sized iterable of argument tuples.
+
+    Returns:
+        List of the values returned by ``func``, in the same order as ``rows``.
+    """
+    count = len(rows)
+    print("Fetching {} items".format(count))
+    if count == 0:
+        # Nothing to fetch: do not spawn worker processes for an empty batch.
+        return []
+    chunksize = 3
+    try:
+        # Honors the CPU affinity mask of this process where the platform
+        # exposes it.
+        processes = len(os.sched_getaffinity(0))
+    except AttributeError:
+        # os.sched_getaffinity is not available on every platform.
+        processes = os.cpu_count() or 1
+    with Pool(processes=processes) as pool:
+        # starmap blocks until every row is processed, so returning from inside
+        # the ``with`` block is safe.
+        return pool.starmap(func, rows, chunksize)
+
def fetch_paper(s2, paper_id):
os.makedirs('./datasets/s2/papers/{}/{}'.format(paper_id[0:2], paper_id), exist_ok=True)
paper_fn = './datasets/s2/papers/{}/{}/paper.json'.format(paper_id[0:2], paper_id)