summary refs log tree commit diff
path: root/scraper/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 27
1 files changed, 25 insertions, 2 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 0c3e2169..0401b342 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -331,10 +331,13 @@ class AddressBook (object):
row = self.find(address)
if row is not None:
return {
- 'address': row[0],
+ 'name': row[0],
+ 'source_name': row[1],
+ 'street_adddress': row[2],
'lat': row[3],
'lng': row[4],
'type': row[5],
+ 'country': row[7],
}
return None
@@ -372,7 +375,7 @@ def file_path(key, paper_id, fn):
return os.path.join(data_path(key, paper_id), fn)
def parallelize(func, rows):
- print("Fetching {} items".format(len(rows)))
+ print("Processing {} items".format(len(rows)))
if hasattr(os, 'sched_getaffinity'):
processCount = len(os.sched_getaffinity(0))
else:
@@ -400,6 +403,7 @@ def fetch_paper(s2, paper_id):
return paper
def fetch_spreadsheet():
+ """Open the Google Spreadsheet, which contains the individual worksheets"""
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
path = os.path.dirname(os.path.abspath(__file__))
credentials = ServiceAccountCredentials.from_json_keyfile_name(os.path.join(path, '.creds/Megapixels-ef28f91112a9.json'), scope)
@@ -409,16 +413,22 @@ def fetch_spreadsheet():
return spreadsheet
def fetch_worksheet(name="institutions"):
+ """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
spreadsheet = fetch_spreadsheet()
return spreadsheet.worksheet(name)
def fetch_google_sheet(name="institutions"):
+ """Get all the values from a particular worksheet as a list of lists.
+ Returns:
+ :keys - the first row of the document
+ :lines - a list of lists with the rest of the rows"""
rows = fetch_worksheet(name).get_all_values()
keys = rows[0]
lines = rows[1:]
return keys, lines
def fetch_google_sheet_objects(name):
+ """Get all the values from a worksheet as a list of dictionaries"""
keys, rows = fetch_google_sheet(name)
recs = []
for row in rows:
@@ -429,6 +439,8 @@ def fetch_google_sheet_objects(name):
return recs
def fetch_google_lookup(name, item_key='key'):
+ """Get all the values from a worksheet as a dictionary of dictionaries.
+ Specify which field you want to use as the dictionary key."""
keys, rows = fetch_google_sheet(name)
lookup = {}
for row in rows:
@@ -438,3 +450,14 @@ def fetch_google_lookup(name, item_key='key'):
lookup[rec[item_key]] = rec
return lookup
+def load_countries():
+ countries = read_json('countries.json')
+ lookup = {}
+ for country in countries:
+ name = country['name']
+ lookup[name] = name
+ if 'alt' in country:
+ for alt_name in country['alt']:
+ lookup[alt_name] = name
+ return lookup
+