mergez

author: jules@lens <julescarbon@gmail.com> 2019-02-20 16:21:53 +0100
committer: jules@lens <julescarbon@gmail.com> 2019-02-20 16:21:53 +0100
commit: e0038fbc4b891fe4393acfad8d9755fa1834278e (patch)
tree: c35aa2d2b0a76c10e57904ed1f41f5a7dcdd4870 /scraper/util.py
parent: 225b7936cd1b80effa4bf77b1ffc3c92a8f17526 (diff)
parent: 9b97ddf7e1bc1febc4066cd5e083cee688d77027 (diff)
1 files changed, 25 insertions, 2 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 0c3e2169..0401b342 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -331,10 +331,13 @@ class AddressBook (object):
     row = self.find(address)
     if row is not None:
       return {
-        'address': row[0],
+        'name': row[0],
+        'source_name': row[1],
+        'street_adddress': row[2],
         'lat': row[3],
         'lng': row[4],
         'type': row[5],
+        'country': row[7],
       }
     return None
 
@@ -372,7 +375,7 @@ def file_path(key, paper_id, fn):
   return os.path.join(data_path(key, paper_id), fn)
 
 def parallelize(func, rows):
-  print("Fetching {} items".format(len(rows)))
+  print("Processing {} items".format(len(rows)))
   if hasattr(os, 'sched_getaffinity'):
     processCount = len(os.sched_getaffinity(0))
   else:
@@ -400,6 +403,7 @@ def fetch_paper(s2, paper_id):
   return paper
 
 def fetch_spreadsheet():
+  """Open the Google Spreadsheet, which contains the individual worksheets"""
   scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
   path = os.path.dirname(os.path.abspath(__file__))
   credentials = ServiceAccountCredentials.from_json_keyfile_name(os.path.join(path, '.creds/Megapixels-ef28f91112a9.json'), scope)
@@ -409,16 +413,22 @@ def fetch_spreadsheet():
   return spreadsheet
 
 def fetch_worksheet(name="institutions"):
+  """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
   spreadsheet = fetch_spreadsheet()
   return spreadsheet.worksheet(name)
 
 def fetch_google_sheet(name="institutions"):
+  """Get all the values from a particular worksheet as a list of lists.
+  Returns:
+  :keys - the first row of the document
+  :lines - a list of lists with the rest of the rows"""
   rows = fetch_worksheet(name).get_all_values()
   keys = rows[0]
   lines = rows[1:]
   return keys, lines
 
 def fetch_google_sheet_objects(name):
+  """Get all the values from a worksheet as a list of dictionaries"""
   keys, rows = fetch_google_sheet(name)
   recs = []
   for row in rows:
@@ -429,6 +439,8 @@ def fetch_google_sheet_objects(name):
   return recs
 
 def fetch_google_lookup(name, item_key='key'):
+  """Get all the values from a worksheet as a dictionary of dictionaries.
+  Specify which field you want to use as the dictionary key."""
   keys, rows = fetch_google_sheet(name)
   lookup = {}
   for row in rows:
@@ -438,3 +450,14 @@ def fetch_google_lookup(name, item_key='key'):
     lookup[rec[item_key]] = rec
   return lookup
 
+def load_countries():
+  countries = read_json('countries.json')
+  lookup = {}
+  for country in countries:
+    name = country['name']
+    lookup[name] = name
+    if 'alt' in country:
+      for alt_name in country['alt']:
+        lookup[alt_name] = name
+  return lookup
+
author	jules@lens <julescarbon@gmail.com>	2019-02-20 16:21:53 +0100
committer	jules@lens <julescarbon@gmail.com>	2019-02-20 16:21:53 +0100
commit	e0038fbc4b891fe4393acfad8d9755fa1834278e (patch)
tree	c35aa2d2b0a76c10e57904ed1f41f5a7dcdd4870 /scraper/util.py
parent	225b7936cd1b80effa4bf77b1ffc3c92a8f17526 (diff)
parent	9b97ddf7e1bc1febc4066cd5e083cee688d77027 (diff)