From 7885a180e1b3ddc37ef2192c74a897b911e48a14 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 20 Feb 2019 16:05:25 +0100 Subject: adding countries to citation feed / geocode step --- scraper/util.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'scraper/util.py') diff --git a/scraper/util.py b/scraper/util.py index fa9f6a22..0401b342 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -331,10 +331,13 @@ class AddressBook (object): row = self.find(address) if row is not None: return { - 'address': row[0], + 'name': row[0], + 'source_name': row[1], + 'street_adddress': row[2], 'lat': row[3], 'lng': row[4], 'type': row[5], + 'country': row[7], } return None @@ -372,7 +375,7 @@ def file_path(key, paper_id, fn): return os.path.join(data_path(key, paper_id), fn) def parallelize(func, rows): - print("Fetching {} items".format(len(rows))) + print("Processing {} items".format(len(rows))) if hasattr(os, 'sched_getaffinity'): processCount = len(os.sched_getaffinity(0)) else: @@ -447,3 +450,14 @@ def fetch_google_lookup(name, item_key='key'): lookup[rec[item_key]] = rec return lookup +def load_countries(): + countries = read_json('countries.json') + lookup = {} + for country in countries: + name = country['name'] + lookup[name] = name + if 'alt' in country: + for alt_name in country['alt']: + lookup[alt_name] = name + return lookup + -- cgit v1.2.3-70-g09d2