diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 16:05:25 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 16:05:25 +0100 |
| commit | 7885a180e1b3ddc37ef2192c74a897b911e48a14 (patch) | |
| tree | bac496d50008c035668d7e6c0143b7ecabd3193d /scraper/util.py | |
| parent | 8f0d09f4c5bfb8b09757c3dbdb6d29061f0405d4 (diff) | |
adding countries to citation feed / geocode step
Diffstat (limited to 'scraper/util.py')
| -rw-r--r-- | scraper/util.py | 18 |
1 files changed, 16 insertions, 2 deletions
```diff
diff --git a/scraper/util.py b/scraper/util.py
index fa9f6a22..0401b342 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -331,10 +331,13 @@ class AddressBook (object):
     row = self.find(address)
     if row is not None:
       return {
-        'address': row[0],
+        'name': row[0],
+        'source_name': row[1],
+        'street_adddress': row[2],
         'lat': row[3],
         'lng': row[4],
         'type': row[5],
+        'country': row[7],
       }
     return None
 
@@ -372,7 +375,7 @@ def file_path(key, paper_id, fn):
   return os.path.join(data_path(key, paper_id), fn)
 
 def parallelize(func, rows):
-  print("Fetching {} items".format(len(rows)))
+  print("Processing {} items".format(len(rows)))
   if hasattr(os, 'sched_getaffinity'):
     processCount = len(os.sched_getaffinity(0))
   else:
@@ -447,3 +450,14 @@ def fetch_google_lookup(name, item_key='key'):
     lookup[rec[item_key]] = rec
   return lookup
 
+def load_countries():
+  countries = read_json('countries.json')
+  lookup = {}
+  for country in countries:
+    name = country['name']
+    lookup[name] = name
+    if 'alt' in country:
+      for alt_name in country['alt']:
+        lookup[alt_name] = name
+  return lookup
+
```
