diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 15:05:40 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 15:05:40 +0100 |
| commit | aacdf0fa056b51000ff88479da479ded3f36b59c (patch) | |
| tree | de9e221ffd9bf8c67ef54607d6267f00b5233312 /s2-geocode.py | |
| parent | 002e72bb172c34bb71756f9e6c23294913f1ef85 (diff) | |
we geocoding
Diffstat (limited to 's2-geocode.py')
| -rw-r--r-- | s2-geocode.py | 51 |
1 files changed, 51 insertions, 0 deletions
# diff --git a/s2-geocode.py b/s2-geocode.py (new file mode 100644, index 00000000..bd0885c3)
import os
import glob
import time
import simplejson as json
import click
from urllib.parse import urlparse
import operator
from util import *
from geopy.geocoders import Nominatim
import random

# Output files; they are re-read at startup and appended to.
GEOCODED_FN = './reports/institutions_geocoded.csv'
NOT_FOUND_FN = './reports/institutions_not_found.csv'


@click.command()
@click.option('--fn', '-f', default='reports/institution_names.txt', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
    """Geocode a list of institution names with Nominatim and save the results as CSV."""
    geolocator = Nominatim(user_agent="cool geocoding service")
    print(fn)
    # read_csv / write_csv come from the project-local `util` module.
    # NOTE(review): presumably read_csv(..., create=True) returns a list of
    # rows (empty when the file does not exist yet) -- confirm against util.
    rows = read_csv(fn, keys=False)
    valid = read_csv(GEOCODED_FN, create=True)
    invalid = read_csv(NOT_FOUND_FN, create=True)
    # Names handled during this run; sets give O(1) duplicate checks.
    valid_names = set()
    invalid_names = set()
    random.shuffle(rows)

    def save():
        # Persist both result tables in full.
        write_csv(GEOCODED_FN, keys=None, rows=valid)
        write_csv(NOT_FOUND_FN, keys=None, rows=invalid)

    try:
        # enumerate() supplies the running index; the rows themselves are
        # plain CSV records and must not be unpacked as (row, index) pairs.
        for i, row in enumerate(rows):
            name = row[0]
            if name in invalid_names or name in valid_names:
                continue
            location = geolocator.geocode(name)
            if location:
                print("found: {}".format(name))
                valid.append([
                    name,
                    location.latitude,
                    location.longitude,
                    location.address,
                ])
                valid_names.add(name)
            else:
                print("not found: {}".format(name))
                invalid.append(row)
                invalid_names.add(name)
            if (i % 20) == 0:
                # Periodic checkpoint so an interrupted run keeps most results.
                save()
            if (i % 100) == 0:
                print("{}...".format(i))
                # NOTE(review): indentation was reconstructed from a collapsed
                # diff; the pause is assumed to belong to this branch. Check
                # the request rate against the Nominatim usage policy.
                time.sleep(5)
    finally:
        # Flush whatever was collected since the last checkpoint, including
        # when the geocoder raises or the run is interrupted.
        save()


if __name__ == '__main__':
    s2_geocode()
