summaryrefslogtreecommitdiff
path: root/s2-geocode.py
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2018-11-06 15:05:40 +0100
committerJules Laplace <julescarbon@gmail.com>2018-11-06 15:05:40 +0100
commitaacdf0fa056b51000ff88479da479ded3f36b59c (patch)
treede9e221ffd9bf8c67ef54607d6267f00b5233312 /s2-geocode.py
parent002e72bb172c34bb71756f9e6c23294913f1ef85 (diff)
we geocoding
Diffstat (limited to 's2-geocode.py')
-rw-r--r--s2-geocode.py51
1 files changed, 51 insertions, 0 deletions
diff --git a/s2-geocode.py b/s2-geocode.py
new file mode 100644
index 00000000..bd0885c3
--- /dev/null
+++ b/s2-geocode.py
@@ -0,0 +1,51 @@
+import os
+import glob
+import time
+import simplejson as json
+import click
+from urllib.parse import urlparse
+import operator
+from util import *
+from geopy.geocoders import Nominatim
+import random
+
+@click.command()
+@click.option('--fn', '-f', default='reports/institution_names.txt', help='List of institution names, to be geocoded :)')
+def s2_geocode(fn):
+ geolocator = Nominatim(user_agent="cool geocoding service")
+ print(fn)
+ rows = read_csv(fn, keys=False)
+ valid = read_csv('./reports/institutions_geocoded.csv', create=True)
+ invalid = read_csv('./reports/institutions_not_found.csv', create=True)
+ valid_names = []
+ invalid_names = []
+ random.shuffle(rows)
+ for row, i in rows:
+ name = row[0]
+ if name in invalid_names:
+ continue
+ if name in valid_names:
+ continue
+ location = geolocator.geocode(name)
+ if location:
+ print("found: {}".format(name))
+ valid.append([
+ name,
+ location.latitude,
+ location.longitude,
+ location.address,
+ ])
+ valid_names.append(name)
+ else:
+ print("not found: {}".format(name))
+ invalid.append(row)
+ invalid_names.append(name)
+ if (i % 20) == 0:
+ write_csv('./reports/institutions_geocoded.csv', keys=None, rows=valid)
+ write_csv('./reports/institutions_not_found.csv', keys=None, rows=invalid)
+ if (i % 100) == 0:
+ print("{}...".format(i))
+ time.sleep(5)
+
+# Script entry point: s2_geocode is a click command, so calling it with no
+# arguments lets click read the --fn/-f option from the command line.
+if __name__ == '__main__':
+ s2_geocode()