"""Geocode a list of institution names with Nominatim and save CSV reports."""
import os
import glob
import time
import simplejson as json
import click
from urllib.parse import urlparse
import operator
from util import *
from geopy.geocoders import Nominatim
import random

# Report files: successful lookups, and names the geocoder could not resolve.
_GEOCODED_CSV = './reports/institutions_geocoded.csv'
_NOT_FOUND_CSV = './reports/institutions_not_found.csv'

# Rows between two checkpoint writes / two progress messages.
_SAVE_EVERY = 20
_PROGRESS_EVERY = 100

# Pause (seconds) taken together with each progress message.
_PROGRESS_PAUSE_S = 5
# Pause (seconds) after every request. The public Nominatim service allows
# at most one request per second.
# NOTE(review): the original source was collapsed onto one line, so it is not
# visible whether its single `time.sleep(5)` ran per request or only with the
# progress message; it is kept with the progress message and this per-request
# pause guarantees the rate limit either way -- confirm the intended pacing.
_REQUEST_PAUSE_S = 1


def _save_reports(valid, invalid):
    """Write both report files from the in-memory result lists."""
    write_csv(_GEOCODED_CSV, keys=None, rows=valid)
    write_csv(_NOT_FOUND_CSV, keys=None, rows=invalid)


@click.command()
@click.option('--fn', '-f', default='reports/institution_names.txt', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
    """Geocode every institution name listed in the file given by --fn."""
    # Rows are visited in random order. For each new name the geocoder is
    # queried once; hits are appended to the "geocoded" report as
    # [name, latitude, longitude, address], misses are appended unchanged to
    # the "not found" report.
    geolocator = Nominatim(user_agent="cool geocoding service")
    print(fn)
    rows = read_csv(fn, keys=False)
    # Existing reports are loaded so new results are appended to them.
    # NOTE(review): the name sets below start empty, so names already present
    # in these loaded reports are presumably looked up again on a re-run;
    # seeding the sets from `valid`/`invalid` depends on what read_csv
    # returns here -- confirm against util.read_csv.
    valid = read_csv(_GEOCODED_CSV, create=True)
    invalid = read_csv(_NOT_FOUND_CSV, create=True)
    # Sets give O(1) "already handled in this run" checks.
    valid_names = set()
    invalid_names = set()
    random.shuffle(rows)

    unsaved = False  # True while results exist that are not yet on disk
    try:
        # enumerate() supplies the running index used for checkpoints; the
        # previous `for row, i in rows` tried to unpack each row instead.
        for i, row in enumerate(rows):
            if not row:
                # Blank input line: there is no name to look up.
                continue
            name = row[0]
            if name in valid_names or name in invalid_names:
                continue

            location = geolocator.geocode(name)
            if location:
                print("found: {}".format(name))
                valid.append([
                    name,
                    location.latitude,
                    location.longitude,
                    location.address,
                ])
                valid_names.add(name)
            else:
                print("not found: {}".format(name))
                invalid.append(row)
                invalid_names.add(name)
            unsaved = True

            if i % _SAVE_EVERY == 0:
                _save_reports(valid, invalid)
                unsaved = False
            if i % _PROGRESS_EVERY == 0:
                print("{}...".format(i))
                time.sleep(_PROGRESS_PAUSE_S)
            time.sleep(_REQUEST_PAUSE_S)
    finally:
        # Flush whatever was collected since the last checkpoint, both at
        # normal completion and when a geocoder error or Ctrl-C ends the run.
        if unsaved:
            _save_reports(valid, invalid)


if __name__ == '__main__':
    s2_geocode()