# NOTE: import order is kept exactly as in the original file because
# `from util import *` may shadow names depending on where it appears.
import os
import glob
import time
import simplejson as json
import click
from urllib.parse import urlparse
import operator
from util import *
from geopy.geocoders import Nominatim
import random

# Report files that accumulate results across runs.
GEOCODED_CSV = './reports/institutions_geocoded.csv'
NOT_FOUND_CSV = './reports/institutions_not_found.csv'

# Flush results to disk whenever the row index is a multiple of this.
CHECKPOINT_EVERY = 20

# Pause between geocoding requests so the public service is not hammered.
REQUEST_DELAY_SECONDS = 5


def _save_results(valid, invalid):
    """Write the found / not-found row lists to their report CSVs."""
    write_csv(GEOCODED_CSV, keys=None, rows=valid)
    write_csv(NOT_FOUND_CSV, keys=None, rows=invalid)


@click.command()
@click.option('--fn', '-f', default='reports/institution_names.csv', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
    """Geocode institution names with Nominatim and record the outcome.

    Reads rows from ``fn`` (the first column is used as the institution
    name), looks each name up, and appends it either to the geocoded report
    (name, latitude, longitude, address) or to the not-found report (the
    input row unchanged).

    Names already present in either report are skipped, so the command can
    be re-run to resume an interrupted job without repeating requests or
    duplicating rows. Results are flushed periodically and always once more
    on exit, including when the loop is interrupted by an exception or
    Ctrl-C.

    Args:
        fn: Path to the CSV of institution names.
    """
    geolocator = Nominatim(user_agent="cool geocoding service")
    print(fn)
    rows = read_csv(fn, keys=False)
    valid = read_csv(GEOCODED_CSV, keys=False, create=True)
    invalid = read_csv(NOT_FOUND_CSV, keys=False, create=True)

    # Seed the "already handled" sets from previous runs; both reports store
    # the institution name in their first column (see the appends below).
    valid_names = {done[0] for done in valid if done}
    invalid_names = {done[0] for done in invalid if done}

    # Randomise order so repeated partial runs do not always hit the same
    # prefix of the input list.
    random.shuffle(rows)

    try:
        for i, row in enumerate(rows):
            if not row:
                # Blank CSV line: nothing to geocode.
                continue
            name = row[0]
            if name in invalid_names or name in valid_names:
                continue

            location = geolocator.geocode(name)
            if location:
                print("found: {}".format(name))
                valid.append([
                    name,
                    location.latitude,
                    location.longitude,
                    location.address,
                ])
                valid_names.add(name)
            else:
                print("not found: {}".format(name))
                invalid.append(row)
                invalid_names.add(name)

            if i and (i % CHECKPOINT_EVERY) == 0:
                print("{}...".format(i))
                _save_results(valid, invalid)

            # Throttle after every real request (skipped rows never get here).
            time.sleep(REQUEST_DELAY_SECONDS)
    finally:
        # Persist whatever was collected, even on error or interruption, so
        # rows processed since the last checkpoint are not lost.
        _save_results(valid, invalid)


if __name__ == '__main__':
    s2_geocode()