import os import csv import click import time import html from geopy import geocoders from dotenv import load_dotenv from util import * load_dotenv() geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY')) print('loading institutions...') worksheet = fetch_worksheet("institutions") keys, rows = fetch_google_sheet("institutions") print("got {} rows".format(len(rows))) countries = load_countries() print('loaded countries...') cname_lookup = {} @click.command() def s2_geocode_spreadsheet(): # row_tuples = [] for i, row in enumerate(rows): # row_tuples.append((i, row,)) cname, name, address, lat, lng, org_type, extra_address, country = row if len(cname) < 3: print("very short cname: {}".format(cname)) if cname == name or cname not in cname_lookup: cname_lookup[cname] = i print("built lookup") print("processing sheet...") seen = {} for i, row in enumerate(rows): if (i % 1000) == 0: print("{}...".format(i)) if row[1] in seen: continue seen[row[1]] = True hit_api = s2_geocode_row(i, row) if hit_api: time.sleep(1) # parallelize(s2_geocode_row, row_tuples) def s2_geocode_row(i, row): # 0 cname 1 name 2 address 3 lat 4 lng 5 org_type 6 notes 7 country cname, name, address, lat, lng, org_type, extra_address, country = row decoded_cname = html.unescape(cname) # print(decoded_cname) if cname != decoded_cname: worksheet.update_cell(i+2, 0+1, decoded_cname) cname = decoded_cname if "@" in cname: print(cname) if lat and lng: if not country: update_country_from_address(address, i, countries, worksheet) return True return False c_row = rows[cname_lookup[cname]] if c_row[3] and c_row[4]: print("name {}, found cname: {}".format(name, cname)) worksheet.update_cell(i+2, 2+1, c_row[2]) worksheet.update_cell(i+2, 3+1, c_row[3]) worksheet.update_cell(i+2, 4+1, c_row[4]) worksheet.update_cell(i+2, 7+1, c_row[7]) return True if address: address_to_geocode = address elif name: address_to_geocode = name elif cname: address_to_geocode = cname if not address_to_geocode: return False print(address_to_geocode) location = geolocator.geocode(address_to_geocode) if location: print("{} found: {}".format(i+1, name)) print(location.raw) worksheet.update_cell(i+2, 2+1, location.address) worksheet.update_cell(i+2, 3+1, location.latitude) worksheet.update_cell(i+2, 4+1, location.longitude) if address and address != location.address: worksheet.update_cell(i+2, 6+1, address) # store alt address in "notes" field #valid_count += 1 country = update_country_from_address(location.address, i, countries, worksheet) row[2] = location.address row[3] = location.latitude row[4] = location.longitude row[7] = country return True else: print("{} not found: {}".format(i+1, address_to_geocode)) #invalid_count += 1 return False def update_country_from_address(address, i, countries, worksheet): address_partz = address.split(', ') possible_country = address_partz[-1] country = None if possible_country in countries: country = countries[possible_country] elif "China" in address: country = "China" elif "Hong Kong" in address: country = "China" elif "Singapore" in address: country = "Singapore" elif "Taiwan" in address: country = "Taiwan" elif "Russia" in address: country = "Russia" elif "Ukraine" in address: country = "Ukraine" elif "Hungary" in address: country = "Hungary" elif "Japan" in address: country = "Japan" elif "Iran" in address: country = "Iran" elif "South Korea" in address: country = "South Korea" elif "Egypt" in address: country = "Egypt" elif "پاکستان" in address: country = "Pakistan" elif "السعودية" in address: country = "Saudi Arabia" else: print("unknown country: {}".format(address)) return "" worksheet.update_cell(i+2, 7+1, country) # print(country) return country if __name__ == '__main__': s2_geocode_spreadsheet()