1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
import os
import csv
import click
import time
from geopy import geocoders
from dotenv import load_dotenv
from util import *
load_dotenv()
geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
print('loading institutions...')
worksheet = fetch_worksheet("institutions")
keys, rows = fetch_google_sheet("institutions")
print("got {} rows".format(len(rows)))
countries = load_countries()
print('loaded countries...')
cname_lookup = {}
@click.command()
def s2_geocode_spreadsheet():
# row_tuples = []
for i, row in enumerate(rows):
# row_tuples.append((i, row,))
cname, name, address, lat, lng, org_type, extra_address, country = row
if len(cname) < 3:
print("very short cname: {}".format(cname))
if cname == name or cname not in cname_lookup:
cname_lookup[cname] = i
print("built lookup")
print("processing sheet...")
for i, row in enumerate(rows):
hit_api = s2_geocode_row(i, row)
if hit_api:
time.sleep(1)
# parallelize(s2_geocode_row, row_tuples)
def s2_geocode_row(i, row):
# 0 cname 1 name 2 address 3 lat 4 lng 5 org_type 6 notes 7 country
cname, name, address, lat, lng, org_type, extra_address, country = row
if lat and lng:
if not country:
update_country_from_address(address, i, countries, worksheet)
return True
return False
c_row = rows[cname_lookup[cname]]
if c_row[3] and c_row[4]:
print("name {}, found cname: {}".format(name, cname))
worksheet.update_cell(i+2, 2+1, c_row[2])
worksheet.update_cell(i+2, 3+1, c_row[3])
worksheet.update_cell(i+2, 4+1, c_row[4])
worksheet.update_cell(i+2, 7+1, c_row[7])
return True
if address:
address_to_geocode = address
elif name:
address_to_geocode = name
elif cname:
address_to_geocode = cname
if not address_to_geocode:
return False
print(address_to_geocode)
location = geolocator.geocode(address_to_geocode)
if location:
print("{} found: {}".format(i+1, name))
print(location.raw)
worksheet.update_cell(i+2, 2+1, location.address)
worksheet.update_cell(i+2, 3+1, location.latitude)
worksheet.update_cell(i+2, 4+1, location.longitude)
if address and address != location.address:
worksheet.update_cell(i+2, 6+1, address) # store alt address in "notes" field
valid_count += 1
country = update_country_from_address(location.address, i, countries, worksheet)
row[2] = location.address
row[3] = location.latitude
row[4] = location.longitude
row[7] = country
return True
else:
print("{} not found: {}".format(i+1, address_to_geocode))
invalid_count += 1
return False
def update_country_from_address(address, i, countries, worksheet):
address_partz = address.split(', ')
possible_country = address_partz[-1]
country = None
if possible_country in countries:
country = countries[possible_country]
elif "China" in address:
country = "China"
elif "Singapore" in address:
country = "Singapore"
elif "Taiwan" in address:
country = "Taiwan"
elif "Russia" in address:
country = "Russia"
elif "Japan" in address:
country = "Japan"
elif "Iran" in address:
country = "Iran"
elif "Egypt" in address:
country = "Egypt"
elif "پاکستان" in address:
country = "Pakistan"
elif "السعودية" in address:
country = "Saudi Arabia"
else:
print("unknown country: {}".format(possible_country))
return ""
worksheet.update_cell(i+2, 7+1, country)
# print(country)
return country
if __name__ == '__main__':
s2_geocode_spreadsheet()
|