summaryrefslogtreecommitdiff
path: root/s2-geocode-spreadsheet.py
blob: d0fd2050785e7cc2fe34062ae9ff462b75eee08b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import csv
import click
import time
from geopy import geocoders
from dotenv import load_dotenv
from util import *
load_dotenv()

@click.command()
def s2_geocode_spreadsheet():
  geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))

  worksheet = fetch_worksheet()
  rows = fetch_google_sheet()
  valid_count = 0
  invalid_count = 0

  print("got {} rows".format(len(rows)))

  cname_lookup = {}
  for i, row in enumerate(rows):
    if len(row) == 6:
      cname, name, address, lat, lng, org_type = row
    elif len(row) == 7:
      cname, name, address, lat, lng, org_type, extra_address = row
    else:
      print("Weirdly formatted row {}".format(i))
      continue
    if cname == name or cname not in cname_lookup:
      cname_lookup[cname] = i

  # 0 cname 1 name 2 address 3 lat 4 lng 5 org_type
  for i, row in enumerate(rows):
    if len(row) == 6:
      cname, name, address, lat, lng, org_type = row
    elif len(row) == 7:
      cname, name, address, lat, lng, org_type, extra_address = row
    else:
      print("Weirdly formatted row {}: {} entries".format(i, len(row)))
      continue
    if lat and lng:
      continue
    c_row = rows[cname_lookup[cname]]
    if c_row[3] and c_row[4]:
      print("name {}, found cname: {}".format(name, cname))
      worksheet.update_cell(i+2, 3, c_row[2])
      worksheet.update_cell(i+2, 4, c_row[3])
      worksheet.update_cell(i+2, 5, c_row[4])
      continue
    if address:
      address_to_geocode = address
    elif name:
      address_to_geocode = name
    elif cname:
      address_to_geocode = cname

    if not address_to_geocode:
      continue

    print(address_to_geocode)
    location = geolocator.geocode(address_to_geocode)
    if location:
      print("{} found: {}".format(i+1, name))
      print(location.raw)
      worksheet.update_cell(i+2, 3, location.address)
      worksheet.update_cell(i+2, 4, location.latitude)
      worksheet.update_cell(i+2, 5, location.longitude)
      if address and address != location.address:
        worksheet.update_cell(i+2, 7, address)
      valid_count += 1
      row[2] = location.address
      row[3] = location.latitude
      row[4] = location.longitude
    else:
      print("{} not found: {}".format(i+1, address_to_geocode))
      invalid_count += 1
    time.sleep(2)

  print("geocoded {} addresses, {} found, {} not found".format(len(rows), valid_count, invalid_count))

if __name__ == '__main__':
  s2_geocode_spreadsheet()