summaryrefslogtreecommitdiff
path: root/scraper/s2-geocode-spreadsheet.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-15 19:57:49 +0100
committeradamhrv <adam@ahprojects.com>2018-12-15 19:57:49 +0100
commit82b2c0b5d6d7baccbe4d574d96e18fe2078047d7 (patch)
treea8784b7ec2bc5a0451c252f66a6b786f3a2504f5 /scraper/s2-geocode-spreadsheet.py
parent8e978af21c2b29f678a09701afb3ec7d65d0a6ab (diff)
parentc5b02ffab8d388e8a2925e51736b902a48a95e71 (diff)
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'scraper/s2-geocode-spreadsheet.py')
-rw-r--r--scraper/s2-geocode-spreadsheet.py83
1 files changed, 83 insertions, 0 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
new file mode 100644
index 00000000..d0fd2050
--- /dev/null
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -0,0 +1,83 @@
+import os
+import csv
+import click
+import time
+from geopy import geocoders
+from dotenv import load_dotenv
+from util import *
+load_dotenv()
+
+@click.command()
+def s2_geocode_spreadsheet():
+ geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
+
+ worksheet = fetch_worksheet()
+ rows = fetch_google_sheet()
+ valid_count = 0
+ invalid_count = 0
+
+ print("got {} rows".format(len(rows)))
+
+ cname_lookup = {}
+ for i, row in enumerate(rows):
+ if len(row) == 6:
+ cname, name, address, lat, lng, org_type = row
+ elif len(row) == 7:
+ cname, name, address, lat, lng, org_type, extra_address = row
+ else:
+ print("Weirdly formatted row {}".format(i))
+ continue
+ if cname == name or cname not in cname_lookup:
+ cname_lookup[cname] = i
+
+ # 0 cname 1 name 2 address 3 lat 4 lng 5 org_type
+ for i, row in enumerate(rows):
+ if len(row) == 6:
+ cname, name, address, lat, lng, org_type = row
+ elif len(row) == 7:
+ cname, name, address, lat, lng, org_type, extra_address = row
+ else:
+ print("Weirdly formatted row {}: {} entries".format(i, len(row)))
+ continue
+ if lat and lng:
+ continue
+ c_row = rows[cname_lookup[cname]]
+ if c_row[3] and c_row[4]:
+ print("name {}, found cname: {}".format(name, cname))
+ worksheet.update_cell(i+2, 3, c_row[2])
+ worksheet.update_cell(i+2, 4, c_row[3])
+ worksheet.update_cell(i+2, 5, c_row[4])
+ continue
+ if address:
+ address_to_geocode = address
+ elif name:
+ address_to_geocode = name
+ elif cname:
+ address_to_geocode = cname
+
+ if not address_to_geocode:
+ continue
+
+ print(address_to_geocode)
+ location = geolocator.geocode(address_to_geocode)
+ if location:
+ print("{} found: {}".format(i+1, name))
+ print(location.raw)
+ worksheet.update_cell(i+2, 3, location.address)
+ worksheet.update_cell(i+2, 4, location.latitude)
+ worksheet.update_cell(i+2, 5, location.longitude)
+ if address and address != location.address:
+ worksheet.update_cell(i+2, 7, address)
+ valid_count += 1
+ row[2] = location.address
+ row[3] = location.latitude
+ row[4] = location.longitude
+ else:
+ print("{} not found: {}".format(i+1, address_to_geocode))
+ invalid_count += 1
+ time.sleep(2)
+
+ print("geocoded {} addresses, {} found, {} not found".format(len(rows), valid_count, invalid_count))
+
+if __name__ == '__main__':
+ s2_geocode_spreadsheet()