diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-25 22:19:15 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-25 22:19:15 +0100 |
| commit | ee3d0d98e19f1d8177d85af1866fd0ee431fe9ea (patch) | |
| tree | 41372528e78d4328bc2a47bbbabac7e809c58894 /scraper/s2-geocode-spreadsheet.py | |
| parent | 255b8178af1e25a71fd23703d30c0d1f74911f47 (diff) | |
moving stuff
Diffstat (limited to 'scraper/s2-geocode-spreadsheet.py')
| -rw-r--r-- | scraper/s2-geocode-spreadsheet.py | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py new file mode 100644 index 00000000..d0fd2050 --- /dev/null +++ b/scraper/s2-geocode-spreadsheet.py @@ -0,0 +1,83 @@ +import os +import csv +import click +import time +from geopy import geocoders +from dotenv import load_dotenv +from util import * +load_dotenv() + +@click.command() +def s2_geocode_spreadsheet(): + geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY')) + + worksheet = fetch_worksheet() + rows = fetch_google_sheet() + valid_count = 0 + invalid_count = 0 + + print("got {} rows".format(len(rows))) + + cname_lookup = {} + for i, row in enumerate(rows): + if len(row) == 6: + cname, name, address, lat, lng, org_type = row + elif len(row) == 7: + cname, name, address, lat, lng, org_type, extra_address = row + else: + print("Weirdly formatted row {}".format(i)) + continue + if cname == name or cname not in cname_lookup: + cname_lookup[cname] = i + + # 0 cname 1 name 2 address 3 lat 4 lng 5 org_type + for i, row in enumerate(rows): + if len(row) == 6: + cname, name, address, lat, lng, org_type = row + elif len(row) == 7: + cname, name, address, lat, lng, org_type, extra_address = row + else: + print("Weirdly formatted row {}: {} entries".format(i, len(row))) + continue + if lat and lng: + continue + c_row = rows[cname_lookup[cname]] + if c_row[3] and c_row[4]: + print("name {}, found cname: {}".format(name, cname)) + worksheet.update_cell(i+2, 3, c_row[2]) + worksheet.update_cell(i+2, 4, c_row[3]) + worksheet.update_cell(i+2, 5, c_row[4]) + continue + if address: + address_to_geocode = address + elif name: + address_to_geocode = name + elif cname: + address_to_geocode = cname + + if not address_to_geocode: + continue + + print(address_to_geocode) + location = geolocator.geocode(address_to_geocode) + if location: + print("{} found: {}".format(i+1, name)) + print(location.raw) + worksheet.update_cell(i+2, 3, location.address) + worksheet.update_cell(i+2, 4, location.latitude) + worksheet.update_cell(i+2, 5, location.longitude) + if address and address != location.address: + worksheet.update_cell(i+2, 7, address) + valid_count += 1 + row[2] = location.address + row[3] = location.latitude + row[4] = location.longitude + else: + print("{} not found: {}".format(i+1, address_to_geocode)) + invalid_count += 1 + time.sleep(2) + + print("geocoded {} addresses, {} found, {} not found".format(len(rows), valid_count, invalid_count)) + +if __name__ == '__main__': + s2_geocode_spreadsheet() |
