summary | refs | log | tree | commit | diff
path: root/scraper/s2-geocode-spreadsheet.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/s2-geocode-spreadsheet.py')
-rw-r--r-- scraper/s2-geocode-spreadsheet.py | 15
1 files changed, 13 insertions, 2 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index 98baf4b5..c48685f4 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -2,6 +2,7 @@ import os
import csv
import click
import time
+import html
from geopy import geocoders
from dotenv import load_dotenv
from util import *
@@ -32,7 +33,11 @@ def s2_geocode_spreadsheet():
cname_lookup[cname] = i
print("built lookup")
print("processing sheet...")
+ seen = {}
for i, row in enumerate(rows):
+ if row[1] in seen:
+ continue
+ seen[row[1]] = True
hit_api = s2_geocode_row(i, row)
if hit_api:
time.sleep(1)
@@ -41,6 +46,12 @@ def s2_geocode_spreadsheet():
def s2_geocode_row(i, row):
# 0 cname 1 name 2 address 3 lat 4 lng 5 org_type 6 notes 7 country
cname, name, address, lat, lng, org_type, extra_address, country = row
+ decoded_cname = html.unescape(cname)
+ # print(decoded_cname)
+ if cname != decoded_cname:
+ worksheet.update_cell(i+2, 0+1, decoded_cname)
+ cname = decoded_cname
+
if lat and lng:
if not country:
update_country_from_address(address, i, countries, worksheet)
@@ -75,7 +86,7 @@ def s2_geocode_row(i, row):
worksheet.update_cell(i+2, 4+1, location.longitude)
if address and address != location.address:
worksheet.update_cell(i+2, 6+1, address) # store alt address in "notes" field
- valid_count += 1
+ #valid_count += 1
country = update_country_from_address(location.address, i, countries, worksheet)
row[2] = location.address
row[3] = location.latitude
@@ -84,7 +95,7 @@ def s2_geocode_row(i, row):
return True
else:
print("{} not found: {}".format(i+1, address_to_geocode))
- invalid_count += 1
+ #invalid_count += 1
return False
def update_country_from_address(address, i, countries, worksheet):