diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:19:03 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:19:03 +0100 |
| commit | 16f7583dbdb2928c557d0ee3766f809779ae9b39 (patch) | |
| tree | 78711119681f7dbd21c7523c77ea0a4f31e56cbe /scraper/s2-geocode-spreadsheet.py | |
| parent | 9b97ddf7e1bc1febc4066cd5e083cee688d77027 (diff) | |
smoother geocode process, fix html entities
Diffstat (limited to 'scraper/s2-geocode-spreadsheet.py')
| -rw-r--r-- | scraper/s2-geocode-spreadsheet.py | 15 |
1 file changed, 13 insertions, 2 deletions
```diff
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index 98baf4b5..c48685f4 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -2,6 +2,7 @@ import os
 import csv
 import click
 import time
+import html
 from geopy import geocoders
 from dotenv import load_dotenv
 from util import *
@@ -32,7 +33,11 @@ def s2_geocode_spreadsheet():
     cname_lookup[cname] = i
   print("built lookup")
   print("processing sheet...")
+  seen = {}
   for i, row in enumerate(rows):
+    if row[1] in seen:
+      continue
+    seen[row[1]] = True
     hit_api = s2_geocode_row(i, row)
     if hit_api:
       time.sleep(1)
@@ -41,6 +46,12 @@ def s2_geocode_spreadsheet():
 def s2_geocode_row(i, row):
   # 0 cname 1 name 2 address 3 lat 4 lng 5 org_type 6 notes 7 country
   cname, name, address, lat, lng, org_type, extra_address, country = row
+  decoded_cname = html.unescape(cname)
+  # print(decoded_cname)
+  if cname != decoded_cname:
+    worksheet.update_cell(i+2, 0+1, decoded_cname)
+    cname = decoded_cname
+
   if lat and lng:
     if not country:
       update_country_from_address(address, i, countries, worksheet)
@@ -75,7 +86,7 @@ def s2_geocode_row(i, row):
     worksheet.update_cell(i+2, 4+1, location.longitude)
     if address and address != location.address:
       worksheet.update_cell(i+2, 6+1, address) # store alt address in "notes" field
-    valid_count += 1
+    #valid_count += 1
     country = update_country_from_address(location.address, i, countries, worksheet)
     row[2] = location.address
     row[3] = location.latitude
@@ -84,7 +95,7 @@ def s2_geocode_row(i, row):
     return True
   else:
     print("{} not found: {}".format(i+1, address_to_geocode))
-    invalid_count += 1
+    #invalid_count += 1
     return False
 
 def update_country_from_address(address, i, countries, worksheet):
```
