From c5a81e2159c0b0d8909a72de4fc606de951c5656 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 20 Feb 2019 17:36:47 +0100 Subject: more exceptions --- scraper/s2-geocode-spreadsheet.py | 8 +++++++- scraper/s2-geocode.py | 5 ++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'scraper') diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py index c48685f4..aa8c2578 100644 --- a/scraper/s2-geocode-spreadsheet.py +++ b/scraper/s2-geocode-spreadsheet.py @@ -35,6 +35,8 @@ def s2_geocode_spreadsheet(): print("processing sheet...") seen = {} for i, row in enumerate(rows): + if (i % 1000) == 0: + print("{}...".format(i)) if row[1] in seen: continue seen[row[1]] = True @@ -112,10 +114,14 @@ def update_country_from_address(address, i, countries, worksheet): country = "Taiwan" elif "Russia" in address: country = "Russia" + elif "Ukraine" in address: + country = "Ukraine" elif "Japan" in address: country = "Japan" elif "Iran" in address: country = "Iran" + elif "South Korea" in address: + country = "South Korea" elif "Egypt" in address: country = "Egypt" elif "پاکستان" in address: @@ -123,7 +129,7 @@ def update_country_from_address(address, i, countries, worksheet): elif "السعودية" in address: country = "Saudi Arabia" else: - print("unknown country: {}".format(possible_country)) + print("unknown country: {}".format(address)) return "" worksheet.update_cell(i+2, 7+1, country) diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py index 1fcc690d..e1f012c9 100644 --- a/scraper/s2-geocode.py +++ b/scraper/s2-geocode.py @@ -31,9 +31,8 @@ def s2_geocode(fn): for i, row in enumerate(rows): name = row[2] name = remove_department_name(name) - if not name or len(name) < 2: - if cname in countries: - print("cname is a country: {}".format(cname)) + if not name or len(name) < 2 or name in countries: + print("weird name: {}".format(name)) continue try: location = geolocator.geocode(name) -- cgit v1.2.3-70-g09d2