summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-20 17:36:47 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-20 17:36:47 +0100
commit: c5a81e2159c0b0d8909a72de4fc606de951c5656 (patch)
tree: da624dfb89a3b7edd102db142be089312e23dc76 /scraper
parent: b744e9a307dbcaefc1b95957124aed0e96f29e14 (diff)
more exceptions
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/s2-geocode-spreadsheet.py 8
-rw-r--r-- scraper/s2-geocode.py 5
2 files changed, 9 insertions, 4 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index c48685f4..aa8c2578 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -35,6 +35,8 @@ def s2_geocode_spreadsheet():
print("processing sheet...")
seen = {}
for i, row in enumerate(rows):
+ if (i % 1000) == 0:
+ print("{}...".format(i))
if row[1] in seen:
continue
seen[row[1]] = True
@@ -112,10 +114,14 @@ def update_country_from_address(address, i, countries, worksheet):
country = "Taiwan"
elif "Russia" in address:
country = "Russia"
+ elif "Ukraine" in address:
+ country = "Ukraine"
elif "Japan" in address:
country = "Japan"
elif "Iran" in address:
country = "Iran"
+ elif "South Korea" in address:
+ country = "South Korea"
elif "Egypt" in address:
country = "Egypt"
elif "پاکستان" in address:
@@ -123,7 +129,7 @@ def update_country_from_address(address, i, countries, worksheet):
elif "السعودية" in address:
country = "Saudi Arabia"
else:
- print("unknown country: {}".format(possible_country))
+ print("unknown country: {}".format(address))
return ""
worksheet.update_cell(i+2, 7+1, country)
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 1fcc690d..e1f012c9 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -31,9 +31,8 @@ def s2_geocode(fn):
for i, row in enumerate(rows):
name = row[2]
name = remove_department_name(name)
- if not name or len(name) < 2:
- if cname in countries:
- print("cname is a country: {}".format(cname))
+ if not name or len(name) < 2 or name in countries:
+ print("weird name: {}".format(name))
continue
try:
location = geolocator.geocode(name)