diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:36:47 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:36:47 +0100 |
| commit | c5a81e2159c0b0d8909a72de4fc606de951c5656 (patch) | |
| tree | da624dfb89a3b7edd102db142be089312e23dc76 /scraper | |
| parent | b744e9a307dbcaefc1b95957124aed0e96f29e14 (diff) | |
more exceptions
Diffstat (limited to 'scraper')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | scraper/s2-geocode-spreadsheet.py | 8 |
| -rw-r--r-- | scraper/s2-geocode.py | 5 |
2 files changed, 9 insertions, 4 deletions
```diff
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index c48685f4..aa8c2578 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -35,6 +35,8 @@ def s2_geocode_spreadsheet():
   print("processing sheet...")
   seen = {}
   for i, row in enumerate(rows):
+    if (i % 1000) == 0:
+      print("{}...".format(i))
     if row[1] in seen:
       continue
     seen[row[1]] = True
@@ -112,10 +114,14 @@ def update_country_from_address(address, i, countries, worksheet):
     country = "Taiwan"
   elif "Russia" in address:
     country = "Russia"
+  elif "Ukraine" in address:
+    country = "Ukraine"
   elif "Japan" in address:
     country = "Japan"
   elif "Iran" in address:
     country = "Iran"
+  elif "South Korea" in address:
+    country = "South Korea"
   elif "Egypt" in address:
     country = "Egypt"
   elif "پاکستان" in address:
@@ -123,7 +129,7 @@ def update_country_from_address(address, i, countries, worksheet):
   elif "السعودية" in address:
     country = "Saudi Arabia"
   else:
-    print("unknown country: {}".format(possible_country))
+    print("unknown country: {}".format(address))
     return ""
 
   worksheet.update_cell(i+2, 7+1, country)
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 1fcc690d..e1f012c9 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -31,9 +31,8 @@ def s2_geocode(fn):
   for i, row in enumerate(rows):
     name = row[2]
     name = remove_department_name(name)
-    if not name or len(name) < 2:
-      if cname in countries:
-        print("cname is a country: {}".format(cname))
+    if not name or len(name) < 2 or name in countries:
+      print("weird name: {}".format(name))
       continue
     try:
       location = geolocator.geocode(name)
```
