summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-03-04 22:25:29 +0100
committer Jules Laplace <julescarbon@gmail.com> 2019-03-04 22:25:29 +0100
commit 312fb050ef76e0d48a89ca2c46a801cede4cb6d7 (patch)
tree f55f653b9b188c95b56dbad39b75ac574aa8ff37
parent 7f44de5007077cd8cb2c2e0a20144b3321523c74 (diff)
geocode spreadsheet tidying
-rw-r--r-- scraper/s2-geocode-spreadsheet.py 4
-rw-r--r-- scraper/s2-geocode.py 14
2 files changed, 18 insertions, 0 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index b9f148a3..375c8fde 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -111,6 +111,8 @@ def update_country_from_address(address, i, countries, worksheet):
country = countries[possible_country]
elif "China" in address:
country = "China"
+ elif "Hong Kong" in address:
+ country = "China"
elif "Singapore" in address:
country = "Singapore"
elif "Taiwan" in address:
@@ -119,6 +121,8 @@ def update_country_from_address(address, i, countries, worksheet):
country = "Russia"
elif "Ukraine" in address:
country = "Ukraine"
+ elif "Hungary" in address:
+ country = "Hungary"
elif "Japan" in address:
country = "Japan"
elif "Iran" in address:
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index e1f012c9..b9c31d64 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -21,6 +21,18 @@ def s2_geocode(fn):
countries = load_countries()
# print(fn)
+ cname_lookup = {}
+ name_lookup = {}
+ institution_keys, institution_rows = fetch_google_sheet("institutions")
+ for i, row in enumerate(institution_rows):
+ # row_tuples.append((i, row,))
+ cname, name, address, lat, lng, org_type, extra_address, country = row
+ if len(cname) < 3:
+ print("very short cname: {}".format(cname))
+ if cname == name or cname not in cname_lookup:
+ cname_lookup[cname] = i
+ name_lookup[name] = True
+ print("built lookup")
rows = read_csv(fn, keys=False)
valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
@@ -34,6 +46,8 @@ def s2_geocode(fn):
if not name or len(name) < 2 or name in countries:
print("weird name: {}".format(name))
continue
+ if name in cname_lookup or name in name_lookup:
+ continue
try:
location = geolocator.geocode(name)
except: