summary | refs | log | tree | commit | diff
path: root/scraper/s2-geocode.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/s2-geocode.py')
-rw-r--r-- scraper/s2-geocode.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index e1f012c9..b9c31d64 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -21,6 +21,18 @@ def s2_geocode(fn):
countries = load_countries()
# print(fn)
+ cname_lookup = {}
+ name_lookup = {}
+ institution_keys, institution_rows = fetch_google_sheet("institutions")
+ for i, row in enumerate(institution_rows):
+ # row_tuples.append((i, row,))
+ cname, name, address, lat, lng, org_type, extra_address, country = row
+ if len(cname) < 3:
+ print("very short cname: {}".format(cname))
+ if cname == name or cname not in cname_lookup:
+ cname_lookup[cname] = i
+ name_lookup[name] = True
+ print("built lookup")
rows = read_csv(fn, keys=False)
valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
@@ -34,6 +46,8 @@ def s2_geocode(fn):
if not name or len(name) < 2 or name in countries:
print("weird name: {}".format(name))
continue
+ if name in cname_lookup or name in name_lookup:
+ continue
try:
location = geolocator.geocode(name)
except: