From 312fb050ef76e0d48a89ca2c46a801cede4cb6d7 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Mon, 4 Mar 2019 22:25:29 +0100 Subject: geocode spreadsheet tidying --- scraper/s2-geocode.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'scraper/s2-geocode.py') diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py index e1f012c9..b9c31d64 100644 --- a/scraper/s2-geocode.py +++ b/scraper/s2-geocode.py @@ -21,6 +21,18 @@ def s2_geocode(fn): countries = load_countries() # print(fn) + cname_lookup = {} + name_lookup = {} + institution_keys, institution_rows = fetch_google_sheet("institutions") + for i, row in enumerate(institution_rows): + # row_tuples.append((i, row,)) + cname, name, address, lat, lng, org_type, extra_address, country = row + if len(cname) < 3: + print("very short cname: {}".format(cname)) + if cname == name or cname not in cname_lookup: + cname_lookup[cname] = i + name_lookup[name] = True + print("built lookup") rows = read_csv(fn, keys=False) valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True) @@ -34,6 +46,8 @@ def s2_geocode(fn): if not name or len(name) < 2 or name in countries: print("weird name: {}".format(name)) continue + if name in cname_lookup or name in name_lookup: + continue try: location = geolocator.geocode(name) except: -- cgit v1.2.3-70-g09d2