diff options
Diffstat (limited to 'scraper/s2-geocode.py')
| -rw-r--r-- | scraper/s2-geocode.py | 10 |
1 file changed, 7 insertions, 3 deletions
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 25eb6f8a..1fcc690d 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -18,6 +18,7 @@ def s2_geocode(fn):
   # geolocator = geocoders.Nominatim(user_agent="cool geocoding service")
   geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
   worksheet = fetch_worksheet('institutions')
+  countries = load_countries()
 
   # print(fn)
 
@@ -30,7 +31,9 @@ def s2_geocode(fn):
   for i, row in enumerate(rows):
     name = row[2]
     name = remove_department_name(name)
-    if not name:
+    if not name or len(name) < 2:
+      if cname in countries:
+        print("cname is a country: {}".format(cname))
       continue
     try:
       location = geolocator.geocode(name)
@@ -40,10 +43,10 @@ def s2_geocode(fn):
       print("found: {}".format(name))
       cname = name
       for word in name.split(', '):
-        if "university" in word.lower():
+        if "university" in word.lower() and 'california' not in word.lower():
           cname = word
       worksheet.append_row([
-        cname, name, location.address, location.latitude, location.longitude, 'edu'
+        cname, name, location.address, location.latitude, location.longitude, 'edu', '',
       ])
       valid.append([
         name,
@@ -77,3 +80,4 @@ def remove_department_name(name):
 
 if __name__ == '__main__':
   s2_geocode()
+
