diff options
| author | jules@lens <julescarbon@gmail.com> | 2019-02-20 16:21:53 +0100 |
|---|---|---|
| committer | jules@lens <julescarbon@gmail.com> | 2019-02-20 16:21:53 +0100 |
| commit | e0038fbc4b891fe4393acfad8d9755fa1834278e (patch) | |
| tree | c35aa2d2b0a76c10e57904ed1f41f5a7dcdd4870 /scraper/s2-geocode.py | |
| parent | 225b7936cd1b80effa4bf77b1ffc3c92a8f17526 (diff) | |
| parent | 9b97ddf7e1bc1febc4066cd5e083cee688d77027 (diff) | |
mergez
Diffstat (limited to 'scraper/s2-geocode.py')
| -rw-r--r-- | scraper/s2-geocode.py | 10 |
1 files changed, 7 insertions, 3 deletions
```diff
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 25eb6f8a..1fcc690d 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -18,6 +18,7 @@ def s2_geocode(fn):
   # geolocator = geocoders.Nominatim(user_agent="cool geocoding service")
   geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
   worksheet = fetch_worksheet('institutions')
+  countries = load_countries()
 
   # print(fn)
 
@@ -30,7 +31,9 @@ def s2_geocode(fn):
   for i, row in enumerate(rows):
     name = row[2]
     name = remove_department_name(name)
-    if not name:
+    if not name or len(name) < 2:
+      if cname in countries:
+        print("cname is a country: {}".format(cname))
       continue
     try:
       location = geolocator.geocode(name)
@@ -40,10 +43,10 @@ def s2_geocode(fn):
       print("found: {}".format(name))
       cname = name
       for word in name.split(', '):
-        if "university" in word.lower():
+        if "university" in word.lower() and 'california' not in word.lower():
           cname = word
       worksheet.append_row([
-        cname, name, location.address, location.latitude, location.longitude, 'edu'
+        cname, name, location.address, location.latitude, location.longitude, 'edu', '',
       ])
       valid.append([
         name,
@@ -77,3 +80,4 @@ def remove_department_name(name):
 
 if __name__ == '__main__':
   s2_geocode()
+
```
