diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-16 15:27:04 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-16 15:27:04 +0100 |
| commit | 7de24b96b74b00599a674a5c5d7c6b6381b69955 (patch) | |
| tree | 53b67be89dfdc125777e12f29657c9cf280bc24f /scraper | |
| parent | 49ff00e162370e4dd1f4688b0a3537b4674f36a4 (diff) | |
update geocoder
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-geocode.py | 42 |
1 file changed, 20 insertions, 22 deletions
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py index eee11c4d..25eb6f8a 100644 --- a/scraper/s2-geocode.py +++ b/scraper/s2-geocode.py @@ -17,18 +17,16 @@ load_dotenv() def s2_geocode(fn): # geolocator = geocoders.Nominatim(user_agent="cool geocoding service") geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY')) - worksheet = fetch_worksheet() + worksheet = fetch_worksheet('institutions') - ## DISABLED!! - return - print(fn) + # print(fn) rows = read_csv(fn, keys=False) - # valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True) - # invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True) - # valid_names = [row[0] for row in valid] - # invalid_names = [row[0] for row in invalid] - # random.shuffle(rows) + valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True) + invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True) + valid_names = [row[0] for row in valid] + invalid_names = [row[0] for row in invalid] + random.shuffle(rows) for i, row in enumerate(rows): name = row[2] name = remove_department_name(name) @@ -47,21 +45,21 @@ def s2_geocode(fn): worksheet.append_row([ cname, name, location.address, location.latitude, location.longitude, 'edu' ]) - # valid.append([ - # name, - # location.latitude, - # location.longitude, - # location.address, - # ]) - # valid_names.append(name) + valid.append([ + name, + location.latitude, + location.longitude, + location.address, + ]) + valid_names.append(name) else: print("not found: {}".format(name)) - # invalid.append(row) - # invalid_names.append(row[0]) - # if i and (i % 20) == 0: - # print("{}...".format(i)) - # write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid) - # write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid) + invalid.append(row) + invalid_names.append(row[0]) + if i and (i % 20) == 0: + print("{}...".format(i)) + 
write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid) + write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid) time.sleep(2) def remove_department_name(name): |
