diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-07 00:04:38 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-07 00:04:38 +0100 |
| commit | 7e8161af7bbb6dbfaefeef986299f8fb6d2e0915 (patch) | |
| tree | 84a8029410c5a4ccc9cbb0d47feda3a1df70ea4e /s2-geocode.py | |
| parent | 77226327bf7cc228a47d7765cf76f52e7dd799ae (diff) | |
ieee domain reports
Diffstat (limited to 's2-geocode.py')
| -rw-r--r-- | s2-geocode.py | 12 |
1 files changed, 7 insertions, 5 deletions
```diff
diff --git a/s2-geocode.py b/s2-geocode.py
index 5f1ab88d..e8dff470 100644
--- a/s2-geocode.py
+++ b/s2-geocode.py
@@ -10,18 +10,20 @@ from geopy.geocoders import Nominatim
 import random
 
 @click.command()
-@click.option('--fn', '-f', default='reports/institution_names.csv', help='List of institution names, to be geocoded :)')
+@click.option('--fn', '-f', default='reports/doi_institutions.csv', help='List of institution names, to be geocoded :)')
 def s2_geocode(fn):
     geolocator = Nominatim(user_agent="cool geocoding service")
     print(fn)
     rows = read_csv(fn, keys=False)
-    valid = read_csv('./reports/institutions_geocoded.csv', keys=False, create=True)
-    invalid = read_csv('./reports/institutions_not_found.csv', keys=False, create=True)
+    valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
+    invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
     valid_names = [row[0] for row in valid]
     invalid_names = [row[0] for row in invalid]
     random.shuffle(rows)
     for i, row in enumerate(rows):
         name = row[0]
+        if not name:
+            continue
         if name in invalid_names:
             continue
         if name in valid_names:
@@ -42,8 +44,8 @@ def s2_geocode(fn):
             invalid_names.append(name)
         if i and (i % 20) == 0:
             print("{}...".format(i))
-            write_csv('./reports/institutions_geocoded.csv', keys=None, rows=valid)
-            write_csv('./reports/institutions_not_found.csv', keys=None, rows=invalid)
+            write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
+            write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
         time.sleep(2)
 
 if __name__ == '__main__':
```
