summary | refs | log | tree | commit | diff
path: root/s2-geocode.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-07 00:04:38 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-07 00:04:38 +0100
commit 7e8161af7bbb6dbfaefeef986299f8fb6d2e0915 (patch)
tree 84a8029410c5a4ccc9cbb0d47feda3a1df70ea4e /s2-geocode.py
parent 77226327bf7cc228a47d7765cf76f52e7dd799ae (diff)
ieee domain reports
Diffstat (limited to 's2-geocode.py')
-rw-r--r-- s2-geocode.py 12
1 file changed, 7 insertions, 5 deletions
diff --git a/s2-geocode.py b/s2-geocode.py
index 5f1ab88d..e8dff470 100644
--- a/s2-geocode.py
+++ b/s2-geocode.py
@@ -10,18 +10,20 @@ from geopy.geocoders import Nominatim
import random
@click.command()
-@click.option('--fn', '-f', default='reports/institution_names.csv', help='List of institution names, to be geocoded :)')
+@click.option('--fn', '-f', default='reports/doi_institutions.csv', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
geolocator = Nominatim(user_agent="cool geocoding service")
print(fn)
rows = read_csv(fn, keys=False)
- valid = read_csv('./reports/institutions_geocoded.csv', keys=False, create=True)
- invalid = read_csv('./reports/institutions_not_found.csv', keys=False, create=True)
+ valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
+ invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
valid_names = [row[0] for row in valid]
invalid_names = [row[0] for row in invalid]
random.shuffle(rows)
for i, row in enumerate(rows):
name = row[0]
+ if not name:
+ continue
if name in invalid_names:
continue
if name in valid_names:
@@ -42,8 +44,8 @@ def s2_geocode(fn):
invalid_names.append(name)
if i and (i % 20) == 0:
print("{}...".format(i))
- write_csv('./reports/institutions_geocoded.csv', keys=None, rows=valid)
- write_csv('./reports/institutions_not_found.csv', keys=None, rows=invalid)
+ write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
+ write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
time.sleep(2)
if __name__ == '__main__':