summary | refs | log | tree | commit | diff
path: root/s2-geocode.py
diff options
context:
space:
mode:
Diffstat (limited to 's2-geocode.py')
-rw-r--r-- s2-geocode.py 12
1 files changed, 7 insertions, 5 deletions
diff --git a/s2-geocode.py b/s2-geocode.py
index 5f1ab88d..e8dff470 100644
--- a/s2-geocode.py
+++ b/s2-geocode.py
@@ -10,18 +10,20 @@ from geopy.geocoders import Nominatim
import random
@click.command()
-@click.option('--fn', '-f', default='reports/institution_names.csv', help='List of institution names, to be geocoded :)')
+@click.option('--fn', '-f', default='reports/doi_institutions.csv', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
geolocator = Nominatim(user_agent="cool geocoding service")
print(fn)
rows = read_csv(fn, keys=False)
- valid = read_csv('./reports/institutions_geocoded.csv', keys=False, create=True)
- invalid = read_csv('./reports/institutions_not_found.csv', keys=False, create=True)
+ valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
+ invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
valid_names = [row[0] for row in valid]
invalid_names = [row[0] for row in invalid]
random.shuffle(rows)
for i, row in enumerate(rows):
name = row[0]
+ if not name:
+ continue
if name in invalid_names:
continue
if name in valid_names:
@@ -42,8 +44,8 @@ def s2_geocode(fn):
invalid_names.append(name)
if i and (i % 20) == 0:
print("{}...".format(i))
- write_csv('./reports/institutions_geocoded.csv', keys=None, rows=valid)
- write_csv('./reports/institutions_not_found.csv', keys=None, rows=invalid)
+ write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
+ write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
time.sleep(2)
if __name__ == '__main__':