summary | refs | log | tree | commit | diff
path: root/s2-geocode.py
diff options
context:
space:
mode:
Diffstat (limited to 's2-geocode.py')
-rw-r--r-- s2-geocode.py 81
1 files changed, 0 insertions, 81 deletions
diff --git a/s2-geocode.py b/s2-geocode.py
deleted file mode 100644
index eee11c4d..00000000
--- a/s2-geocode.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import random
-import re
-import os
-import glob
-import time
-import simplejson as json
-from geopy import geocoders
-import click
-from urllib.parse import urlparse
-from dotenv import load_dotenv
-import operator
-from util import *
-load_dotenv()
-
@click.command()
@click.option('--fn', '-f', default='reports/doi_institutions_unknown.csv', help='List of institution names, to be geocoded :)')
def s2_geocode(fn):
    """Geocode institution names from a CSV and append hits to a worksheet.

    NOTE: this command is currently DISABLED. It builds the geocoder and
    fetches the worksheet, then returns before reading any rows. Remove the
    early ``return`` below to re-enable it.

    When enabled, the third column (``row[2]``) of every row in *fn* is
    taken as an affiliation string, stripped of department-level parts via
    ``remove_department_name``, and sent to the Google geocoder. Each hit is
    appended to the worksheet as
    ``[short name, full name, address, latitude, longitude, 'edu']``.

    Args:
        fn: Path of the CSV file listing institution names.
    """
    geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
    worksheet = fetch_worksheet()

    ## DISABLED!!
    return
    print(fn)

    rows = read_csv(fn, keys=False)
    for row in rows:
        name = remove_department_name(row[2])
        if not name:
            # Nothing but department-level parts: nothing to geocode.
            continue

        # Best effort: one failed lookup must not abort the whole batch.
        # ``Exception`` (rather than a bare ``except``) keeps Ctrl-C and
        # SystemExit working while the loop is running.
        try:
            location = geolocator.geocode(name)
        except Exception as err:
            print("geocode error: {} ({})".format(name, err))
            location = None

        if location:
            print("found: {}".format(name))
            # Prefer the part that mentions "university" as the short name;
            # when several parts match, the last one wins.
            cname = name
            for word in name.split(', '):
                if "university" in word.lower():
                    cname = word
            worksheet.append_row([
                cname, name, location.address, location.latitude, location.longitude, 'edu'
            ])
        else:
            print("not found: {}".format(name))

        # Pause between lookups so the geocoding service is not hammered.
        time.sleep(2)
-
def remove_department_name(name):
    """Strip department-level parts from a comma-separated affiliation.

    The affiliation is split on ``', '``. Any part that contains
    "school of" or "department", or the abbreviation "dept"/"depts" as a
    whole word, is dropped (case-insensitive). The remaining parts are
    re-joined with ``', '``.

    The abbreviation is matched on word boundaries so that names which
    merely contain those letters (e.g. "Deptford") are kept.

    Args:
        name: Affiliation string such as ``"Dept. of Physics, MIT"``.
            ``None`` and the empty string are accepted.

    Returns:
        The affiliation without its department-level parts, or ``''`` when
        nothing remains or *name* is empty/``None``.
    """
    if not name:
        return ''
    kept_parts = [
        part
        for part in name.split(', ')
        if not re.search(r'school of|department|\bdepts?\b', part.lower())
    ]
    return ', '.join(kept_parts)
-
# Script entry point: hand control to the click command, which parses argv.
if __name__ == '__main__':
    s2_geocode()