diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-25 22:19:15 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-25 22:19:15 +0100 |
| commit | ee3d0d98e19f1d8177d85af1866fd0ee431fe9ea (patch) | |
| tree | 41372528e78d4328bc2a47bbbabac7e809c58894 /s2-geocode.py | |
| parent | 255b8178af1e25a71fd23703d30c0d1f74911f47 (diff) | |
moving stuff
Diffstat (limited to 's2-geocode.py')
| -rw-r--r-- | s2-geocode.py | 81 |
1 files changed, 0 insertions, 81 deletions
```diff
diff --git a/s2-geocode.py b/s2-geocode.py
deleted file mode 100644
index eee11c4d..00000000
--- a/s2-geocode.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import random
-import re
-import os
-import glob
-import time
-import simplejson as json
-from geopy import geocoders
-import click
-from urllib.parse import urlparse
-from dotenv import load_dotenv
-import operator
-from util import *
-load_dotenv()
-
-@click.command()
-@click.option('--fn', '-f', default='reports/doi_institutions_unknown.csv', help='List of institution names, to be geocoded :)')
-def s2_geocode(fn):
-  # geolocator = geocoders.Nominatim(user_agent="cool geocoding service")
-  geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
-  worksheet = fetch_worksheet()
-
-  ## DISABLED!!
-  return
-  print(fn)
-
-  rows = read_csv(fn, keys=False)
-  # valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
-  # invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
-  # valid_names = [row[0] for row in valid]
-  # invalid_names = [row[0] for row in invalid]
-  # random.shuffle(rows)
-  for i, row in enumerate(rows):
-    name = row[2]
-    name = remove_department_name(name)
-    if not name:
-      continue
-    try:
-      location = geolocator.geocode(name)
-    except:
-      location = None
-    if location:
-      print("found: {}".format(name))
-      cname = name
-      for word in name.split(', '):
-        if "university" in word.lower():
-          cname = word
-      worksheet.append_row([
-        cname, name, location.address, location.latitude, location.longitude, 'edu'
-      ])
-      # valid.append([
-      #   name,
-      #   location.latitude,
-      #   location.longitude,
-      #   location.address,
-      # ])
-      # valid_names.append(name)
-    else:
-      print("not found: {}".format(name))
-      # invalid.append(row)
-      # invalid_names.append(row[0])
-    # if i and (i % 20) == 0:
-    #   print("{}...".format(i))
-    #   write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
-    #   write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
-    time.sleep(2)
-
-def remove_department_name(name):
-  name_partz = name.split(', ')
-  valid_partz = []
-  for part in name_partz:
-    if 'school of' in part.lower():
-      continue
-    if 'department' in part.lower():
-      continue
-    if 'dept' in part.lower():
-      continue
-    valid_partz.append(part)
-  return ', '.join(valid_partz)
-
-if __name__ == '__main__':
-  s2_geocode()
```
