summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-16 15:27:04 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-16 15:27:04 +0100
commit: 7de24b96b74b00599a674a5c5d7c6b6381b69955 (patch)
tree: 53b67be89dfdc125777e12f29657c9cf280bc24f /scraper
parent: 49ff00e162370e4dd1f4688b0a3537b4674f36a4 (diff)
update geocoder
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/s2-geocode.py 42
1 files changed, 20 insertions, 22 deletions
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index eee11c4d..25eb6f8a 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -17,18 +17,16 @@ load_dotenv()
def s2_geocode(fn):
# geolocator = geocoders.Nominatim(user_agent="cool geocoding service")
geolocator = geocoders.GoogleV3(os.getenv('MAPS_API_KEY'))
- worksheet = fetch_worksheet()
+ worksheet = fetch_worksheet('institutions')
- ## DISABLED!!
- return
- print(fn)
+ # print(fn)
rows = read_csv(fn, keys=False)
- # valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
- # invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
- # valid_names = [row[0] for row in valid]
- # invalid_names = [row[0] for row in invalid]
- # random.shuffle(rows)
+ valid = read_csv('./reports/doi_institutions_geocoded.csv', keys=False, create=True)
+ invalid = read_csv('./reports/doi_institutions_not_found.csv', keys=False, create=True)
+ valid_names = [row[0] for row in valid]
+ invalid_names = [row[0] for row in invalid]
+ random.shuffle(rows)
for i, row in enumerate(rows):
name = row[2]
name = remove_department_name(name)
@@ -47,21 +45,21 @@ def s2_geocode(fn):
worksheet.append_row([
cname, name, location.address, location.latitude, location.longitude, 'edu'
])
- # valid.append([
- # name,
- # location.latitude,
- # location.longitude,
- # location.address,
- # ])
- # valid_names.append(name)
+ valid.append([
+ name,
+ location.latitude,
+ location.longitude,
+ location.address,
+ ])
+ valid_names.append(name)
else:
print("not found: {}".format(name))
- # invalid.append(row)
- # invalid_names.append(row[0])
- # if i and (i % 20) == 0:
- # print("{}...".format(i))
- # write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
- # write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
+ invalid.append(row)
+ invalid_names.append(row[0])
+ if i and (i % 20) == 0:
+ print("{}...".format(i))
+ write_csv('./reports/doi_institutions_geocoded.csv', keys=None, rows=valid)
+ write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
time.sleep(2)
def remove_department_name(name):