summary | refs | log | tree | commit | diff
path: root/s2-geocode.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com>, 2018-11-07 00:07:54 +0100
committer: Jules Laplace <julescarbon@gmail.com>, 2018-11-07 00:07:54 +0100
commit: 040c07261183fbf1fab3345f8fbec4d592bd97d1 (patch)
tree: 4eafa2910ec1f7635d192c30655c6dea90788b82 /s2-geocode.py
parent: 7e8161af7bbb6dbfaefeef986299f8fb6d2e0915 (diff)
remove department name
Diffstat (limited to 's2-geocode.py')
-rw-r--r-- s2-geocode.py | 13
1 files changed, 12 insertions, 1 deletions
diff --git a/s2-geocode.py b/s2-geocode.py
index e8dff470..63b3c463 100644
--- a/s2-geocode.py
+++ b/s2-geocode.py
@@ -21,7 +21,7 @@ def s2_geocode(fn):
invalid_names = [row[0] for row in invalid]
random.shuffle(rows)
for i, row in enumerate(rows):
- name = row[0]
+ name = remove_department_name(row[0])
if not name:
continue
if name in invalid_names:
@@ -48,5 +48,16 @@ def s2_geocode(fn):
write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
time.sleep(2)
def remove_department_name(name):
    """Strip school/department segments from a comma-separated name.

    The name is split on ``', '``; any segment containing ``'school of'``
    or ``'department of'`` (case-insensitive) is discarded and the
    remaining segments are re-joined with ``', '``.

    Args:
        name: Institution string such as
            ``'Department of Physics, Some University'``.

    Returns:
        The name without its school/department segments. If *every*
        segment would be discarded (e.g. ``'London School of Economics'``)
        the original name is returned unchanged, so a valid institution is
        not reduced to an empty string. Falsy input (``None`` or ``''``)
        yields ``''``.
    """
    if not name:
        return ''
    markers = ('school of', 'department of')
    kept_parts = []
    for part in name.split(', '):
        lowered = part.lower()  # lower-case once per segment
        if any(marker in lowered for marker in markers):
            continue
        kept_parts.append(part)
    if not kept_parts:
        # Every segment matched a marker; keeping the original is better
        # than returning '' and losing the whole name.
        return name
    return ', '.join(kept_parts)
+
if __name__ == '__main__':
s2_geocode()