diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-07 00:07:54 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-07 00:07:54 +0100 |
| commit | 040c07261183fbf1fab3345f8fbec4d592bd97d1 (patch) | |
| tree | 4eafa2910ec1f7635d192c30655c6dea90788b82 | |
| parent | 7e8161af7bbb6dbfaefeef986299f8fb6d2e0915 (diff) | |
remove department name
| -rw-r--r-- | s2-geocode.py | 13 |
1 file changed, 12 insertions, 1 deletion
```diff
diff --git a/s2-geocode.py b/s2-geocode.py
index e8dff470..63b3c463 100644
--- a/s2-geocode.py
+++ b/s2-geocode.py
@@ -21,7 +21,7 @@ def s2_geocode(fn):
   invalid_names = [row[0] for row in invalid]
   random.shuffle(rows)
   for i, row in enumerate(rows):
-    name = row[0]
+    name = remove_department_name(row[0])
     if not name:
       continue
     if name in invalid_names:
@@ -48,5 +48,16 @@ def s2_geocode(fn):
   write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid)
   time.sleep(2)
 
+def remove_department_name(name):
+  name_partz = name.split(', ')
+  valid_partz = []
+  for part in name_partz:
+    if 'school of' in part.lower():
+      continue
+    if 'department of' in part.lower():
+      continue
+    valid_partz.append(part)
+  return ', '.join(valid_partz)
+
 if __name__ == '__main__':
   s2_geocode()
```
