diff options
Diffstat (limited to 's2-geocode.py')
| -rw-r--r-- | s2-geocode.py | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/s2-geocode.py b/s2-geocode.py index e8dff470..63b3c463 100644 --- a/s2-geocode.py +++ b/s2-geocode.py @@ -21,7 +21,7 @@ def s2_geocode(fn): invalid_names = [row[0] for row in invalid] random.shuffle(rows) for i, row in enumerate(rows): - name = row[0] + name = remove_department_name(row[0]) if not name: continue if name in invalid_names: @@ -48,5 +48,16 @@ def s2_geocode(fn): write_csv('./reports/doi_institutions_not_found.csv', keys=None, rows=invalid) time.sleep(2) +def remove_department_name(name): + name_partz = name.split(', ') + valid_partz = [] + for part in name_partz: + if 'school of' in part.lower(): + continue + if 'department of' in part.lower(): + continue + valid_partz.append(part) + return ', '.join(valid_partz) + if __name__ == '__main__': s2_geocode() |
