def remove_department_name(name):
    """Strip department- and school-level components from an institution name.

    The name is treated as a ``', '``-separated list of components. Any
    component containing "school of" or "department of" (compared
    case-insensitively) is dropped, and the remaining components are
    re-joined with ``', '``.

    Args:
        name: Institution string, e.g.
            ``"Department of Physics, University of Oxford"``.

    Returns:
        The name with its department/school components removed. A falsy
        ``name`` (``None`` or ``''``) is returned unchanged. If every
        component would be dropped, the original ``name`` is returned
        unchanged rather than an empty string.
    """
    if not name:
        # Nothing to filter; hand the value back so the caller's own
        # emptiness check can deal with it.
        return name

    markers = ('school of', 'department of')
    kept = [
        part for part in name.split(', ')
        if not any(marker in part.lower() for marker in markers)
    ]

    if not kept:
        # Names such as "London School of Economics" consist solely of a
        # matching component; returning '' would erase the institution.
        return name
    return ', '.join(kept)