diff options
Diffstat (limited to 'expand-uni-lookup.py')
| -rw-r--r-- | expand-uni-lookup.py | 42 |
1 files changed, 0 insertions, 42 deletions
import os
import gzip
import glob
import json
import math
import operator
import click
from util import *

# Names whose stripped length does not exceed this are ignored.
_MIN_NAME_LENGTH = 10


# Command entry point: builds the institution lookup and writes its rows,
# ordered by their first column, to 'reports/all_institutions_sorted.csv'.
# (Documented with comments rather than a docstring so the text click shows
# for --help is left exactly as it was.)
@click.command()
def expand_uni_lookup():
    rows = list(load_unexpanded_addresses().values())
    rows.sort(key=operator.itemgetter(0))
    write_csv('reports/all_institutions_sorted.csv', keys=None, rows=rows)


def _last_university_part(name):
    """Return the last ', '-separated piece of *name* containing 'universit'.

    The match is case-insensitive. When no piece matches, *name* itself is
    returned unchanged.
    """
    matching = [piece for piece in name.split(', ') if 'universit' in piece.lower()]
    return matching[-1] if matching else name


def load_unexpanded_addresses():
    """Build a name -> row lookup from 'reports/all_institutions.csv'.

    Each input row is keyed by its first column (the full name) after being
    reshaped by ``convert_row``. Rows whose stripped name is 10 characters or
    shorter are skipped.

    When a shorter "universit..." piece can be extracted from the full name,
    that piece is printed, stored in column 0 of the full-name row, and also
    registered as a key of its own (first occurrence wins) with both leading
    columns set to the short name.

    Returns:
        dict mapping names to 5-element row lists.
    """
    lookup = {}
    for row in read_csv('reports/all_institutions.csv', keys=None):
        full_name = row[0]
        if len(full_name.strip()) <= _MIN_NAME_LENGTH:
            continue

        short_name = _last_university_part(full_name)
        entry = convert_row(row)
        if short_name != full_name:
            print(short_name)
            entry[0] = short_name
            # Only the first row seen for a given short name is kept under it.
            lookup.setdefault(short_name, [short_name, short_name, *entry[2:]])
        # A row stored under its full name always replaces any earlier entry.
        lookup[full_name] = entry
    return lookup


def convert_row(row):
    """Return a new 5-element list holding row columns 0, 0, 3, 1, 2 in that order."""
    return [row[index] for index in (0, 0, 3, 1, 2)]


if __name__ == '__main__':
    expand_uni_lookup()
