"""Write a sorted copy of the institution report, adding university-only rows."""

import os
import gzip
import glob
import json
import math
import operator

import click

from util import *  # supplies read_csv / write_csv used below

# Names whose stripped length is not greater than this are ignored.
# NOTE(review): presumably filters blank/placeholder names -- confirm.
MIN_NAME_LENGTH = 10


@click.command()
def expand_uni_lookup():
    """Load the institution lookup and write it out sorted by first column."""
    addresses = load_unexpanded_addresses()
    write_csv(
        'reports/all_institutions_sorted.csv',
        keys=None,
        rows=sorted(addresses.values(), key=operator.itemgetter(0)),
    )


def _university_part(name):
    """Return the last ', '-separated part of *name* containing 'universit'.

    The match is case-insensitive. When no part matches, *name* itself is
    returned unchanged.
    """
    uni_name = name
    for part in name.split(', '):
        # No break on purpose: a later matching part overrides an earlier one.
        if 'universit' in part.lower():
            uni_name = part
    return uni_name


def load_unexpanded_addresses():
    """Read 'reports/all_institutions.csv' and build a name -> row lookup.

    Every row whose first column is longer than ``MIN_NAME_LENGTH`` (after
    stripping) is converted with :func:`convert_row` and stored under its
    full name. When a shorter university name can be extracted from the full
    name, the row's first column is replaced by it, and an extra row keyed by
    the university name alone is added unless that key already exists.

    Returns:
        dict mapping names to converted rows (lists).
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # file; `lookup[name] = new_row` is assumed to run for every accepted row.
    data = read_csv('reports/all_institutions.csv', keys=None)
    lookup = {}
    for row in data:
        if not row:
            # A completely empty row has no name column to inspect.
            continue
        name = row[0]
        if len(name.strip()) <= MIN_NAME_LENGTH:
            continue

        uni_name = _university_part(name)
        new_row = convert_row(row)
        if uni_name != name:
            print(uni_name)
            new_row[0] = uni_name
            # Separate row for the bare university name; the first one seen
            # for a given university wins.
            uni_row = new_row.copy()
            uni_row[1] = uni_name
            lookup.setdefault(uni_name, uni_row)
        lookup[name] = new_row
    return lookup


def convert_row(row):
    """Return a new 5-item list built from input columns 0, 0, 3, 1, 2.

    Column 0 is duplicated so callers can overwrite one copy while keeping
    the other. Raises IndexError if *row* has fewer than four columns.
    """
    return [
        row[0],
        row[0],
        row[3],
        row[1],
        row[2],
    ]


if __name__ == '__main__':
    expand_uni_lookup()