# Provenance: recovered from git patch ee3d0d98e19f1d8177d85af1866fd0ee431fe9ea
# ("moving stuff", Jules Laplace, Sun, 25 Nov 2018 22:19:15 +0100), which
# deleted expand-uni-lookup.py (42 lines). The diff markers have been stripped
# and the line breaks restored; the statements themselves are unchanged in
# effect.
"""Expand the institution lookup with short university names.

Reads ``reports/all_institutions.csv`` and writes
``reports/all_institutions_sorted.csv``. ``read_csv`` and ``write_csv`` are
not defined in this file; they are expected to come from ``util``.
"""
import os
import gzip
import glob
import json
import math
import operator
import click
from util import *


# Command-line entry point: builds the expanded lookup and writes its rows,
# ordered by their first column, to reports/all_institutions_sorted.csv.
# Documented with comments rather than a docstring so that the text click
# shows for --help stays exactly as it was.
@click.command()
def expand_uni_lookup():
    rows_by_name = load_unexpanded_addresses()
    ordered_rows = sorted(rows_by_name.values(), key=operator.itemgetter(0))
    write_csv(
        'reports/all_institutions_sorted.csv',
        keys=None,
        rows=ordered_rows,
    )


def load_unexpanded_addresses():
    """Build a name -> row lookup from ``reports/all_institutions.csv``.

    Rows whose first column, once stripped, is 10 characters or shorter are
    ignored. Every other row is converted with ``convert_row`` and stored
    under its full name. If any ", "-separated part of that name contains
    "universit" (case-insensitive) and the last such part differs from the
    full name, that part is printed, replaces column 0 of the stored row, and
    is also registered under its own key (columns 0 and 1 both set to the
    part) unless that key is already present.

    Returns:
        dict mapping each name to a five-element list.
    """
    lookup = {}
    for source_row in read_csv('reports/all_institutions.csv', keys=None):
        full_name = source_row[0]
        if len(full_name.strip()) <= 10:
            continue

        # The last matching part wins; fall back to the whole name.
        matches = [
            piece for piece in full_name.split(', ')
            if 'universit' in piece.lower()
        ]
        short_name = matches[-1] if matches else full_name

        entry = convert_row(source_row)
        if short_name != full_name:
            print(short_name)
            entry[0] = short_name
            alias = entry.copy()
            alias[1] = short_name
            # An existing entry for the short name is never replaced here.
            lookup.setdefault(short_name, alias)
        # Stored last, so a row keyed by its own full name always overwrites.
        lookup[full_name] = entry
    return lookup


def convert_row(row):
    """Return a new list holding columns 0, 0, 3, 1, 2 of ``row``, in that order.

    Raises:
        IndexError: if ``row`` has fewer than four columns.
    """
    return [row[index] for index in (0, 0, 3, 1, 2)]


if __name__ == '__main__':
    expand_uni_lookup()