blob: 4ba531fda9f123643cec58848e5290fadb06d56d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
import os
import gzip
import glob
import json
import math
import operator
import click
from util import *
@click.command()
def expand_uni_lookup():
    """Write the institution lookup to a CSV report, sorted by first column.

    Builds the lookup with ``load_unexpanded_addresses`` and hands its rows,
    ordered by their first element, to ``write_csv`` with the target path
    ``reports/all_institutions_sorted.csv`` and ``keys=None``.
    """
    addresses = load_unexpanded_addresses()
    # Only the rows are written; the dict keys served to de-duplicate names.
    sorted_rows = sorted(addresses.values(), key=operator.itemgetter(0))
    write_csv(
        'reports/all_institutions_sorted.csv',
        keys=None,
        rows=sorted_rows,
    )
def load_unexpanded_addresses():
    """Build a lookup of institution rows keyed by institution name.

    Reads ``reports/all_institutions.csv`` through ``read_csv`` and, for each
    row whose first column (the name) is longer than 10 characters once
    stripped, stores the ``convert_row`` result under the full name.

    If one of the ``', '``-separated parts of the name contains
    ``universit`` (case-insensitive), the last such part is written into
    position 0 of the stored row, and an additional entry keyed by that part
    alone (with positions 0 and 1 both set to the part) is added unless the
    key is already present.

    Returns:
        dict mapping a name to the list produced by ``convert_row``.
    """
    min_name_length = 10
    uni_marker = 'universit'

    data = read_csv('reports/all_institutions.csv', keys=None)
    lookup = {}
    for row in data:
        name = row[0]
        if len(name.strip()) <= min_name_length:
            # Short names are skipped entirely.
            continue

        # Use the last comma-separated part that mentions a university;
        # fall back to the full name when no part matches.
        uni_name = name
        for part in name.split(', '):
            if uni_marker in part.lower():
                uni_name = part

        new_row = convert_row(row)
        if uni_name != name:
            print(uni_name)
            new_row[0] = uni_name
            uni_row = new_row.copy()
            uni_row[1] = uni_name
            # Keep the first row seen for a bare university name.
            if uni_name not in lookup:
                lookup[uni_name] = uni_row

        # NOTE(review): nesting was reconstructed from a copy whose
        # indentation was lost; confirm the full-name entry is meant to be
        # stored for every sufficiently long name, not only for names that
        # contain a university part.
        lookup[name] = new_row
    return lookup
def convert_row(row):
    """Return a new five-element list built from the columns of ``row``.

    The first column is duplicated into positions 0 and 1, followed by
    ``row[3]``, ``row[1]`` and ``row[2]`` in that order. ``row`` itself is
    not modified, and any columns past index 3 are ignored.

    Raises:
        IndexError: if ``row`` is a list or tuple with fewer than four
            elements.
    """
    return [
        row[0], row[0], row[3], row[1], row[2],
    ]
# Run the click command only when the file is executed as a script.
if __name__ == '__main__':
    expand_uni_lookup()
|