# Provenance: these definitions were added to util.py by commit
# ca626447b49c55f40ef58d97ee7ff1784f3481b0 ("arcs on dark maps",
# Jules Laplace, 2018-11-09).
import string


def dedupe(a):
    """Return the distinct items of *a* as a sorted list.

    Items must be hashable and mutually orderable.
    """
    return sorted(set(a))


class AddressBook(object):
    """Resolve institution names / free-form addresses to CSV rows.

    Attributes set by ``__init__``:
        data:     the rows returned by ``read_csv``.
        lookup:   maps ``row[1].lower().strip()`` to ``row[0]``.
        entities: maps ``row[0]`` to the row index, for rows where
                  ``row[0] == row[1]``.

    NOTE(review): presumably column 0 is the canonical institution name
    and column 1 is an alias/spelling of it -- confirm against the CSV.
    """

    def __init__(self, path='reports/all_institutions_sorted.csv'):
        """Load the institutions table and build the lookup indexes.

        Args:
            path: CSV file to load; defaults to the report file that was
                previously hard-coded.
        """
        entities = {}
        lookup = {}
        # read_csv is defined elsewhere in util.py.
        data = read_csv(path, keys=None)
        for index, line in enumerate(data):
            # Skip blank/short rows instead of failing with IndexError.
            if len(line) < 2:
                continue
            if line[0] == line[1]:
                entities[line[0]] = index
            lookup[line[1].lower().strip()] = line[0]
        self.data = data
        self.lookup = lookup
        self.entities = entities

    def _resolve(self, key):
        """Return the row registered for lookup key *key*, or None."""
        entity = self.lookup.get(key)
        if entity is None:
            return None
        # An alias may name an entity that has no row with
        # row[0] == row[1]; treat that as "not found" rather than
        # raising KeyError.
        index = self.entities.get(entity)
        if index is None:
            return None
        return self.data[index]

    def find(self, address):
        """Return the row matching *address*, or None.

        The whole address is tried first (lowercased, with surrounding
        whitespace and leading/trailing digits removed).  If that fails,
        each comma-separated part is tried, first as-is and then with
        runs of whitespace collapsed to a single space.
        """
        if address is None:
            return None
        # The final strip() removes whitespace left behind once the
        # leading/trailing digits are gone (e.g. "name 02139" -> "name").
        address = address.lower().strip().strip(string.digits).strip()
        row = self._resolve(address)
        if row is not None:
            return row
        for part in address.split(','):
            part = part.strip()
            row = self._resolve(part)
            if row is None:
                # Retry with repeated whitespace collapsed.
                row = self._resolve(' '.join(part.split()))
            if row is not None:
                return row
        return None