summaryrefslogtreecommitdiff
path: root/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'util.py')
-rw-r--r--util.py35
1 files changed, 35 insertions, 0 deletions
diff --git a/util.py b/util.py
index d4e5c01f..d851d797 100644
--- a/util.py
+++ b/util.py
@@ -1,5 +1,6 @@
import os
import csv
+import string
import codecs
import simplejson as json
@@ -181,3 +182,37 @@ def load_paper(paper_id):
return RawPaper(paper_id)
print('no paper')
return None
+
def dedupe(a):
    """Return the distinct items of *a* as a sorted list.

    *a* may be any iterable of hashable, mutually orderable items.
    An empty iterable yields an empty list.
    """
    # dict.fromkeys collects the unique items in one call; sorted() then
    # iterates the dict's keys, exactly as the previous manual loop did.
    return sorted(dict.fromkeys(a))
+
class AddressBook(object):
    """Resolve free-form address strings to canonical institution rows.

    Attributes:
        data: the rows returned by ``read_csv`` for the institutions file.
        lookup: maps a lower-cased, stripped value of column 1 to the
            value of column 0 on the same row.
        entities: maps a column-0 value to the index in ``data`` of the
            row where column 0 equals column 1.
    """

    def __init__(self, path='reports/all_institutions_sorted.csv'):
        """Load the institutions table and build the lookup indexes.

        ``path`` defaults to the report file this class has always read.
        NOTE(review): ``read_csv`` is defined elsewhere in this module;
        this assumes that with ``keys=None`` it returns an indexable
        sequence of row sequences - confirm against its definition.
        """
        entities = {}
        lookup = {}
        data = read_csv(path, keys=None)
        for index, line in enumerate(data):
            # Blank or truncated rows cannot contribute a name pair.
            if len(line) < 2:
                continue
            entity, name = line[0], line[1]
            if entity == name:
                # The self-referencing row is the one find() returns.
                entities[entity] = index
            lookup[name.lower().strip()] = entity
        self.data = data
        self.lookup = lookup
        self.entities = entities

    def _row_for(self, key):
        """Return the data row for an already-normalised key, or None."""
        entity = self.lookup.get(key)
        if entity is None:
            return None
        # A name may point at an entity that has no self-referencing row;
        # treat that as "not found" rather than raising KeyError.
        index = self.entities.get(entity)
        if index is None:
            return None
        return self.data[index]

    def find(self, address):
        """Return the row matching *address*, or None if nothing matches.

        The whole address is tried first, then each comma-separated part
        in order. Matching is case-insensitive and ignores digits and
        whitespace at either end of the address.
        """
        if not address:
            return None
        # Strip digits and whitespace together so that a trailing number
        # such as "mit 02139" does not leave a dangling space behind.
        address = address.lower().strip(string.digits + string.whitespace)
        row = self._row_for(address)
        if row is not None:
            return row
        for part in address.split(','):
            part = part.strip()
            row = self._row_for(part)
            if row is None:
                # Retry with runs of whitespace collapsed to one space.
                collapsed = ' '.join(part.split())
                if collapsed != part:
                    row = self._row_for(collapsed)
            if row is not None:
                return row
        return None