summary refs log tree commit diff
path: root/util.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-09 02:52:17 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-09 02:52:17 +0100
commit ca626447b49c55f40ef58d97ee7ff1784f3481b0 (patch)
tree bc442fdeeaec70bad6286a03b5ae96738e716428 /util.py
parent 2fd066e9c3cb0e45d7a055d090084f941a40fadb (diff)
arcs on dark maps
Diffstat (limited to 'util.py')
-rw-r--r-- util.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/util.py b/util.py
index d4e5c01f..d851d797 100644
--- a/util.py
+++ b/util.py
@@ -1,5 +1,6 @@
import os
import csv
+import string
import codecs
import simplejson as json
@@ -181,3 +182,37 @@ def load_paper(paper_id):
return RawPaper(paper_id)
print('no paper')
return None
+
def dedupe(a):
    """Return the distinct items of ``a`` as a new sorted list.

    Items must be hashable (they are used as dictionary keys) and
    mutually orderable (they are passed to ``sorted``).
    """
    # dict.fromkeys keeps one key per distinct item, each mapped to None.
    unique = dict.fromkeys(a)
    return sorted(unique)
+
class AddressBook(object):
    """Look up institution rows by free-form address text.

    Attributes:
        data: rows returned by ``read_csv`` for
            ``reports/all_institutions_sorted.csv``.
        lookup: maps the lower-cased, stripped value of column 1 of each
            row to that row's column 0 value.
        entities: maps a column 0 value to the index (in ``data``) of the
            row whose column 0 and column 1 are equal.

    NOTE(review): column 0 looks like a canonical institution name and
    column 1 like an alias for it -- confirm against the CSV.
    """

    def __init__(self):
        entities = {}
        lookup = {}
        data = read_csv('reports/all_institutions_sorted.csv', keys=None)
        for index, line in enumerate(data):
            # Blank or truncated rows cannot contribute a name/alias pair.
            if len(line) < 2:
                continue
            if line[0] == line[1]:
                entities[line[0]] = index
            lookup[line[1].lower().strip()] = line[0]
        self.data = data
        self.lookup = lookup
        self.entities = entities

    def _resolve(self, key):
        """Return the row registered for the normalized ``key``, or None."""
        entity = self.lookup.get(key)
        if entity is None:
            return None
        # An alias may name an entity that has no row with equal columns
        # 0 and 1; treat that as "not found" instead of raising KeyError.
        index = self.entities.get(entity)
        if index is None:
            return None
        return self.data[index]

    def find(self, address):
        """Return the row matching ``address``, or None if nothing matches.

        The address is lower-cased and stripped of surrounding whitespace
        and then of leading/trailing digits. The whole string is tried
        first; after that each comma-separated part is tried in order, and
        the first part that resolves wins.
        """
        address = address.lower().strip().strip(string.digits)
        row = self._resolve(address)
        if row is not None:
            return row
        for part in address.split(','):
            part = part.strip()
            row = self._resolve(part)
            if row is None:
                # Retry with runs of whitespace collapsed to single spaces.
                collapsed = ' '.join(part.split())
                if collapsed != part:
                    row = self._resolve(collapsed)
            if row is not None:
                return row
        return None