summaryrefslogtreecommitdiff
path: root/util.py
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2018-11-06 15:05:40 +0100
committerJules Laplace <julescarbon@gmail.com>2018-11-06 15:05:40 +0100
commitaacdf0fa056b51000ff88479da479ded3f36b59c (patch)
treede9e221ffd9bf8c67ef54607d6267f00b5233312 /util.py
parent002e72bb172c34bb71756f9e6c23294913f1ef85 (diff)
we geocoding
Diffstat (limited to 'util.py')
-rw-r--r--util.py33
1 files changed, 21 insertions, 12 deletions
diff --git a/util.py b/util.py
index 400c7ee3..b38cfec1 100644
--- a/util.py
+++ b/util.py
@@ -14,15 +14,20 @@ def read_citation_list(index=0):
lines = lines[1:]
return keys, lines
-def read_csv(fn, keys=True):
- with open(fn, 'r') as f:
- reader = csv.reader(f)
- lines = list(reader)
- if keys:
- keys = lines[0]
- lines = lines[1:]
- return keys, lines
- return lines
+def read_csv(fn, keys=True, create=False):
+ try:
+ with open(fn, 'r') as f:
+ reader = csv.reader( (line.replace('\0','') for line in f) )
+ lines = list(reader)
+ if keys:
+ keys = lines[0]
+ lines = lines[1:]
+ return keys, lines
+ return lines
+ except:
+ if create:
+ return []
+ raise
def read_json(fn):
with open(fn, 'r') as json_file:
@@ -41,6 +46,7 @@ def write_csv(fn, keys, rows):
writer.writerow(row)
def write_report(fn, title=None, keys=None, rows=[]):
+ count = 0
with open(fn, 'w') as f:
f.write("<!doctype html>")
f.write("<html>")
@@ -57,6 +63,9 @@ def write_report(fn, title=None, keys=None, rows=[]):
for key in keys:
f.write("<th>{}</th>".format(key))
for row in rows:
+ if row is None:
+ return
+ count += 1
f.write("<tr>")
for cell in row:
if isinstance(cell, list) or isinstance(cell, tuple):
@@ -67,6 +76,7 @@ def write_report(fn, title=None, keys=None, rows=[]):
f.write("</table>")
f.write("</body>")
f.write("</html>")
+ print("{} {}".format(fn, count))
def paper_path(key='papers', paper_id=''):
return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id)
@@ -100,12 +110,11 @@ class RawPaper(object):
return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ]
def load_paper(paper_id):
- print('_______________')
if os.path.exists(paper_path('db_papers', paper_id)):
- print('db paper')
+ # print('db paper')
return DbPaper(paper_id)
if os.path.exists(paper_path('raw_papers', paper_id)):
- print('raw paper')
+ # print('raw paper')
return RawPaper(paper_id)
print('no paper')
return None