we geocoding

author: Jules Laplace <julescarbon@gmail.com> 2018-11-06 15:05:40 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-11-06 15:05:40 +0100
commit: aacdf0fa056b51000ff88479da479ded3f36b59c (patch)
tree: de9e221ffd9bf8c67ef54607d6267f00b5233312 /util.py
parent: 002e72bb172c34bb71756f9e6c23294913f1ef85 (diff)
1 file changed, 21 insertions, 12 deletions
diff --git a/util.py b/util.py
index 400c7ee3..b38cfec1 100644
--- a/util.py
+++ b/util.py
@@ -14,15 +14,20 @@ def read_citation_list(index=0):
     lines = lines[1:]
     return keys, lines
 
-def read_csv(fn, keys=True):
-  with open(fn, 'r') as f:
-    reader = csv.reader(f)
-    lines = list(reader)
-    if keys:
-      keys = lines[0]
-      lines = lines[1:]
-      return keys, lines
-    return lines
+def read_csv(fn, keys=True, create=False):
+  try:
+    with open(fn, 'r') as f:
+      reader = csv.reader( (line.replace('\0','') for line in f) )
+      lines = list(reader)
+      if keys:
+        keys = lines[0]
+        lines = lines[1:]
+        return keys, lines
+      return lines
+  except:
+    if create:
+      return []
+    raise
 
 def read_json(fn):
   with open(fn, 'r') as json_file:
@@ -41,6 +46,7 @@ def write_csv(fn, keys, rows):
       writer.writerow(row)
 
 def write_report(fn, title=None, keys=None, rows=[]):
+  count = 0
   with open(fn, 'w') as f:
     f.write("<!doctype html>")
     f.write("<html>")
@@ -57,6 +63,9 @@ def write_report(fn, title=None, keys=None, rows=[]):
       for key in keys:
         f.write("<th>{}</th>".format(key))
     for row in rows:
+      if row is None:
+         return
+      count += 1
       f.write("<tr>")
       for cell in row:
         if isinstance(cell, list) or isinstance(cell, tuple):
@@ -67,6 +76,7 @@ def write_report(fn, title=None, keys=None, rows=[]):
     f.write("</table>")
     f.write("</body>")
     f.write("</html>")
+    print("{} {}".format(fn, count))
 
 def paper_path(key='papers', paper_id=''):
   return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id)
@@ -100,12 +110,11 @@ class RawPaper(object):
     return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ]
 
 def load_paper(paper_id):
-  print('_______________')
   if os.path.exists(paper_path('db_papers', paper_id)):
-    print('db paper')
+    # print('db paper')
     return DbPaper(paper_id)
   if os.path.exists(paper_path('raw_papers', paper_id)):
-    print('raw paper')
+    # print('raw paper')
     return RawPaper(paper_id)
   print('no paper')
   return None
author	Jules Laplace <julescarbon@gmail.com>	2018-11-06 15:05:40 +0100
committer	Jules Laplace <julescarbon@gmail.com>	2018-11-06 15:05:40 +0100
commit	aacdf0fa056b51000ff88479da479ded3f36b59c (patch)
tree	de9e221ffd9bf8c67ef54607d6267f00b5233312 /util.py
parent	002e72bb172c34bb71756f9e6c23294913f1ef85 (diff)