summary | refs | log | tree | commit | diff
path: root/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'util.py')
-rw-r--r-- util.py 84
1 files changed, 65 insertions, 19 deletions
diff --git a/util.py b/util.py
index 2ef986ef..d4e5c01f 100644
--- a/util.py
+++ b/util.py
@@ -42,6 +42,10 @@ def read_csv(fn, keys=True, create=False):
return []
raise
+def csv_writer(fn):
+    """Open *fn* for writing and return a ``csv.writer`` bound to it.
+
+    The file is opened with ``newline=''`` and UTF-8 encoding.
+
+    The handle is deliberately NOT wrapped in a ``with`` block: returning
+    the writer from inside ``with`` closes the file before the caller can
+    use it, so every later ``writerow`` fails with
+    ``ValueError: I/O operation on closed file``.
+
+    The writer keeps the file object alive; in CPython the file is flushed
+    and closed once the writer is garbage-collected.
+    """
+    f = open(fn, 'w', newline='', encoding='utf-8')
+    return csv.writer(f)
+
def write_csv(fn, keys, rows):
with open(fn, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
@@ -50,6 +54,10 @@ def write_csv(fn, keys, rows):
for row in rows:
writer.writerow(row)
+def read_text(fn):
+    """Return the entire contents of the text file *fn* as one string."""
+    with open(fn, 'r') as handle:
+        contents = handle.read()
+    return contents
+
def read_json(fn):
    """Parse the JSON document stored in file *fn* and return the result."""
    with open(fn, 'r') as handle:
        parsed = json.load(handle)
    return parsed
@@ -59,7 +67,6 @@ def write_json(fn, data):
json.dump(data, outfile)
def write_report(fn, title=None, keys=None, rows=[]):
- count = 0
with open(fn, 'w') as f:
f.write("<!doctype html>")
f.write("<html>")
@@ -71,26 +78,50 @@ def write_report(fn, title=None, keys=None, rows=[]):
f.write("<body>")
if title is not None:
f.write("<h2>{}</h2>".format(title))
- f.write("<table border='1' cellpadding='3' cellspacing='3'>")
- if keys is not None:
- for key in keys:
- f.write("<th>{}</th>".format(key))
- for row in rows:
- if row is None:
- return
- count += 1
- f.write("<tr>")
- for cell in row:
- if isinstance(cell, list) or isinstance(cell, tuple):
- f.write("<td>{}</td>".format('<br/>'.join(str(x) for x in cell)))
- else:
- f.write("<td>{}</td>".format(cell))
- f.write("</tr>")
- f.write("</table>")
+ count = write_table(f, keys=keys, rows=rows)
f.write("</body>")
f.write("</html>")
print("{} {}".format(fn, count))
+class NameLine(object):
+    """Text fragment rendered as ``<span class="name">...</span>`` by ``str()``."""
+
+    def __init__(self, s):
+        # Surrounding whitespace is removed once, at construction time.
+        self.s = s.strip()
+
+    def __str__(self):
+        return ''.join(('<span class="name">', self.s, '</span>'))
+
+class BoldLine(object):
+    """Text fragment rendered in bold (``<b>...</b>``) by ``str()``."""
+
+    def __init__(self, s):
+        # Surrounding whitespace is removed once, at construction time.
+        self.s = s.strip()
+
+    def __str__(self):
+        return ''.join(('<b>', self.s, '</b>'))
+
+class LinkLine(object):
+    """Hyperlink rendered as ``<a href="...">...</a>`` by ``str()``.
+
+    *href* is used as given; *txt* is the link text with surrounding
+    whitespace removed.
+    """
+
+    def __init__(self, href, txt):
+        self.href = href
+        # strip must be CALLED: storing the bound method made str() emit
+        # "<built-in method strip of str object at 0x...>" as the link text.
+        self.txt = txt.strip()
+
+    def __str__(self):
+        return '<a href="{}">{}</a>'.format(self.href, self.txt)
+
+def write_table(f, keys, rows):
+    """Write *rows* to *f* as an HTML table and return how many rows were written.
+
+    f    -- object with a ``write(str)`` method.
+    keys -- iterable of header labels, or None for no header cells.  Header
+            cells are written as ``<th>`` elements directly after the
+            opening ``<table>`` tag.
+    rows -- iterable of rows; each row is an iterable of cells.  A list or
+            tuple cell is rendered as its items joined by ``<br/>``.  A row
+            of None acts as an end marker: it and everything after it are
+            not written.
+
+    Returns the number of ``<tr>`` rows emitted.
+    """
+    count = 0
+    f.write("<table border='1' cellpadding='3' cellspacing='3'>")
+    if keys is not None:
+        for key in keys:
+            f.write("<th>{}</th>".format(key))
+    for row in rows:
+        if row is None:
+            # Stop at the end marker, but still fall through to close the
+            # table and return the count.  A bare `return` here yielded
+            # None to the caller and left <table> unclosed.
+            break
+        count += 1
+        f.write("<tr>")
+        for cell in row:
+            if isinstance(cell, (list, tuple)):
+                f.write("<td>{}</td>".format('<br/>'.join(str(x) for x in cell)))
+            else:
+                f.write("<td>{}</td>".format(cell))
+        f.write("</tr>")
+    f.write("</table>")
+    return count
+
def paper_path(key='papers', paper_id=''):
    """Build the path of a paper's JSON file under ./datasets/s2.

    The layout is <root>/<key>/<first two characters of paper_id>/<paper_id>/paper.json.
    """
    root = './datasets/s2'
    shard = paper_id[0:2]
    return '{}/{}/{}/{}/paper.json'.format(root, key, shard, paper_id)
@@ -105,26 +136,41 @@ class DbPaper(object):
def journal(self):
return self.data['journalName']
@property
+ def year(self):
+ return self.data['year'] if 'year' in self.data else ''
+ @property
def authors(self):
return [ (author['ids'][0] if len(author['ids']) else '', author['name']) for author in self.data['authors'] ]
+ def record(self):
+ return [ self.paper_id, self.title, self.journal, self.year ]
class RawPaper(object):
# Paper record read from the 'raw_papers' JSON file located via paper_path().
# The properties below index into the 'paper' object of that file.
def __init__(self, paper_id):
self.paper_id = paper_id
data = read_json(paper_path('raw_papers', paper_id))
+ # print(data)
if 'paper' not in data:
# No 'paper' key: dump what was loaded and leave the instance without data.
print(data)
+ self.data = None
# NOTE(review): with self.data set to None, title/year/journal/authors/record
# raise TypeError when accessed; callers presumably must check .data first -- confirm.
return None
self.data = data['paper']
@property
def title(self):
# Title string stored at data['title']['text'].
return self.data['title']['text']
@property
+ def year(self):
# Reads data['year']['text']; unlike journal there is no fallback if the key is absent.
+ return self.data['year']['text']
+ @property
def journal(self):
# Journal name, or 'Unknown' when 'journal' or its 'name' key is missing.
- return self.data['journal']['name']
+ if 'journal' in self.data and 'name' in self.data['journal']:
+ return self.data['journal']['name']
+ else:
+ return 'Unknown'
@property
def authors(self):
# List of (first id or '' when 'ids' is empty, name) tuples; each entry of
# data['authors'] is indexed with [0] before its 'ids' and 'name' are read.
- return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ]
+ return [ (author[0]['ids'][0] if len(author[0]['ids']) else '', author[0]['name']) for author in self.data['authors'] ]
+ def record(self):
# Flat row: [paper_id, title, journal, year].
+ return [ self.paper_id, self.title, self.journal, self.year ]
def load_paper(paper_id):
if os.path.exists(paper_path('db_papers', paper_id)):