diff options
Diffstat (limited to 'util.py')
| -rw-r--r-- | util.py | 84 |
1 files changed, 65 insertions, 19 deletions
@@ -42,6 +42,10 @@ def read_csv(fn, keys=True, create=False): return [] raise +def csv_writer(fn): + with open(fn, 'w', newline='', encoding='utf-8') as f: + return csv.writer(f) + def write_csv(fn, keys, rows): with open(fn, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) @@ -50,6 +54,10 @@ def write_csv(fn, keys, rows): for row in rows: writer.writerow(row) +def read_text(fn): + with open(fn, 'r') as f: + return f.read() + def read_json(fn): with open(fn, 'r') as json_file: return json.load(json_file) @@ -59,7 +67,6 @@ def write_json(fn, data): json.dump(data, outfile) def write_report(fn, title=None, keys=None, rows=[]): - count = 0 with open(fn, 'w') as f: f.write("<!doctype html>") f.write("<html>") @@ -71,26 +78,50 @@ def write_report(fn, title=None, keys=None, rows=[]): f.write("<body>") if title is not None: f.write("<h2>{}</h2>".format(title)) - f.write("<table border='1' cellpadding='3' cellspacing='3'>") - if keys is not None: - for key in keys: - f.write("<th>{}</th>".format(key)) - for row in rows: - if row is None: - return - count += 1 - f.write("<tr>") - for cell in row: - if isinstance(cell, list) or isinstance(cell, tuple): - f.write("<td>{}</td>".format('<br/>'.join(str(x) for x in cell))) - else: - f.write("<td>{}</td>".format(cell)) - f.write("</tr>") - f.write("</table>") + count = write_table(f, keys=keys, rows=rows) f.write("</body>") f.write("</html>") print("{} {}".format(fn, count)) +class NameLine(object): + def __init__(self, s): + self.s = s.strip() + def __str__(self): + return '<span class="name">' + self.s + '</span>' + +class BoldLine(object): + def __init__(self, s): + self.s = s.strip() + def __str__(self): + return '<b>' + self.s + '</b>' + +class LinkLine(object): + def __init__(self, href, txt): + self.href = href + self.txt = txt.strip + def __str__(self): + return '<a href="{}">{}</a>'.format(self.href, self.txt) + +def write_table(f, keys, rows): + count = 0 + f.write("<table border='1' cellpadding='3' cellspacing='3'>") + if keys is not None: + for key in keys: + f.write("<th>{}</th>".format(key)) + for row in rows: + if row is None: + return + count += 1 + f.write("<tr>") + for cell in row: + if isinstance(cell, list) or isinstance(cell, tuple): + f.write("<td>{}</td>".format('<br/>'.join(str(x) for x in cell))) + else: + f.write("<td>{}</td>".format(cell)) + f.write("</tr>") + f.write("</table>") + return count + def paper_path(key='papers', paper_id=''): return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id) @@ -105,26 +136,41 @@ class DbPaper(object): def journal(self): return self.data['journalName'] @property + def year(self): + return self.data['year'] if 'year' in self.data else '' + @property def authors(self): return [ (author['ids'][0] if len(author['ids']) else '', author['name']) for author in self.data['authors'] ] + def record(self): + return [ self.paper_id, self.title, self.journal, self.year ] class RawPaper(object): def __init__(self, paper_id): self.paper_id = paper_id data = read_json(paper_path('raw_papers', paper_id)) + # print(data) if 'paper' not in data: print(data) + self.data = None return None self.data = data['paper'] @property def title(self): return self.data['title']['text'] @property + def year(self): + return self.data['year']['text'] + @property def journal(self): - return self.data['journal']['name'] + if 'journal' in self.data and 'name' in self.data['journal']: + return self.data['journal']['name'] + else: + return 'Unknown' @property def authors(self): - return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ] + return [ (author[0]['ids'][0] if len(author[0]['ids']) else '', author[0]['name']) for author in self.data['authors'] ] + def record(self): + return [ self.paper_id, self.title, self.journal, self.year ] def load_paper(paper_id): if os.path.exists(paper_path('db_papers', paper_id)): |
