From 2fd066e9c3cb0e45d7a055d090084f941a40fadb Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 8 Nov 2018 19:25:04 +0100 Subject: taking another look at the papers --- util.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 19 deletions(-) (limited to 'util.py') diff --git a/util.py b/util.py index 2ef986ef..d4e5c01f 100644 --- a/util.py +++ b/util.py @@ -42,6 +42,10 @@ def read_csv(fn, keys=True, create=False): return [] raise +def csv_writer(fn): + with open(fn, 'w', newline='', encoding='utf-8') as f: + return csv.writer(f) + def write_csv(fn, keys, rows): with open(fn, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) @@ -50,6 +54,10 @@ def write_csv(fn, keys, rows): for row in rows: writer.writerow(row) +def read_text(fn): + with open(fn, 'r') as f: + return f.read() + def read_json(fn): with open(fn, 'r') as json_file: return json.load(json_file) @@ -59,7 +67,6 @@ def write_json(fn, data): json.dump(data, outfile) def write_report(fn, title=None, keys=None, rows=[]): - count = 0 with open(fn, 'w') as f: f.write("") f.write("") @@ -71,26 +78,50 @@ def write_report(fn, title=None, keys=None, rows=[]): f.write("") if title is not None: f.write("

{}

".format(title)) - f.write("") - if keys is not None: - for key in keys: - f.write("".format(key)) - for row in rows: - if row is None: - return - count += 1 - f.write("") - for cell in row: - if isinstance(cell, list) or isinstance(cell, tuple): - f.write("".format('
'.join(str(x) for x in cell))) - else: - f.write("".format(cell)) - f.write("") - f.write("
{}
{}{}
") + count = write_table(f, keys=keys, rows=rows) f.write("") f.write("") print("{} {}".format(fn, count)) +class NameLine(object): + def __init__(self, s): + self.s = s.strip() + def __str__(self): + return '' + self.s + '' + +class BoldLine(object): + def __init__(self, s): + self.s = s.strip() + def __str__(self): + return '' + self.s + '' + +class LinkLine(object): + def __init__(self, href, txt): + self.href = href + self.txt = txt.strip + def __str__(self): + return '{}'.format(self.href, self.txt) + +def write_table(f, keys, rows): + count = 0 + f.write("") + if keys is not None: + for key in keys: + f.write("".format(key)) + for row in rows: + if row is None: + return + count += 1 + f.write("") + for cell in row: + if isinstance(cell, list) or isinstance(cell, tuple): + f.write("".format('
'.join(str(x) for x in cell))) + else: + f.write("".format(cell)) + f.write("") + f.write("
{}
{}{}
") + return count + def paper_path(key='papers', paper_id=''): return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id) @@ -105,26 +136,41 @@ class DbPaper(object): def journal(self): return self.data['journalName'] @property + def year(self): + return self.data['year'] if 'year' in self.data else '' + @property def authors(self): return [ (author['ids'][0] if len(author['ids']) else '', author['name']) for author in self.data['authors'] ] + def record(self): + return [ self.paper_id, self.title, self.journal, self.year ] class RawPaper(object): def __init__(self, paper_id): self.paper_id = paper_id data = read_json(paper_path('raw_papers', paper_id)) + # print(data) if 'paper' not in data: print(data) + self.data = None return None self.data = data['paper'] @property def title(self): return self.data['title']['text'] @property + def year(self): + return self.data['year']['text'] + @property def journal(self): - return self.data['journal']['name'] + if 'journal' in self.data and 'name' in self.data['journal']: + return self.data['journal']['name'] + else: + return 'Unknown' @property def authors(self): - return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ] + return [ (author[0]['ids'][0] if len(author[0]['ids']) else '', author[0]['name']) for author in self.data['authors'] ] + def record(self): + return [ self.paper_id, self.title, self.journal, self.year ] def load_paper(paper_id): if os.path.exists(paper_path('db_papers', paper_id)): -- cgit v1.2.3-70-g09d2