From a7529c979563e87fe9c518a3b6a084772d4b63a4 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Mon, 5 Nov 2018 23:34:55 +0100 Subject: k --- pdf_report_first_pages.py | 23 +++++++++++++---------- split-csv.py | 2 +- util.py | 4 ++-- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pdf_report_first_pages.py b/pdf_report_first_pages.py index d7fd3061..ae080539 100644 --- a/pdf_report_first_pages.py +++ b/pdf_report_first_pages.py @@ -6,27 +6,30 @@ import click from util import * PDF_DIR = 'datasets/s2/pdf' +FIRST_PAGES_KEYS = '' @click.command() def pdf_report_first_pages(): - ids = {} + rows = [] for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True): - process_paper(fn, ids) - first_pages = list(ids.keys()) - print("Wrote {} ids".format(len(id_list))) - write_html('reports/first_pages.html', first_pages) + row = process_paper(fn) + rows.append(row) + write_report('reports/first_pages.html', title='First pages', keys=FIRST_PAGES_KEYS, rows=rows) + print("Wrote {} rows".format(len(rows))) -def process_paper(fn, ids): +def process_paper(fn): + index = fn.replace(PDF_DIR, '').split('/')[2] with open(fn, 'r') as f: - lines = [] - for line in f.readlines: + lines = '' + for line in f.readlines(): if 'abstract' in line.lower(): break if len(line) < 3: continue - lines.append(line) + lines += line + '
' return [ - lines.join(''), + index, + lines ] def paper_path(paper_id): diff --git a/split-csv.py b/split-csv.py index 2db45d85..e7c12883 100644 --- a/split-csv.py +++ b/split-csv.py @@ -9,7 +9,7 @@ import click @click.command() @click.option('--count', '-c', default=2, help='Number of subdivisions.') -@click.option('--has_keys/--has_no_keys', '-k', default=False, help='Whether to split off the keys.') +@click.option('--has_keys/--no_keys', '-k', default=False, help='Whether to split off the keys.') @click.option('--shuffle/--no_shuffle', default=False, help='Whether to shuffle.') @click.argument('filename') def split_csv(count, has_keys, shuffle, filename): diff --git a/util.py b/util.py index f3cdb814..d5796c8e 100644 --- a/util.py +++ b/util.py @@ -47,12 +47,12 @@ def write_report(fn, title=None, keys=None, rows=[]): f.write("") if title is not None: f.write("{}".format(title)) - f.write("") + f.write("") f.write("") f.write("") if title is not None: f.write("

{}

".format(title)) - f.write("") + f.write("
") if keys is not None: for key in keys: f.write("".format(key)) -- cgit v1.2.3-70-g09d2
{}