From 002e72bb172c34bb71756f9e6c23294913f1ef85 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 6 Nov 2018 01:42:13 +0100 Subject: maybe rm empty txts --- s2-pdf-report.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 's2-pdf-report.py') diff --git a/s2-pdf-report.py b/s2-pdf-report.py index 6ef5c0f7..7c89381f 100644 --- a/s2-pdf-report.py +++ b/s2-pdf-report.py @@ -12,10 +12,11 @@ PDF_DIR = 'datasets/s2/pdf' def pdf_report_first_pages(): rows = [] for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True): - row = process_paper(fn) + row, institutions = process_paper(fn) print(row) rows.append(row) write_report('reports/first_pages.html', title='First pages', keys=None, rows=rows) + write_report('reports/institutions.html', title='Institutions', keys=None, rows=institutions) print("Wrote {} rows".format(len(rows))) def process_paper(fn): @@ -27,6 +28,7 @@ def process_paper(fn): with open(fn, 'r') as f: lines = [] emails = [] + institutions = [] authors = [ (a[0], a[1], a[1].lower(),) for a in paper.authors ] journal = paper.journal.lower() found_authors = [] @@ -55,6 +57,7 @@ def process_paper(fn): # lines.append(NameLine(line)) continue if 'university' in l or 'universiteit' in l or 'research center' in l: + institutions.append(line) lines.append(BoldLine(line)) continue lines.append(line) @@ -63,6 +66,9 @@ def process_paper(fn): lines, found_authors, emails, + ], [ + paper_id, + sorted(institutions), ] class NameLine(object): -- cgit v1.2.3-70-g09d2