diff options
Diffstat (limited to 's2-pdf-report.py')
| -rw-r--r-- | s2-pdf-report.py | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/s2-pdf-report.py b/s2-pdf-report.py index 6ef5c0f7..7c89381f 100644 --- a/s2-pdf-report.py +++ b/s2-pdf-report.py @@ -12,10 +12,11 @@ PDF_DIR = 'datasets/s2/pdf' def pdf_report_first_pages(): rows = [] for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True): - row = process_paper(fn) + row, institutions = process_paper(fn) print(row) rows.append(row) write_report('reports/first_pages.html', title='First pages', keys=None, rows=rows) + write_report('reports/institutions.html', title='Institutions', keys=None, rows=institutions) print("Wrote {} rows".format(len(rows))) def process_paper(fn): @@ -27,6 +28,7 @@ def process_paper(fn): with open(fn, 'r') as f: lines = [] emails = [] + institutions = [] authors = [ (a[0], a[1], a[1].lower(),) for a in paper.authors ] journal = paper.journal.lower() found_authors = [] @@ -55,6 +57,7 @@ def process_paper(fn): # lines.append(NameLine(line)) continue if 'university' in l or 'universiteit' in l or 'research center' in l: + institutions.append(line) lines.append(BoldLine(line)) continue lines.append(line) @@ -63,6 +66,9 @@ def process_paper(fn): lines, found_authors, emails, + ], [ + paper_id, + sorted(institutions), ] class NameLine(object): |
