diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 01:42:13 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 01:42:13 +0100 |
| commit | 002e72bb172c34bb71756f9e6c23294913f1ef85 (patch) | |
| tree | ea3f3f91bb1a5219801fbf26cf5c12a3eeff1a55 /s2-pdf-report.py | |
| parent | 743159991f1bcf2080693424ebe5ad7001865583 (diff) | |
maybe rm empty txts
Diffstat (limited to 's2-pdf-report.py')
| -rw-r--r-- | s2-pdf-report.py | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/s2-pdf-report.py b/s2-pdf-report.py
index 6ef5c0f7..7c89381f 100644
--- a/s2-pdf-report.py
+++ b/s2-pdf-report.py
@@ -12,10 +12,11 @@ PDF_DIR = 'datasets/s2/pdf'
 def pdf_report_first_pages():
   rows = []
   for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True):
-    row = process_paper(fn)
+    row, institutions = process_paper(fn)
     print(row)
     rows.append(row)
   write_report('reports/first_pages.html', title='First pages', keys=None, rows=rows)
+  write_report('reports/institutions.html', title='Institutions', keys=None, rows=institutions)
   print("Wrote {} rows".format(len(rows)))
 
 def process_paper(fn):
@@ -27,6 +28,7 @@ def process_paper(fn):
   with open(fn, 'r') as f:
     lines = []
     emails = []
+    institutions = []
     authors = [ (a[0], a[1], a[1].lower(),) for a in paper.authors ]
     journal = paper.journal.lower()
     found_authors = []
@@ -55,6 +57,7 @@ def process_paper(fn):
         # lines.append(NameLine(line))
         continue
       if 'university' in l or 'universiteit' in l or 'research center' in l:
+        institutions.append(line)
         lines.append(BoldLine(line))
         continue
       lines.append(line)
@@ -63,6 +66,9 @@ def process_paper(fn):
       lines,
       found_authors,
       emails,
+    ], [
+      paper_id,
+      sorted(institutions),
     ]
 
 class NameLine(object):
```
