summary | refs | log | tree | commit | diff
path: root/s2-pdf-report.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-11-06 01:42:13 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-11-06 01:42:13 +0100
commit: 002e72bb172c34bb71756f9e6c23294913f1ef85 (patch)
tree: ea3f3f91bb1a5219801fbf26cf5c12a3eeff1a55 /s2-pdf-report.py
parent: 743159991f1bcf2080693424ebe5ad7001865583 (diff)
maybe rm empty txts
Diffstat (limited to 's2-pdf-report.py')
-rw-r--r-- s2-pdf-report.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/s2-pdf-report.py b/s2-pdf-report.py
index 6ef5c0f7..7c89381f 100644
--- a/s2-pdf-report.py
+++ b/s2-pdf-report.py
@@ -12,10 +12,11 @@ PDF_DIR = 'datasets/s2/pdf'
def pdf_report_first_pages():
rows = []
for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True):
- row = process_paper(fn)
+ row, institutions = process_paper(fn)
print(row)
rows.append(row)
write_report('reports/first_pages.html', title='First pages', keys=None, rows=rows)
+ write_report('reports/institutions.html', title='Institutions', keys=None, rows=institutions)
print("Wrote {} rows".format(len(rows)))
def process_paper(fn):
@@ -27,6 +28,7 @@ def process_paper(fn):
with open(fn, 'r') as f:
lines = []
emails = []
+ institutions = []
authors = [ (a[0], a[1], a[1].lower(),) for a in paper.authors ]
journal = paper.journal.lower()
found_authors = []
@@ -55,6 +57,7 @@ def process_paper(fn):
# lines.append(NameLine(line))
continue
if 'university' in l or 'universiteit' in l or 'research center' in l:
+ institutions.append(line)
lines.append(BoldLine(line))
continue
lines.append(line)
@@ -63,6 +66,9 @@ def process_paper(fn):
lines,
found_authors,
emails,
+ ], [
+ paper_id,
+ sorted(institutions),
]
class NameLine(object):