summary refs log tree commit diff
path: root/pdf_report_first_pages.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-05 23:34:55 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-05 23:34:55 +0100
commit a7529c979563e87fe9c518a3b6a084772d4b63a4 (patch)
tree 88445643552fcf1e761957cb634012cf4e0bc1a8 /pdf_report_first_pages.py
parent 2cb40f0220c14cc4b42673b4b75fc04406f651ff (diff)
k
Diffstat (limited to 'pdf_report_first_pages.py')
-rw-r--r-- pdf_report_first_pages.py 23
1 files changed, 13 insertions, 10 deletions
diff --git a/pdf_report_first_pages.py b/pdf_report_first_pages.py
index d7fd3061..ae080539 100644
--- a/pdf_report_first_pages.py
+++ b/pdf_report_first_pages.py
@@ -6,27 +6,30 @@ import click
from util import *
PDF_DIR = 'datasets/s2/pdf'
+FIRST_PAGES_KEYS = ''
@click.command()
def pdf_report_first_pages():
- ids = {}
+ rows = []
for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True):
- process_paper(fn, ids)
- first_pages = list(ids.keys())
- print("Wrote {} ids".format(len(id_list)))
- write_html('reports/first_pages.html', first_pages)
+ row = process_paper(fn)
+ rows.append(row)
+ write_report('reports/first_pages.html', title='First pages', keys=FIRST_PAGES_KEYS, rows=rows)
+ print("Wrote {} rows".format(len(rows)))
-def process_paper(fn, ids):
+def process_paper(fn):
+ index = fn.replace(PDF_DIR, '').split('/')[2]
with open(fn, 'r') as f:
- lines = []
- for line in f.readlines:
+ lines = ''
+ for line in f.readlines():
if 'abstract' in line.lower():
break
if len(line) < 3:
continue
- lines.append(line)
+ lines += line + '<br>'
return [
- lines.join(''),
+ index,
+ lines
]
def paper_path(paper_id):