diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-05 23:34:55 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-05 23:34:55 +0100 |
| commit | a7529c979563e87fe9c518a3b6a084772d4b63a4 (patch) | |
| tree | 88445643552fcf1e761957cb634012cf4e0bc1a8 /pdf_report_first_pages.py | |
| parent | 2cb40f0220c14cc4b42673b4b75fc04406f651ff (diff) | |
Diffstat (limited to 'pdf_report_first_pages.py')
| -rw-r--r-- | pdf_report_first_pages.py | 23 |
1 files changed, 13 insertions, 10 deletions
```diff
diff --git a/pdf_report_first_pages.py b/pdf_report_first_pages.py
index d7fd3061..ae080539 100644
--- a/pdf_report_first_pages.py
+++ b/pdf_report_first_pages.py
@@ -6,27 +6,30 @@ import click
 from util import *
 
 PDF_DIR = 'datasets/s2/pdf'
+FIRST_PAGES_KEYS = ''
 
 @click.command()
 def pdf_report_first_pages():
-  ids = {}
+  rows = []
   for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True):
-    process_paper(fn, ids)
-  first_pages = list(ids.keys())
-  print("Wrote {} ids".format(len(id_list)))
-  write_html('reports/first_pages.html', first_pages)
+    row = process_paper(fn)
+    rows.append(row)
+  write_report('reports/first_pages.html', title='First pages', keys=FIRST_PAGES_KEYS, rows=rows)
+  print("Wrote {} rows".format(len(rows)))
 
-def process_paper(fn, ids):
+def process_paper(fn):
+  index = fn.replace(PDF_DIR, '').split('/')[2]
   with open(fn, 'r') as f:
-    lines = []
-    for line in f.readlines:
+    lines = ''
+    for line in f.readlines():
       if 'abstract' in line.lower():
         break
       if len(line) < 3:
         continue
-      lines.append(line)
+      lines += line + '<br>'
     return [
-      lines.join(''),
+      index,
+      lines
     ]
 
 def paper_path(paper_id):
```
