#!/bin/bash
#
# pdf_dump_first_page.sh - dump the text of the first page of each PDF.
#
# Provenance: introduced by commit f616775cd805ef991bae5f3058bb9c7857896d5a
# ("dump first pages"), Jules Laplace, Mon, 5 Nov 2018 23:14:56 +0100.
#
# Usage: pdf_dump_first_page.sh [PDF_ROOT]
#   PDF_ROOT  Directory searched with the pattern PDF_ROOT/*/*/*.pdf.
#             Defaults to datasets/s2/pdf (the original hard-coded location).
#
# For every matching PDF the script runs `pdf2txt.py -p 1` and stores its
# stdout next to the PDF, in a file with the last extension replaced by
# `.txt`. The path of each file written is printed on stdout, one per line.
#
# Requires: pdf2txt.py reachable as a command.
# Exit status: 0 when every extraction succeeded (or no PDF was found),
#              1 when at least one extraction failed.

set -u

#######################################
# Extract page 1 of every PDF below a root directory.
# Arguments: $1 - PDF root directory (optional, default datasets/s2/pdf)
# Outputs:   path of each .txt written, on stdout; warnings on stderr
# Returns:   0 if nothing failed, 1 if any extraction failed
#######################################
dump_first_pages() {
  local root=${1:-datasets/s2/pdf}
  local pdf output
  local status=0

  for pdf in "$root"/*/*/*.pdf; do
    # An unmatched glob is left as the literal pattern; skip it rather than
    # handing a non-existent path to pdf2txt.py.
    [[ -f "$pdf" ]] || continue

    output="${pdf%.*}.txt"

    # Keep going on failure so one broken PDF does not stop the batch, but
    # do not report the file as written and do not leave a truncated .txt.
    if pdf2txt.py -p 1 "$pdf" > "$output"; then
      printf '%s\n' "$output"
    else
      printf 'warning: could not extract text from %s\n' "$pdf" >&2
      rm -f -- "$output"
      status=1
    fi
  done

  return "$status"
}

dump_first_pages "$@"