From bf7eecfc2c1879f237d67f6bedf556b51a034d6a Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 19 Feb 2019 18:12:34 +0100 Subject: txt --- scraper/s2-extract-full-pdf-txt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scraper/s2-extract-full-pdf-txt.py b/scraper/s2-extract-full-pdf-txt.py index c748b6a1..32e7daec 100644 --- a/scraper/s2-extract-full-pdf-txt.py +++ b/scraper/s2-extract-full-pdf-txt.py @@ -18,8 +18,8 @@ s2 = SemanticScholarAPI() def extract_full_pdf_txt(): fns = [] for fn in glob.iglob('datasets/s2/pdf/*/*/*.pdf'): - path = os.path.dirname(fn).replace('pdf', 'txt') - out_fn = os.path.join(path, 'paper.txt') + out_path = os.path.dirname(fn).replace('pdf', 'txt') + out_fn = os.path.join(out_path, 'paper.txt') if not os.path.exists(out_fn): fns.append((fn, out_path, out_fn)) parallelize(extract_txt, fns) -- cgit v1.2.3-70-g09d2