summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2019-02-19 18:12:34 +0100
committerJules Laplace <julescarbon@gmail.com>2019-02-19 18:12:34 +0100
commitbf7eecfc2c1879f237d67f6bedf556b51a034d6a (patch)
tree1b995396888b95bce735281c2f6d2d215b2b3d4c
parent768757fe47d55b62c1d3ef87c982332e0292393e (diff)
txt
-rw-r--r--scraper/s2-extract-full-pdf-txt.py4
1 files changed, 2 insertions, 2 deletions
diff --git a/scraper/s2-extract-full-pdf-txt.py b/scraper/s2-extract-full-pdf-txt.py
index c748b6a1..32e7daec 100644
--- a/scraper/s2-extract-full-pdf-txt.py
+++ b/scraper/s2-extract-full-pdf-txt.py
@@ -18,8 +18,8 @@ s2 = SemanticScholarAPI()
def extract_full_pdf_txt():
fns = []
for fn in glob.iglob('datasets/s2/pdf/*/*/*.pdf'):
- path = os.path.dirname(fn).replace('pdf', 'txt')
- out_fn = os.path.join(path, 'paper.txt')
+ out_path = os.path.dirname(fn).replace('pdf', 'txt')
+ out_fn = os.path.join(out_path, 'paper.txt')
if not os.path.exists(out_fn):
fns.append((fn, out_path, out_fn))
parallelize(extract_txt, fns)