summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-10 17:25:57 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-10 17:25:57 +0100
commit: 9c9b834222c47ad19eb562de3c7c9762dd0c99a6 (patch)
tree: 3819ea219131b98f8eb808e2c91b2f17e435fbd8
parent: 4143979504aebc9fd9d175bfd8e37033bc2bc62c (diff)
s2-extract-pdf-txt.py
-rw-r--r-- scraper/s2-extract-pdf-txt.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/scraper/s2-extract-pdf-txt.py b/scraper/s2-extract-pdf-txt.py
index 62595947..4537d265 100644
--- a/scraper/s2-extract-pdf-txt.py
+++ b/scraper/s2-extract-pdf-txt.py
@@ -18,7 +18,7 @@ s2 = SemanticScholarAPI()
def extract_pdf_txt():
fns = []
for fn in glob.iglob('datasets/s2/pdf/*/*/*.pdf'):
- path = os.path.pathname(fn)
+ path = os.path.dirname(fn)
out_fn = os.path.join(path, 'paper.txt')
if not os.path.exists():
fns.append((fn, out_fn))