diff options
| -rw-r--r-- | scraper/s2-extract-full-pdf-txt.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/scraper/s2-extract-full-pdf-txt.py b/scraper/s2-extract-full-pdf-txt.py
index c748b6a1..32e7daec 100644
--- a/scraper/s2-extract-full-pdf-txt.py
+++ b/scraper/s2-extract-full-pdf-txt.py
@@ -18,8 +18,8 @@ s2 = SemanticScholarAPI()
 def extract_full_pdf_txt():
     fns = []
     for fn in glob.iglob('datasets/s2/pdf/*/*/*.pdf'):
-        path = os.path.dirname(fn).replace('pdf', 'txt')
-        out_fn = os.path.join(path, 'paper.txt')
+        out_path = os.path.dirname(fn).replace('pdf', 'txt')
+        out_fn = os.path.join(out_path, 'paper.txt')
         if not os.path.exists(out_fn):
             fns.append((fn, out_path, out_fn))
     parallelize(extract_txt, fns)
