diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-19 18:12:34 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-19 18:12:34 +0100 |
| commit | bf7eecfc2c1879f237d67f6bedf556b51a034d6a (patch) | |
| tree | 1b995396888b95bce735281c2f6d2d215b2b3d4c /scraper/s2-extract-full-pdf-txt.py | |
| parent | 768757fe47d55b62c1d3ef87c982332e0292393e (diff) | |
txt
Diffstat (limited to 'scraper/s2-extract-full-pdf-txt.py')
| -rw-r--r-- | scraper/s2-extract-full-pdf-txt.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/scraper/s2-extract-full-pdf-txt.py b/scraper/s2-extract-full-pdf-txt.py
index c748b6a1..32e7daec 100644
--- a/scraper/s2-extract-full-pdf-txt.py
+++ b/scraper/s2-extract-full-pdf-txt.py
@@ -18,8 +18,8 @@ s2 = SemanticScholarAPI()
 def extract_full_pdf_txt():
   fns = []
   for fn in glob.iglob('datasets/s2/pdf/*/*/*.pdf'):
-    path = os.path.dirname(fn).replace('pdf', 'txt')
-    out_fn = os.path.join(path, 'paper.txt')
+    out_path = os.path.dirname(fn).replace('pdf', 'txt')
+    out_fn = os.path.join(out_path, 'paper.txt')
     if not os.path.exists(out_fn):
       fns.append((fn, out_path, out_fn))
   parallelize(extract_txt, fns)
```
