diff options
Diffstat (limited to 's2-extract-papers.py')
| -rw-r--r-- | s2-extract-papers.py | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index 67d9bb8b..178b9009 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -22,7 +22,7 @@ def search_dataset_shard(fn, ids):
       if (i % 1000) == 0:
         print("{}...".format(i))
       for line in f.readlines():
-        process_paper(str(line), ids)
+        process_paper(str(line)[2:-3], ids)
 
 def process_paper(line, ids):
   paper_id = line.split('"id":"', 2)[1].split('"', 2)[0]
@@ -49,7 +49,7 @@ def write_paper(paper_id, data):
   if os.path.exists(fn):
     return
   os.makedirs(dir, exist_ok=True)
-  with open(fn, 'w') as f:
+  with open(fn, 'wb') as f:
     f.write(data)
 
 if __name__ == '__main__':
