From 0dc3e40434c23e4d48119465f39b03bf35fb56bd Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sat, 3 Nov 2018 01:44:18 +0100 Subject: fix output --- s2-extract-papers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 's2-extract-papers.py') diff --git a/s2-extract-papers.py b/s2-extract-papers.py index 3ac8ce50..90323e6e 100644 --- a/s2-extract-papers.py +++ b/s2-extract-papers.py @@ -16,7 +16,7 @@ def fetch_entries(fn): def search_dataset_shard(fn, ids): with gzip.open(fn, 'r') as f: for line in f.readlines(): - process_paper(str(line)) + process_paper(str(line)[2:-3]) def process_paper(line): paper_id = line.split('"id":"', 2)[1].split('"', 2)[0] @@ -43,7 +43,7 @@ def write_paper(paper_id, data): if os.path.exists(fn): return os.makedirs(dir, exist_ok=True) - with open(fn, 'w') as f: + with open(fn, 'wb') as f: f.write(data) if __name__ == '__main__': -- cgit v1.2.3-70-g09d2