diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-03 01:44:18 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-03 01:44:18 +0100 |
| commit | 0dc3e40434c23e4d48119465f39b03bf35fb56bd (patch) | |
| tree | 37862cecbeaf097c6a406b59a00c825f740f89ba /s2-extract-papers.py | |
| parent | c2f4665dbe5ff1225f90afbaf590975057dc5026 (diff) | |
fix output
Diffstat (limited to 's2-extract-papers.py')
| -rw-r--r-- | s2-extract-papers.py | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index 3ac8ce50..90323e6e 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -16,7 +16,7 @@ def fetch_entries(fn):
 def search_dataset_shard(fn, ids):
   with gzip.open(fn, 'r') as f:
     for line in f.readlines():
-      process_paper(str(line))
+      process_paper(str(line)[2:-3])
 
 def process_paper(line):
   paper_id = line.split('"id":"', 2)[1].split('"', 2)[0]
@@ -43,7 +43,7 @@ def write_paper(paper_id, data):
   if os.path.exists(fn):
     return
   os.makedirs(dir, exist_ok=True)
-  with open(fn, 'w') as f:
+  with open(fn, 'wb') as f:
     f.write(data)
 
 if __name__ == '__main__':
```
