summary | refs | log | tree | commit | diff
path: root/s2-extract-papers.py
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2018-11-03 18:49:19 +0100
committer: jules@lens <julescarbon@gmail.com> 2018-11-03 18:49:19 +0100
commit: cbd96aeca32dcfd37acdf5f6b7e3a2997311783c (patch)
tree: b6b9311aeb09797b5dda060aa387b2ac074e14ba /s2-extract-papers.py
parent: 7f385e46dce654405fc965668a8104e876c8aa6d (diff)
utf8
Diffstat (limited to 's2-extract-papers.py')
-rw-r--r-- s2-extract-papers.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index 00301433..bd30c24b 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -22,7 +22,7 @@ def search_dataset_shard(fn, ids):
if (i % 1000) == 0:
print("{}...".format(i))
for line in f.readlines():
- process_paper(str(line)[2:-3], ids)
+ process_paper(line.decode('UTF-8'), ids)
def process_paper(line, ids):
paper_id = line.split('"id":"', 2)[1].split('"', 2)[0]