summaryrefslogtreecommitdiff
path: root/s2-extract-papers.py
diff options
context:
space:
mode:
author jules@lens <julescarbon@gmail.com> 2018-11-03 17:36:05 +0100
committer jules@lens <julescarbon@gmail.com> 2018-11-03 17:36:05 +0100
commit c6f3973534d861d61939b691f1b8c697369f069c (patch)
tree 9202d755c33389f29c6d42e51bbad22774e31cfb /s2-extract-papers.py
parent aa0470a3076f5ac65a0311c76e58254547f3eae0 (diff)
totals
Diffstat (limited to 's2-extract-papers.py')
-rw-r--r-- s2-extract-papers.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index e84ffd0f..00301433 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -34,10 +34,16 @@ def process_paper(line, ids):
def load_id_lookup(fn):
lookup = {}
ids = read_json(fn)
+ skip_count = 0
+ save_count = 0
for paper_id in ids:
path = paper_path(paper_id)
if not os.path.exists(path):
lookup[paper_id] = True
+ save_count += 1
+ else:
+ skip_count += 1
+ print("finding {} ids ({} already pulled)".format(save_count, skip_count))
return lookup
def paper_path(paper_id):