From c6f3973534d861d61939b691f1b8c697369f069c Mon Sep 17 00:00:00 2001 From: "jules@lens" Date: Sat, 3 Nov 2018 17:36:05 +0100 Subject: totals --- s2-extract-papers.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 's2-extract-papers.py') diff --git a/s2-extract-papers.py b/s2-extract-papers.py index e84ffd0f..00301433 100644 --- a/s2-extract-papers.py +++ b/s2-extract-papers.py @@ -34,10 +34,16 @@ def process_paper(line, ids): def load_id_lookup(fn): lookup = {} ids = read_json(fn) + skip_count = 0 + save_count = 0 for paper_id in ids: path = paper_path(paper_id) if not os.path.exists(path): lookup[paper_id] = True + save_count += 1 + else: + skip_count += 1 + print("finding {} ids ({} already pulled)".format(save_count, skip_count)) return lookup def paper_path(paper_id): -- cgit v1.2.3-70-g09d2