diff options
Diffstat (limited to 's2-extract-papers.py')
| -rw-r--r-- | s2-extract-papers.py | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index e84ffd0f..00301433 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -34,10 +34,16 @@ def process_paper(line, ids):
 def load_id_lookup(fn):
     lookup = {}
     ids = read_json(fn)
+    skip_count = 0
+    save_count = 0
     for paper_id in ids:
         path = paper_path(paper_id)
         if not os.path.exists(path):
             lookup[paper_id] = True
+            save_count += 1
+        else:
+            skip_count += 1
+    print("finding {} ids ({} already pulled)".format(save_count, skip_count))
     return lookup
 
 def paper_path(paper_id):
