diff options
| author | jules@lens <julescarbon@gmail.com> | 2018-11-03 17:36:05 +0100 |
|---|---|---|
| committer | jules@lens <julescarbon@gmail.com> | 2018-11-03 17:36:05 +0100 |
| commit | c6f3973534d861d61939b691f1b8c697369f069c (patch) | |
| tree | 9202d755c33389f29c6d42e51bbad22774e31cfb /s2-extract-papers.py | |
| parent | aa0470a3076f5ac65a0311c76e58254547f3eae0 (diff) | |
totals
Diffstat (limited to 's2-extract-papers.py')
| -rw-r--r-- | s2-extract-papers.py | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
diff --git a/s2-extract-papers.py b/s2-extract-papers.py
index e84ffd0f..00301433 100644
--- a/s2-extract-papers.py
+++ b/s2-extract-papers.py
@@ -34,10 +34,16 @@ def process_paper(line, ids):
 def load_id_lookup(fn):
     lookup = {}
     ids = read_json(fn)
+    skip_count = 0
+    save_count = 0
     for paper_id in ids:
         path = paper_path(paper_id)
         if not os.path.exists(path):
             lookup[paper_id] = True
+            save_count += 1
+        else:
+            skip_count += 1
+    print("finding {} ids ({} already pulled)".format(save_count, skip_count))
     return lookup
 
 def paper_path(paper_id):
```
