summary | refs | log | tree | commit | diff
path: root/scraper/s2-dump-missing-paper-ids.py
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2019-05-30 14:30:39 +0200
committer: jules@lens <julescarbon@gmail.com> 2019-05-30 14:30:39 +0200
commit: 4f1d44719221bb8195e32b8f1e97feb4c3e14991 (patch)
tree: b147a5186f18413f43c2dfaf0c065aa1e5edc04d /scraper/s2-dump-missing-paper-ids.py
parent: ec239bc69e292a0c629e019a1edc3aea53109ab1 (diff)
fetching verified papers
Diffstat (limited to 'scraper/s2-dump-missing-paper-ids.py')
-rw-r--r-- scraper/s2-dump-missing-paper-ids.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/scraper/s2-dump-missing-paper-ids.py b/scraper/s2-dump-missing-paper-ids.py
index 47dd4238..6f7eb8ba 100644
--- a/scraper/s2-dump-missing-paper-ids.py
+++ b/scraper/s2-dump-missing-paper-ids.py
@@ -27,7 +27,7 @@ def load_missing_ids(fn):
lookup[paper_id] = True
found_count += 1
else:
- print(">> {} {}".format(dataset paper_id))
+ # print(">> {} {}".format(dataset, paper_id))
missing_lookup[paper_id] = True
missing_count += 1
@@ -35,9 +35,13 @@ def load_missing_ids(fn):
rows = []
for dataset, lookup in verified_lookup.items():
for paper_id in lookup.keys():
- paper_path = data_path('raw_papers', paper_id):
- if not os.path.exists(paper_path):
- print(">> {} {}".format(dataset paper_id))
+ if dataset == 'brainwash':
+ print('>> {} {}'.format(dataset, paper_id))
+ paper_path = make_raw_paper_path(paper_id)
+ if not os.path.exists(paper_path) and paper_id not in missing_lookup:
+ print(">> {} {}".format(dataset, paper_id))
+ missing_count += 1
+ missing_lookup[paper_id] = True
print("{} papers found, {} must be fetched".format(found_count, missing_count))
return missing_lookup.keys()