diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-05-30 14:03:34 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-05-30 14:03:34 +0200 |
| commit | 10c38b6b5916b2c7f84ca65fa471dda963dd9b5d (patch) | |
| tree | 479d5dab47ab7bf4d02fb8d233a220a233989ae5 /scraper/s2-fetch-pdf.py | |
| parent | 2963cd2ec73860e3bf3a5e4d469b4e573ce4817c (diff) | |
s2 fetch missing verified papers
Diffstat (limited to 'scraper/s2-fetch-pdf.py')
| -rw-r--r-- | scraper/s2-fetch-pdf.py | 5 |
1 files changed, 2 insertions, 3 deletions
```diff
diff --git a/scraper/s2-fetch-pdf.py b/scraper/s2-fetch-pdf.py
index 72ca4ca8..61574b90 100644
--- a/scraper/s2-fetch-pdf.py
+++ b/scraper/s2-fetch-pdf.py
@@ -25,13 +25,12 @@ def fetch_pdf(paper_id, url):
   pdf_fn = make_pdf_fn(paper_id)
   txt_fn = make_txt_fn(paper_id)
   if os.path.exists(pdf_fn) or os.path.exists(txt_fn):
-    # return read_json(pdf_fn)
-    return
+    return None
   size = s2.fetch_file(url, pdf_fn)
   if size is None:
     print("{} empty?".format(paper_id))
     return None
-  print("{} {} kb".format(paper_id, int(size / 1024)))
+  print("{} {} kb {}".format(paper_id, int(size / 1024), url))
 
 def make_pdf_path(paper_id):
   return './datasets/s2/pdf/{}/{}'.format(paper_id[0:2], paper_id)
```
