diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 17:09:03 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 17:09:03 +0100 |
| commit | 93f7991ef226d5c76645bc12359751f87ec6d15e (patch) | |
| tree | e69392044fba0266b2b04dadc14d456e2b93b950 | |
| parent | 0336f8a9e59f2dc68bc0f953ace27f7b5a681b0e (diff) | |
fetch doi none check
| -rw-r--r-- | s2-fetch-doi.py | 8 |
1 files changed, 5 insertions, 3 deletions
```diff
diff --git a/s2-fetch-doi.py b/s2-fetch-doi.py
index cd1d7999..4ca0119d 100644
--- a/s2-fetch-doi.py
+++ b/s2-fetch-doi.py
@@ -20,7 +20,9 @@ def fetch_doi_list(fn):
   domains = []
   for line in lines:
     paper_id, url = line
-    domain = fetch_doi(paper_id, url)
+    if url:
+      domain = fetch_doi(paper_id, url)
+      print(domain)
   print("{} papers processed".format(len(lines)))
 
 def fetch_doi(paper_id, url):
@@ -30,12 +32,12 @@ def fetch_doi(paper_id, url):
   txt_fn = make_txt_fn(paper_id)
   if os.path.exists(doi_fn) or os.path.exists(txt_fn):
     # return read_json(doi_fn)
-    return
+    return None, None
   size, final_url = s2.fetch_doi(url, doi_fn)
   if size is None:
     print("{} empty?".format(paper_id))
     time.sleep(random.randint(2, 5))
-    return None
+    return None, None
   print("{} {} kb".format(paper_id, int(size / 1024)))
   domain = urlparse(final_url).netloc
   write_json(url_fn, {
```
