summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-06 17:09:03 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-06 17:09:03 +0100
commit 93f7991ef226d5c76645bc12359751f87ec6d15e (patch)
tree e69392044fba0266b2b04dadc14d456e2b93b950
parent 0336f8a9e59f2dc68bc0f953ace27f7b5a681b0e (diff)
fetch doi none check
-rw-r--r-- s2-fetch-doi.py 8
1 files changed, 5 insertions, 3 deletions
diff --git a/s2-fetch-doi.py b/s2-fetch-doi.py
index cd1d7999..4ca0119d 100644
--- a/s2-fetch-doi.py
+++ b/s2-fetch-doi.py
@@ -20,7 +20,9 @@ def fetch_doi_list(fn):
domains = []
for line in lines:
paper_id, url = line
- domain = fetch_doi(paper_id, url)
+ if url:
+ domain = fetch_doi(paper_id, url)
+ print(domain)
print("{} papers processed".format(len(lines)))
def fetch_doi(paper_id, url):
@@ -30,12 +32,12 @@ def fetch_doi(paper_id, url):
txt_fn = make_txt_fn(paper_id)
if os.path.exists(doi_fn) or os.path.exists(txt_fn):
# return read_json(doi_fn)
- return
+ return None, None
size, final_url = s2.fetch_doi(url, doi_fn)
if size is None:
print("{} empty?".format(paper_id))
time.sleep(random.randint(2, 5))
- return None
+ return None, None
print("{} {} kb".format(paper_id, int(size / 1024)))
domain = urlparse(final_url).netloc
write_json(url_fn, {