diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-07 15:30:48 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-07 15:30:48 +0100 |
| commit | 74663e858aa3a1963c757c605f54264ab48b27af (patch) | |
| tree | 3631f1cbe1e15cead0b2cad85daf0fdcb8c8edb1 /scraper/s2.py | |
| parent | 602527f66a438ffe9340299a242900057b175926 (diff) | |
updating citations
Diffstat (limited to 'scraper/s2.py')
| -rw-r--r-- | scraper/s2.py | 21 |
1 files changed, 21 insertions, 0 deletions
```diff
diff --git a/scraper/s2.py b/scraper/s2.py
index ea090845..21c3a7aa 100644
--- a/scraper/s2.py
+++ b/scraper/s2.py
@@ -1,5 +1,8 @@
 import os
 import requests
+import time
+import random
+from util import *


 class AuthorStub(object):
@@ -192,3 +195,21 @@ class SemanticScholarAPI(object):
             }, headers=SemanticScholarAPI.headers)
         # print(resp.status_code)
         return None if resp.status_code != 200 else resp.json()
+
+def fetch_paper(s2, paper_id):
+    os.makedirs('./datasets/s2/papers/{}/{}'.format(paper_id[0:2], paper_id), exist_ok=True)
+    paper_fn = './datasets/s2/papers/{}/{}/paper.json'.format(paper_id[0:2], paper_id)
+    if os.path.exists(paper_fn):
+        return read_json(paper_fn)
+    print(paper_id)
+    paper = s2.paper(paper_id)
+    if paper is None:
+        print("Got none paper??")
+        time.sleep(random.randint(1, 2))
+        paper = s2.paper(paper_id)
+        if paper is None:
+            print("Paper not found")
+            return None
+    write_json(paper_fn, paper)
+    time.sleep(random.randint(1, 2))
+    return paper
```
