summary | refs | log | tree | commit | diff
path: root/scraper/s2.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com>, 2018-12-07 15:30:48 +0100
committer: Jules Laplace <julescarbon@gmail.com>, 2018-12-07 15:30:48 +0100
commit: 74663e858aa3a1963c757c605f54264ab48b27af (patch)
tree: 3631f1cbe1e15cead0b2cad85daf0fdcb8c8edb1 /scraper/s2.py
parent: 602527f66a438ffe9340299a242900057b175926 (diff)
updating citations
Diffstat (limited to 'scraper/s2.py')
-rw-r--r-- scraper/s2.py | 21
1 files changed, 21 insertions, 0 deletions
diff --git a/scraper/s2.py b/scraper/s2.py
index ea090845..21c3a7aa 100644
--- a/scraper/s2.py
+++ b/scraper/s2.py
@@ -1,5 +1,8 @@
import os
import requests
+import time
+import random
+from util import *
class AuthorStub(object):
@@ -192,3 +195,21 @@ class SemanticScholarAPI(object):
}, headers=SemanticScholarAPI.headers)
# print(resp.status_code)
return None if resp.status_code != 200 else resp.json()
+
+def fetch_paper(s2, paper_id):
+ os.makedirs('./datasets/s2/papers/{}/{}'.format(paper_id[0:2], paper_id), exist_ok=True)
+ paper_fn = './datasets/s2/papers/{}/{}/paper.json'.format(paper_id[0:2], paper_id)
+ if os.path.exists(paper_fn):
+ return read_json(paper_fn)
+ print(paper_id)
+ paper = s2.paper(paper_id)
+ if paper is None:
+ print("Got none paper??")
+ time.sleep(random.randint(1, 2))
+ paper = s2.paper(paper_id)
+ if paper is None:
+ print("Paper not found")
+ return None
+ write_json(paper_fn, paper)
+ time.sleep(random.randint(1, 2))
+ return paper