From e3ac08949f737e0c9d0c10f797294725361a4547 Mon Sep 17 00:00:00 2001 From: "jules@lens" Date: Fri, 3 May 2019 19:11:28 +0200 Subject: max pages --- scraper/s2-search-deep.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scraper/s2-search-deep.py') diff --git a/scraper/s2-search-deep.py b/scraper/s2-search-deep.py index 9846f2a3..05a49836 100644 --- a/scraper/s2-search-deep.py +++ b/scraper/s2-search-deep.py @@ -27,6 +27,8 @@ totalResults s2 = SemanticScholarAPI() +MAX_PAGES = 20 + def fetch_query(query, since=None, refresh=False): clean_title = re.sub(r'[^-0-9a-zA-Z ]+', '', query) yearFilter = {'min': since, 'max': 2020 } if since else None @@ -59,6 +61,8 @@ def fetch_query(query, since=None, refresh=False): if paper_id not in paper_ids: paper_ids[paper_id] = True page += 1 + if page > MAX_PAGES: + break if total >= results['totalResults'] - 9: break return paper_ids -- cgit v1.2.3-70-g09d2