diff options
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-search-deep.py | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/scraper/s2-search-deep.py b/scraper/s2-search-deep.py
index 9846f2a3..05a49836 100644
--- a/scraper/s2-search-deep.py
+++ b/scraper/s2-search-deep.py
@@ -27,6 +27,8 @@ totalResults
 
 s2 = SemanticScholarAPI()
 
+MAX_PAGES = 20
+
 def fetch_query(query, since=None, refresh=False):
   clean_title = re.sub(r'[^-0-9a-zA-Z ]+', '', query)
   yearFilter = {'min': since, 'max': 2020 } if since else None
@@ -59,6 +61,8 @@ def fetch_query(query, since=None, refresh=False):
         if paper_id not in paper_ids:
           paper_ids[paper_id] = True
     page += 1
+    if page > MAX_PAGES:
+      break
     if total >= results['totalResults'] - 9:
       break
   return paper_ids
