diff options
| author | jules@lens <julescarbon@gmail.com> | 2019-05-03 19:11:28 +0200 |
|---|---|---|
| committer | jules@lens <julescarbon@gmail.com> | 2019-05-03 19:11:28 +0200 |
| commit | e3ac08949f737e0c9d0c10f797294725361a4547 (patch) | |
| tree | 79af25cd49d27e06c538024fc6b09f9a424cc663 /scraper/s2-search-deep.py | |
| parent | 1be5e0e1a85a84d9eca7d1d89d14a562b356f2e0 (diff) | |
max pages
Diffstat (limited to 'scraper/s2-search-deep.py')
| -rw-r--r-- | scraper/s2-search-deep.py | 4 |
1 file changed, 4 insertions, 0 deletions
```diff
diff --git a/scraper/s2-search-deep.py b/scraper/s2-search-deep.py
index 9846f2a3..05a49836 100644
--- a/scraper/s2-search-deep.py
+++ b/scraper/s2-search-deep.py
@@ -27,6 +27,8 @@ totalResults
 
 s2 = SemanticScholarAPI()
 
+MAX_PAGES = 20
+
 def fetch_query(query, since=None, refresh=False):
   clean_title = re.sub(r'[^-0-9a-zA-Z ]+', '', query)
   yearFilter = {'min': since, 'max': 2020 } if since else None
@@ -59,6 +61,8 @@ def fetch_query(query, since=None, refresh=False):
       if paper_id not in paper_ids:
         paper_ids[paper_id] = True
     page += 1
+    if page > MAX_PAGES:
+      break
     if total >= results['totalResults'] - 9:
       break
   return paper_ids
```
