diff options
Diffstat (limited to 's2-papers.py')
| -rw-r--r-- | s2-papers.py | 64 |
1 files changed, 0 insertions, 64 deletions
# Recovered from the deletion diff of s2-papers.py (blob d5e8bcd4, mode 100644).
"""Fetch Semantic Scholar paper records for a citation list and cache them.

For every row of the citation list, look up the previously saved search
entry for the row's title, take the paper id from it, and download the
paper record into ``./datasets/s2/papers/<first two id chars>/<id>/paper.json``
unless that file already exists.
"""

import os
import sys
import csv
import subprocess
import time
import random
import re
import json

import click

from s2 import SemanticScholarAPI
# NOTE(review): read_citation_list, read_json and write_json are not defined
# in this file; presumably they come from this star import -- confirm.
from util import *

'''
s2 search API format:
results
matchedAuthors
matchedPresentations
query
querySuggestions
results
stats
totalPages
totalResults
'''

# Path templates, kept as format strings so the on-disk layout is unchanged.
_ENTRY_FN = './datasets/s2/entries/{}.json'
_PAPER_DIR = './datasets/s2/papers/{}/{}'

# Everything except ASCII letters, digits, spaces and hyphens is stripped
# from a title before it is used as an entry file name.
_TITLE_JUNK = re.compile(r'[^-0-9a-zA-Z ]+')

s2 = SemanticScholarAPI()


@click.command()
@click.option('--index', '-n', default=0, help='Index of CSV.')
@click.option('--depth', '-d', default=1, help='Depth to recurse.')
def fetch_papers(index, depth):
    """Fetch the paper record for every row of the citation list.

    Rows whose sanitised title has no saved entry file are reported and
    skipped. ``depth`` is accepted on the command line but is not used yet:
    citations of the fetched papers are not followed.
    """
    _, lines = read_citation_list(index)
    for line in lines:
        # line[1] holds the title; the sanitised form is the entry file name.
        title = _TITLE_JUNK.sub('', line[1])
        entry_fn = _ENTRY_FN.format(title)
        if not os.path.exists(entry_fn):
            print('not found: {}'.format(entry_fn))
            continue
        entry = read_json(entry_fn)
        fetch_paper(entry['id'])
        # TODO: get all of the paper's citations


def fetch_paper(paper_id):
    """Return the paper record for ``paper_id``, downloading it if needed.

    A cached ``paper.json`` is returned as-is. Otherwise the record is
    requested from the API, retried once after a 20-30 second pause if the
    first attempt yields ``None``, written to the cache, and returned.
    Returns ``None`` when both attempts yield ``None``.
    """
    paper_dir = _PAPER_DIR.format(paper_id[0:2], paper_id)
    paper_fn = '{}/paper.json'.format(paper_dir)
    if os.path.exists(paper_fn):
        return read_json(paper_fn)

    print(paper_id)
    paper = s2.paper(paper_id)
    if paper is None:
        print("Got none paper??")
        # Back off before the single retry.
        time.sleep(random.randint(20, 30))
        paper = s2.paper(paper_id)
        if paper is None:
            print("Paper not found")
            return None

    # Create the directory only once there is a record to store, so failed
    # lookups do not leave empty directories behind.
    os.makedirs(paper_dir, exist_ok=True)
    write_json(paper_fn, paper)
    # Pause between successful downloads to stay polite to the API.
    time.sleep(random.randint(5, 10))
    return paper


if __name__ == '__main__':
    fetch_papers()
