"""Download raw Semantic Scholar paper records listed in a CSV and cache them on disk."""

import csv
import os
import random
import re
import subprocess
import sys
import time

import click
import simplejson as json

from s2 import SemanticScholarAPI
from util import *

s2 = SemanticScholarAPI()

# Directory under which every raw paper gets its own sub-directory.
_RAW_PAPER_ROOT = './datasets/s2/raw_papers'

# Inclusive bounds, in seconds, of the random pause taken after each remote
# request (presumably to stay polite towards the API -- confirm the limits).
_MIN_DELAY_SECONDS = 5
_MAX_DELAY_SECONDS = 10


@click.command()
@click.option('--fn', '-i', default='missing.csv', help='Filename of CSV (id,)')
def fetch_raw_papers(fn):
    """Fetch the raw paper for every id found in the first column of FN."""
    # read_csv is expected to come from `util`; with keys=False each row is
    # indexed positionally below.
    rows = read_csv(fn, keys=False)
    for row in rows:
        # A blank line (empty row) or an empty id cell used to raise
        # IndexError or create a bogus "raw_papers//" directory; skip them.
        if not row or not row[0]:
            continue
        fetch_raw_paper(row[0])


def fetch_raw_paper(paper_id):
    """Return the raw paper for ``paper_id``, using the on-disk cache if present.

    The paper's directory is created if needed. When ``paper.json`` already
    exists there it is loaded and returned without any remote call.
    Otherwise the paper is requested through ``s2.raw_paper``; a non-None
    result is written to ``paper.json``. A random pause follows every remote
    request, whether or not it yielded a paper.

    Returns the paper object, or ``None`` when the API returned nothing.
    """
    os.makedirs(make_raw_paper_path(paper_id), exist_ok=True)
    paper_fn = make_raw_paper_fn(paper_id)
    if os.path.exists(paper_fn):
        return read_json(paper_fn)

    print(paper_id)
    paper = s2.raw_paper(paper_id)
    if paper is None:
        print("Got empty paper?? {}".format(paper_id))
    else:
        write_json(paper_fn, paper)
    _pause_between_requests()
    return paper


def _pause_between_requests():
    """Sleep for a random whole number of seconds within the configured bounds."""
    time.sleep(random.randint(_MIN_DELAY_SECONDS, _MAX_DELAY_SECONDS))


def make_raw_paper_path(paper_id):
    """Return the directory for ``paper_id``, sharded by its first two characters."""
    return '{}/{}/{}'.format(_RAW_PAPER_ROOT, paper_id[0:2], paper_id)


def make_raw_paper_fn(paper_id):
    """Return the path of the ``paper.json`` file inside the paper's directory."""
    # Built from make_raw_paper_path so the two locations cannot drift apart.
    return '{}/paper.json'.format(make_raw_paper_path(paper_id))


if __name__ == '__main__':
    fetch_raw_papers()