import os
import sys
import csv
import subprocess
import time
import random
import re

import click

# Input citation list and the external lookup script.
_CITATIONS_CSV = './datasets/citations.csv'
_SCHOLAR_SCRIPT = './vendor/scholar.py'
# One result file is written per title using this pattern.
_ENTRIES_PATTERN = './datasets/scholar/entries/{}.csv'

# Everything except letters, digits, spaces and hyphens is dropped from the
# title before it is handed to the lookup script.
_QUERY_JUNK = re.compile(r'[^-0-9a-zA-Z ]+')


def _entries_path(title):
    """Return the result-file path for *title*.

    Path separators in the title are replaced with ``_`` so the title can
    never be interpreted as a sub-directory of the entries folder.
    """
    safe = title.replace(os.sep, '_')
    if os.altsep:
        safe = safe.replace(os.altsep, '_')
    return _ENTRIES_PATTERN.format(safe)


def _fetch_to_file(query, out_path):
    """Run the lookup script for *query*, capturing its stdout in *out_path*.

    Returns True when the script exits with status 0.  On a non-zero exit,
    or if anything raises (including an interrupt), the output file is
    removed so that a later run retries the title instead of treating an
    empty or partial file as a finished result.
    """
    succeeded = False
    try:
        with open(out_path, 'w') as out:
            status = subprocess.call([
                _SCHOLAR_SCRIPT,
                '-t',
                '-A',
                query,
                '--csv',
            ], stdout=out)
            succeeded = status == 0
    finally:
        if not succeeded and os.path.exists(out_path):
            os.remove(out_path)
    return succeeded


@click.command()
@click.option('--index', '-n', default=1, help='Index of CSV.')
def fetch_entries(index):
    """Fetch entries for every title in the citation list.

    Titles whose result file already exists are skipped, so the command
    can be re-run to resume an unfinished session.
    """
    _, lines = read_citation_list(index)
    for line in lines:
        # Blank or truncated rows have no title column to look up.
        if len(line) < 2:
            continue
        title = line[1]
        entries_fn = _entries_path(title)
        if os.path.exists(entries_fn):
            continue

        query = _QUERY_JUNK.sub('', title)
        print(query)
        if not _fetch_to_file(query, entries_fn):
            sys.stderr.write('scholar.py failed for: {}\n'.format(query))
        # Random 30-60 s pause after each attempt; presumably this throttles
        # requests made by the lookup script -- confirm before shortening.
        time.sleep(random.randint(30, 60))


def read_citation_list(index):
    """Read the citation CSV and split it into header and data rows.

    Args:
        index: Currently ignored; every call reads the same file.
            NOTE(review): looks intended to select a per-index shard of the
            citation list -- confirm before relying on it.

    Returns:
        A ``(keys, lines)`` tuple where ``keys`` is the first row of the
        file and ``lines`` is the list of remaining rows.  Both are empty
        lists when the file contains no rows.
    """
    filename = _CITATIONS_CSV
    with open(filename, 'r') as f:
        rows = list(csv.reader(f))
    if not rows:
        return [], []
    return rows[0], rows[1:]


if __name__ == '__main__':
    fetch_entries()