import os
import sys
import csv
import subprocess
import time
import random
import re
import operator
import click
from s2 import SemanticScholarAPI
from util import *

# Module-level Semantic Scholar API client; fetch_papers() hands it to fetch_paper().
s2 = SemanticScholarAPI()

def _first_known_address(addresses, institutions):
  """Return the first address-book match among a paper's institutions, or None.

  Entries are visited in ascending order of their second field, which is the
  value handed to `addresses.findObject`; the search stops at the first
  non-None result.
  """
  for inst in sorted(institutions, key=operator.itemgetter(1)):
    match = addresses.findObject(inst[1])
    if match is not None:
      return match
  return None


def _s2_search_link(title):
  """Build the Semantic Scholar search URL for `title`.

  The title is stripped, lower-cased and percent-encoded so that characters
  such as '&', '#', '+' or '%' stay inside the `q` parameter instead of
  truncating or corrupting the query string.
  """
  from urllib.parse import quote_plus  # local import: only this helper needs it
  query = quote_plus(title.strip().lower())
  return "https://www.semanticscholar.org/search?q={}&sort=relevance".format(query)


@click.command()
def fetch_papers():
  """Write HTML sanity-check reports for the papers in citation_lookup.csv.

  Each row of ./datasets/citation_lookup.csv is unpacked as
  (key, name, title, paper_id). For every row the paper is fetched, its
  stored pdf link and institutions are loaded, and a report row is built.
  Three reports are written under ./reports/: all rows, rows whose title
  differs (case- and whitespace-insensitively) from the fetched title, and
  rows for which no institution address was found.
  """
  addresses = AddressBook()
  # Only the rows are needed; the first value returned by read_csv is unused.
  _, lines = read_csv('./datasets/citation_lookup.csv')
  report_keys = [
    "key", "name", "our title", 'found title', '', '', 'address', 's2 id'
  ]
  all_rows = []
  no_location_rows = []
  nonmatching_rows = []
  for line in lines:
    key, name, title, paper_id = line
    # NOTE(review): `paper` is indexed with 'title' and 'paperId' below, so
    # fetch_paper is assumed to always return a mapping -- confirm it cannot
    # return None for an unknown id.
    paper = fetch_paper(s2, paper_id)
    db_paper = load_paper(paper_id)
    pdf_link = db_paper.pdf_link if db_paper else ""

    address_entry = _first_known_address(addresses, load_institutions(paper_id))
    # An empty string marks "no known location" in the report.
    paper_address = "" if address_entry is None else address_entry['address']

    row = [
      key,
      name,
      title,
      paper['title'],
      LinkLine(pdf_link, '[pdf]'),
      LinkLine(_s2_search_link(title), '[s2]'),
      paper_address,
      paper['paperId'],
    ]
    all_rows.append(row)
    if title.strip().lower() != paper['title'].strip().lower():
      nonmatching_rows.append(row)
    if paper_address == '':
      no_location_rows.append(row)
  write_report('./reports/paper_title_report.html', 'Paper Title Sanity Check', report_keys, all_rows)
  write_report('./reports/paper_title_report_nonmatching.html', 'Paper Titles that do not match', report_keys, nonmatching_rows)
  write_report('./reports/paper_title_report_no_location.html', 'Papers with no location', report_keys, no_location_rows)

def load_institutions(paperId):
  """Return the 'institutions' list stored for a paper, or [] if none exists.

  The paper's 'pdf' directory is checked first, then its 'doi' directory;
  the first institutions.json found is read and its 'institutions' entry
  returned.
  """
  for source in ('pdf', 'doi'):
    candidate = file_path(source, paperId, 'institutions.json')
    if os.path.exists(candidate):
      return read_json(candidate)['institutions']
  return []

def data_path(key, paper_id):
  """Return the dataset directory for `paper_id` under the `key` category.

  Directories are sharded by the first two characters of the paper id:
  datasets/s2/<key>/<first two chars>/<paper_id>.
  """
  shard = paper_id[0:2]
  return 'datasets/s2/{}/{}/{}'.format(key, shard, paper_id)
def file_path(key, paper_id, fn):
  """Return the path of file `fn` inside the paper's data_path directory."""
  directory = data_path(key, paper_id)
  return os.path.join(directory, fn)

# Invoke the click command only when this file is run as a script.
if __name__ == '__main__':
  fetch_papers()