From 6801542b636835c2abb07063448ce7416b12bbe2 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sat, 10 Nov 2018 17:17:18 +0100 Subject: overall coverage - 42% --- s2-citation-report.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 's2-citation-report.py') diff --git a/s2-citation-report.py b/s2-citation-report.py index 26e148fe..79e34a0e 100644 --- a/s2-citation-report.py +++ b/s2-citation-report.py @@ -11,13 +11,23 @@ from util import * def s2_citation_report(): addresses = AddressBook() megapixels = load_megapixels_queries() + successful_geocodes = {} papers = [] for fn in glob.iglob('datasets/s2/papers/**/*.json', recursive=True): - paper_data = process_paper(fn, addresses, megapixels) + paper_data = process_paper(fn, addresses, megapixels, successful_geocodes) papers.append(paper_data) - write_papers_report('reports/report_index.html', 'All Papers', papers, 'paperId') + write_papers_report('reports/report_index.html', 'All Papers', papers, 'title') write_papers_report('reports/report_coverage.html', 'Coverage', papers, 'citations_geocoded', reverse=True) + paper_count = 0 + geocode_count = 0 + for key, value in successful_geocodes.items(): + if value: + geocode_count += 1 + paper_count += 1 + print("citations: {}".format(paper_count)) + print("geocoded: {} ({}%)".format(geocode_count, percent(geocode_count, paper_count))) + def write_papers_report(fn, title, papers, key, reverse=False): sorted_papers = [] for paper in sorted(papers, key=lambda x: x[key], reverse=reverse): @@ -31,7 +41,7 @@ def write_papers_report(fn, title, papers, key, reverse=False): paper['address'], paper['lat'], paper['lng'], - str(percent(paper['citation_count'], paper['citations_geocoded'])) + '%', + str(percent(paper['citations_geocoded'], paper['citation_count'])) + '%', paper['citation_count'], paper['citations_geocoded'], paper['citations_unknown'], @@ -59,7 +69,7 @@ def write_papers_report(fn, title, papers, key, reverse=False): ] write_report(fn, title=title, keys=sorted_paper_keys, rows=sorted_papers) -def process_paper(fn, addresses, megapixels): +def process_paper(fn, addresses, megapixels, success): res = { 'paperId': '', 'key': '', @@ -177,6 +187,7 @@ def process_paper(fn, addresses, megapixels): # 'geocoded': geocoded_institutions, # }) if address: + success[citationId] = True geocoded_citations.append([ citation.title, institution, @@ -185,6 +196,7 @@ def process_paper(fn, addresses, megapixels): citation.title, ] + address) else: + success[citationId] = False unknown_citations.append([ # citationId, citation.title, -- cgit v1.2.3-70-g09d2