diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-10 17:17:18 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-10 17:17:18 +0100 |
| commit | 6801542b636835c2abb07063448ce7416b12bbe2 (patch) | |
| tree | a27b696f1545567955b4822fe623304ee6af5fdc /s2-citation-report.py | |
| parent | e8ce7876c5869522f982073d70c3ee7be179e1f9 (diff) | |
overall coverage - 42%
Diffstat (limited to 's2-citation-report.py')
| -rw-r--r-- | s2-citation-report.py | 20 |
1 files changed, 16 insertions, 4 deletions
```diff
diff --git a/s2-citation-report.py b/s2-citation-report.py
index 26e148fe..79e34a0e 100644
--- a/s2-citation-report.py
+++ b/s2-citation-report.py
@@ -11,13 +11,23 @@ from util import *
 def s2_citation_report():
   addresses = AddressBook()
   megapixels = load_megapixels_queries()
+  successful_geocodes = {}
   papers = []
   for fn in glob.iglob('datasets/s2/papers/**/*.json', recursive=True):
-    paper_data = process_paper(fn, addresses, megapixels)
+    paper_data = process_paper(fn, addresses, megapixels, successful_geocodes)
     papers.append(paper_data)
-  write_papers_report('reports/report_index.html', 'All Papers', papers, 'paperId')
+  write_papers_report('reports/report_index.html', 'All Papers', papers, 'title')
   write_papers_report('reports/report_coverage.html', 'Coverage', papers, 'citations_geocoded', reverse=True)
 
+  paper_count = 0
+  geocode_count = 0
+  for key, value in successful_geocodes.items():
+    if value:
+      geocode_count += 1
+    paper_count += 1
+  print("citations: {}".format(paper_count))
+  print("geocoded: {} ({}%)".format(geocode_count, percent(geocode_count, paper_count)))
+
 def write_papers_report(fn, title, papers, key, reverse=False):
   sorted_papers = []
   for paper in sorted(papers, key=lambda x: x[key], reverse=reverse):
@@ -31,7 +41,7 @@ def write_papers_report(fn, title, papers, key, reverse=False):
       paper['address'],
       paper['lat'],
       paper['lng'],
-      str(percent(paper['citation_count'], paper['citations_geocoded'])) + '%',
+      str(percent(paper['citations_geocoded'], paper['citation_count'])) + '%',
       paper['citation_count'],
       paper['citations_geocoded'],
       paper['citations_unknown'],
@@ -59,7 +69,7 @@ def write_papers_report(fn, title, papers, key, reverse=False):
   ]
   write_report(fn, title=title, keys=sorted_paper_keys, rows=sorted_papers)
 
-def process_paper(fn, addresses, megapixels):
+def process_paper(fn, addresses, megapixels, success):
   res = {
     'paperId': '',
     'key': '',
@@ -177,6 +187,7 @@ def process_paper(fn, addresses, megapixels):
       # 'geocoded': geocoded_institutions,
     # })
     if address:
+      success[citationId] = True
       geocoded_citations.append([
         citation.title,
         institution,
@@ -185,6 +196,7 @@ def process_paper(fn, addresses, megapixels):
         citation.title,
       ] + address)
     else:
+      success[citationId] = False
       unknown_citations.append([
         # citationId,
         citation.title,
```
