summary refs log tree commit diff
path: root/s2-citation-report.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-10 17:17:18 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-10 17:17:18 +0100
commit 6801542b636835c2abb07063448ce7416b12bbe2 (patch)
tree a27b696f1545567955b4822fe623304ee6af5fdc /s2-citation-report.py
parent e8ce7876c5869522f982073d70c3ee7be179e1f9 (diff)
overall coverage - 42%
Diffstat (limited to 's2-citation-report.py')
-rw-r--r-- s2-citation-report.py 20
1 files changed, 16 insertions, 4 deletions
diff --git a/s2-citation-report.py b/s2-citation-report.py
index 26e148fe..79e34a0e 100644
--- a/s2-citation-report.py
+++ b/s2-citation-report.py
@@ -11,13 +11,23 @@ from util import *
def s2_citation_report():
addresses = AddressBook()
megapixels = load_megapixels_queries()
+ successful_geocodes = {}
papers = []
for fn in glob.iglob('datasets/s2/papers/**/*.json', recursive=True):
- paper_data = process_paper(fn, addresses, megapixels)
+ paper_data = process_paper(fn, addresses, megapixels, successful_geocodes)
papers.append(paper_data)
- write_papers_report('reports/report_index.html', 'All Papers', papers, 'paperId')
+ write_papers_report('reports/report_index.html', 'All Papers', papers, 'title')
write_papers_report('reports/report_coverage.html', 'Coverage', papers, 'citations_geocoded', reverse=True)
+ paper_count = 0
+ geocode_count = 0
+ for key, value in successful_geocodes.items():
+ if value:
+ geocode_count += 1
+ paper_count += 1
+ print("citations: {}".format(paper_count))
+ print("geocoded: {} ({}%)".format(geocode_count, percent(geocode_count, paper_count)))
+
def write_papers_report(fn, title, papers, key, reverse=False):
sorted_papers = []
for paper in sorted(papers, key=lambda x: x[key], reverse=reverse):
@@ -31,7 +41,7 @@ def write_papers_report(fn, title, papers, key, reverse=False):
paper['address'],
paper['lat'],
paper['lng'],
- str(percent(paper['citation_count'], paper['citations_geocoded'])) + '%',
+ str(percent(paper['citations_geocoded'], paper['citation_count'])) + '%',
paper['citation_count'],
paper['citations_geocoded'],
paper['citations_unknown'],
@@ -59,7 +69,7 @@ def write_papers_report(fn, title, papers, key, reverse=False):
]
write_report(fn, title=title, keys=sorted_paper_keys, rows=sorted_papers)
-def process_paper(fn, addresses, megapixels):
+def process_paper(fn, addresses, megapixels, success):
res = {
'paperId': '',
'key': '',
@@ -177,6 +187,7 @@ def process_paper(fn, addresses, megapixels):
# 'geocoded': geocoded_institutions,
# })
if address:
+ success[citationId] = True
geocoded_citations.append([
citation.title,
institution,
@@ -185,6 +196,7 @@ def process_paper(fn, addresses, megapixels):
citation.title,
] + address)
else:
+ success[citationId] = False
unknown_citations.append([
# citationId,
citation.title,