summary | refs | log | tree | commit | diff
path: root/s2-doi-report.py
diff options
context:
space:
mode:
Diffstat (limited to 's2-doi-report.py')
-rw-r--r-- s2-doi-report.py 14
1 file changed, 13 insertions, 1 deletion
diff --git a/s2-doi-report.py b/s2-doi-report.py
index 611b6391..74e388e3 100644
--- a/s2-doi-report.py
+++ b/s2-doi-report.py
@@ -20,8 +20,11 @@ def doi_report():
geocoded_papers = []
unknown_papers = []
unattributed_papers = []
-
+ paper_count = 0
+ ieee_count = 0
+ unparsed_count = 0
for fn in glob.iglob('{}/**/*.url'.format(DOI_DIR), recursive=True):
+ paper_count += 1
url_info = read_json(fn)
domain = url_info['domain']
paper_id = url_info['paper_id']
@@ -37,6 +40,7 @@ def doi_report():
# continue
paper_affiliation_count = 0
if 'ieee' in domain:
+ ieee_count += 1
affiliations = load_ieee(paper_id, doi_fn)
for affiliation in affiliations:
if affiliation:
@@ -50,6 +54,8 @@ def doi_report():
unknown_papers.append([paper.paper_id, paper.title, affiliation])
if paper_affiliation_count == 0:
unattributed_papers.append([paper.paper_id, paper.title])
+ else:
+ unparsed_count += 1
if address:
geocoded_papers.append([paper.paper_id, paper.title] + address)
@@ -72,6 +78,12 @@ def doi_report():
write_csv('reports/doi_institutions_geocoded.csv', keys=None, rows=geocoded_papers)
write_csv('reports/doi_institutions_unknown.csv', keys=None, rows=unknown_papers)
write_csv('reports/doi_institutions_unattributed.csv', keys=None, rows=unattributed_papers)
+ print("total papers: {}".format(paper_count))
+ print("ieee papers: {}".format(ieee_count))
+ print("unparsed papers: {}".format(unparsed_count))
+ print("geocoded papers: {}".format(len(geocoded_papers)))
+ print("unknown papers: {}".format(len(unknown_papers)))
+ print("unattributed papers: {}".format(len(unattributed_papers)))
def load_ieee(paper_id, fn):
paper = load_paper(paper_id)