diff options
Diffstat (limited to 's2-doi-report.py')
| -rw-r--r-- | s2-doi-report.py | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/s2-doi-report.py b/s2-doi-report.py index 611b6391..74e388e3 100644 --- a/s2-doi-report.py +++ b/s2-doi-report.py @@ -20,8 +20,11 @@ def doi_report(): geocoded_papers = [] unknown_papers = [] unattributed_papers = [] - + paper_count = 0 + ieee_count = 0 + unparsed_count = 0 for fn in glob.iglob('{}/**/*.url'.format(DOI_DIR), recursive=True): + paper_count += 1 url_info = read_json(fn) domain = url_info['domain'] paper_id = url_info['paper_id'] @@ -37,6 +40,7 @@ def doi_report(): # continue paper_affiliation_count = 0 if 'ieee' in domain: + ieee_count += 1 affiliations = load_ieee(paper_id, doi_fn) for affiliation in affiliations: if affiliation: @@ -50,6 +54,8 @@ def doi_report(): unknown_papers.append([paper.paper_id, paper.title, affiliation]) if paper_affiliation_count == 0: unattributed_papers.append([paper.paper_id, paper.title]) + else: + unparsed_count += 1 if address: geocoded_papers.append([paper.paper_id, paper.title] + address) @@ -72,6 +78,12 @@ def doi_report(): write_csv('reports/doi_institutions_geocoded.csv', keys=None, rows=geocoded_papers) write_csv('reports/doi_institutions_unknown.csv', keys=None, rows=unknown_papers) write_csv('reports/doi_institutions_unattributed.csv', keys=None, rows=unattributed_papers) + print("total papers: {}".format(paper_count)) + print("ieee papers: {}".format(ieee_count)) + print("unparsed papers: {}".format(unparsed_count)) + print("geocoded papers: {}".format(len(geocoded_papers))) + print("unknown papers: {}".format(len(unknown_papers))) + print("unattributed papers: {}".format(len(unattributed_papers))) def load_ieee(paper_id, fn): paper = load_paper(paper_id) |
