diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-13 02:46:28 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-13 02:46:28 +0100 |
| commit | 1b61e44f7fb273a3df6f1bb2a02aa046cfe736e0 (patch) | |
| tree | 6a7b1c9b492db190e4aecafc5b2a64acf57f5780 /s2-doi-report.py | |
| parent | 6801542b636835c2abb07063448ce7416b12bbe2 (diff) | |
red dot
Diffstat (limited to 's2-doi-report.py')
| -rw-r--r-- | s2-doi-report.py | 14 |
1 file changed, 13 insertions, 1 deletion
diff --git a/s2-doi-report.py b/s2-doi-report.py index 611b6391..74e388e3 100644 --- a/s2-doi-report.py +++ b/s2-doi-report.py @@ -20,8 +20,11 @@ def doi_report(): geocoded_papers = [] unknown_papers = [] unattributed_papers = [] - + paper_count = 0 + ieee_count = 0 + unparsed_count = 0 for fn in glob.iglob('{}/**/*.url'.format(DOI_DIR), recursive=True): + paper_count += 1 url_info = read_json(fn) domain = url_info['domain'] paper_id = url_info['paper_id'] @@ -37,6 +40,7 @@ def doi_report(): # continue paper_affiliation_count = 0 if 'ieee' in domain: + ieee_count += 1 affiliations = load_ieee(paper_id, doi_fn) for affiliation in affiliations: if affiliation: @@ -50,6 +54,8 @@ def doi_report(): unknown_papers.append([paper.paper_id, paper.title, affiliation]) if paper_affiliation_count == 0: unattributed_papers.append([paper.paper_id, paper.title]) + else: + unparsed_count += 1 if address: geocoded_papers.append([paper.paper_id, paper.title] + address) @@ -72,6 +78,12 @@ def doi_report(): write_csv('reports/doi_institutions_geocoded.csv', keys=None, rows=geocoded_papers) write_csv('reports/doi_institutions_unknown.csv', keys=None, rows=unknown_papers) write_csv('reports/doi_institutions_unattributed.csv', keys=None, rows=unattributed_papers) + print("total papers: {}".format(paper_count)) + print("ieee papers: {}".format(ieee_count)) + print("unparsed papers: {}".format(unparsed_count)) + print("geocoded papers: {}".format(len(geocoded_papers))) + print("unknown papers: {}".format(len(unknown_papers))) + print("unattributed papers: {}".format(len(unattributed_papers))) def load_ieee(paper_id, fn): paper = load_paper(paper_id) |
