From 948174f04d9dea93271bec62283b1c09e687e157 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sun, 16 Dec 2018 17:47:21 +0100 Subject: map and citations workin --- scraper/s2-citation-report.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'scraper/s2-citation-report.py') diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py index b5849329..d70a378a 100644 --- a/scraper/s2-citation-report.py +++ b/scraper/s2-citation-report.py @@ -53,8 +53,16 @@ def write_master_report(fn, papers): statistics = {} def clean(n): + if type(n) is int: + return n + if type(n) is str and n: + s = str(n).replace(',','').replace('.','').replace('?','').strip() + try: + return int(s) + except e: + return s if n: - return int(n.replace(',','').replace('.','').replace('?','').strip()) + return n return None for row in rows: @@ -175,7 +183,7 @@ def process_paper(row, addresses, success): with open(fn, 'r') as f: data = json.load(f) - print('>> {}'.format(data['paperId'])) + print('>> {} {}'.format(data['paperId'], row['key'])) paper = load_paper(data['paperId']) if paper is None: print("Paper missing! {}".format(data['paperId'])) @@ -332,7 +340,7 @@ def process_paper(row, addresses, success): f.write('') f.write("") # template = env.get_template('paper.html') - with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, paper.key), 'w') as f: + with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f: json.dump({ 'id': paper.paper_id, 'paper': res, -- cgit v1.2.3-70-g09d2