diff options
Diffstat (limited to 'scraper/s2-citation-report.py')
| -rw-r--r-- | scraper/s2-citation-report.py | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py index b5849329..d70a378a 100644 --- a/scraper/s2-citation-report.py +++ b/scraper/s2-citation-report.py @@ -53,8 +53,16 @@ def write_master_report(fn, papers): statistics = {} def clean(n): + if type(n) is int: + return n + if type(n) is str and n: + s = str(n).replace(',','').replace('.','').replace('?','').strip() + try: + return int(s) + except e: + return s if n: - return int(n.replace(',','').replace('.','').replace('?','').strip()) + return n return None for row in rows: @@ -175,7 +183,7 @@ def process_paper(row, addresses, success): with open(fn, 'r') as f: data = json.load(f) - print('>> {}'.format(data['paperId'])) + print('>> {} {}'.format(data['paperId'], row['key'])) paper = load_paper(data['paperId']) if paper is None: print("Paper missing! {}".format(data['paperId'])) @@ -332,7 +340,7 @@ def process_paper(row, addresses, success): f.write('<script src="../map.js"></script>') f.write("</html>") # template = env.get_template('paper.html') - with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, paper.key), 'w') as f: + with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f: json.dump({ 'id': paper.paper_id, 'paper': res, |
