diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-16 17:47:21 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-16 17:47:21 +0100 |
| commit | 948174f04d9dea93271bec62283b1c09e687e157 (patch) | |
| tree | e755bb15d77ef21e43744fc07397b4b2b5cb20ca /scraper | |
| parent | 00eb85af05208e2129e7eba46da795e4528d2735 (diff) | |
map and citations working
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-citation-report.py | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py index b5849329..d70a378a 100644 --- a/scraper/s2-citation-report.py +++ b/scraper/s2-citation-report.py @@ -53,8 +53,16 @@ def write_master_report(fn, papers): statistics = {} def clean(n): + if type(n) is int: + return n + if type(n) is str and n: + s = str(n).replace(',','').replace('.','').replace('?','').strip() + try: + return int(s) + except e: + return s if n: - return int(n.replace(',','').replace('.','').replace('?','').strip()) + return n return None for row in rows: @@ -175,7 +183,7 @@ def process_paper(row, addresses, success): with open(fn, 'r') as f: data = json.load(f) - print('>> {}'.format(data['paperId'])) + print('>> {} {}'.format(data['paperId'], row['key'])) paper = load_paper(data['paperId']) if paper is None: print("Paper missing! {}".format(data['paperId'])) @@ -332,7 +340,7 @@ def process_paper(row, addresses, success): f.write('<script src="../map.js"></script>') f.write("</html>") # template = env.get_template('paper.html') - with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, paper.key), 'w') as f: + with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f: json.dump({ 'id': paper.paper_id, 'paper': res, |
