summary | refs | log | tree | commit | diff
path: root/scraper/s2-citation-report.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-12-16 17:47:21 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-12-16 17:47:21 +0100
commit: 948174f04d9dea93271bec62283b1c09e687e157 (patch)
tree: e755bb15d77ef21e43744fc07397b4b2b5cb20ca /scraper/s2-citation-report.py
parent: 00eb85af05208e2129e7eba46da795e4528d2735 (diff)
map and citations workin
Diffstat (limited to 'scraper/s2-citation-report.py')
-rw-r--r-- scraper/s2-citation-report.py 14
1 files changed, 11 insertions, 3 deletions
diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py
index b5849329..d70a378a 100644
--- a/scraper/s2-citation-report.py
+++ b/scraper/s2-citation-report.py
@@ -53,8 +53,16 @@ def write_master_report(fn, papers):
statistics = {}
def clean(n):
+ if type(n) is int:
+ return n
+ if type(n) is str and n:
+ s = str(n).replace(',','').replace('.','').replace('?','').strip()
+ try:
+ return int(s)
+ except e:
+ return s
if n:
- return int(n.replace(',','').replace('.','').replace('?','').strip())
+ return n
return None
for row in rows:
@@ -175,7 +183,7 @@ def process_paper(row, addresses, success):
with open(fn, 'r') as f:
data = json.load(f)
- print('>> {}'.format(data['paperId']))
+ print('>> {} {}'.format(data['paperId'], row['key']))
paper = load_paper(data['paperId'])
if paper is None:
print("Paper missing! {}".format(data['paperId']))
@@ -332,7 +340,7 @@ def process_paper(row, addresses, success):
f.write('<script src="../map.js"></script>')
f.write("</html>")
# template = env.get_template('paper.html')
- with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, paper.key), 'w') as f:
+ with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
json.dump({
'id': paper.paper_id,
'paper': res,