From 2116027843edad22d87e6a56269b26cd6aafb8e8 Mon Sep 17 00:00:00 2001
From: "jules@lens"
Date: Wed, 20 Feb 2019 18:36:25 +0100
Subject: updating all reports

---
 scraper/s2-citation-report.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'scraper/s2-citation-report.py')

diff --git a/scraper/s2-citation-report.py b/scraper/s2-citation-report.py
index e4e41478..c515ec37 100644
--- a/scraper/s2-citation-report.py
+++ b/scraper/s2-citation-report.py
@@ -184,6 +184,10 @@ def process_paper(row, addresses, success):
   address_count = 0
 
   fn = file_path('papers', row['paper_id'], 'paper.json')
+  if not os.path.exists(fn):
+    print("not found: {}".format(fn))
+    print(row)
+    return
   with open(fn, 'r') as f:
     data = json.load(f)
 
@@ -364,7 +368,8 @@ def load_megapixels_queries():
     rec = {}
     for index, key in enumerate(keys):
       rec[key] = row[index]
-    recs.append(rec)
+    if rec['verified'] == '1':
+      recs.append(rec)
   return recs
 
 #def load_institutions(paperId):
-- 
cgit v1.2.3-70-g09d2