summary | refs | log | tree | commit | diff
path: root/scraper/s2-final-report.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/s2-final-report.py')
-rw-r--r-- scraper/s2-final-report.py | 22
1 files changed, 13 insertions, 9 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index c9795680..63789d85 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -22,8 +22,8 @@ def s2_final_report():
verified_lookup, verified_totals = fetch_verified_paper_lookup()
items = []
for key, item in megapixels.items():
- if key != 'brainwash':
- continue
+ #if key != 'brainwash':
+ # continue
ft_share = 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y'
nyt_share = 'nyt_share' in item['dataset'] and item['dataset']['nyt_share'] == 'Y'
if ft_share or nyt_share:
@@ -47,11 +47,11 @@ def s2_final_report():
# DIR_PUBLIC_CITATIONS + '/',
# "s3://megapixels/v1/citations/",
# ])
- #subprocess.call([
- # "s3cmd", "put", "-P", "--recursive",
- # DIR_VERIFIED_CITATIONS + '/',
- # "s3://megapixels/v1/citations/verified/",
- #])
+ subprocess.call([
+ "s3cmd", "put", "-P", "--recursive",
+ DIR_VERIFIED_CITATIONS + '/',
+ "s3://megapixels/v1/citations/verified/",
+ ])
def process_paper(row, verified_lookup, verified_totals):
aggregate_citations = {}
@@ -75,8 +75,12 @@ def process_paper(row, verified_lookup, verified_totals):
process_single_paper(row, 'search', addresses, aggregate_citations, unknown_citations)
for paper_id in verified_lookup.keys():
- if paper_id not in aggregate_citations:
- print('S2 API missing verified citation: {}'.format(paper_id))
+ if paper_id in aggregate_citations:
+ pass
+ elif paper_id in unknown_citations:
+ print('Verified paper needs address: {}'.format(paper_id))
+ else:
+ print('S2 API missing new verified citation: {}'.format(paper_id))
process_single_paper(row, 'verified', addresses, aggregate_citations, unknown_citations, verified_lookup.keys())