diff options
| author | jules@lens <julescarbon@gmail.com> | 2019-05-30 15:10:34 +0200 |
|---|---|---|
| committer | jules@lens <julescarbon@gmail.com> | 2019-05-30 15:10:34 +0200 |
| commit | a7b940665c82b4710c73099d22f347fc30017e3c (patch) | |
| tree | 8513d8d5872351014067d0012b7b6665acc5bf08 /scraper/s2-final-report.py | |
| parent | c8829966aec3e6d0ebf6f288c34bc87ff0ada3ae (diff) | |
getting all those brainwash citations
Diffstat (limited to 'scraper/s2-final-report.py')
| -rw-r--r-- | scraper/s2-final-report.py | 22 |
1 file changed, 13 insertions, 9 deletions
```diff
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index c9795680..63789d85 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -22,8 +22,8 @@ def s2_final_report():
   verified_lookup, verified_totals = fetch_verified_paper_lookup()
   items = []
   for key, item in megapixels.items():
-    if key != 'brainwash':
-      continue
+    #if key != 'brainwash':
+    #  continue
     ft_share = 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y'
     nyt_share = 'nyt_share' in item['dataset'] and item['dataset']['nyt_share'] == 'Y'
     if ft_share or nyt_share:
@@ -47,11 +47,11 @@ def s2_final_report():
   #   DIR_PUBLIC_CITATIONS + '/',
   #   "s3://megapixels/v1/citations/",
   # ])
-  #subprocess.call([
-  #  "s3cmd", "put", "-P", "--recursive",
-  #  DIR_VERIFIED_CITATIONS + '/',
-  #  "s3://megapixels/v1/citations/verified/",
-  #])
+  subprocess.call([
+    "s3cmd", "put", "-P", "--recursive",
+    DIR_VERIFIED_CITATIONS + '/',
+    "s3://megapixels/v1/citations/verified/",
+  ])
 
 def process_paper(row, verified_lookup, verified_totals):
   aggregate_citations = {}
@@ -75,8 +75,12 @@ def process_paper(row, verified_lookup, verified_totals):
     process_single_paper(row, 'search', addresses, aggregate_citations, unknown_citations)
 
   for paper_id in verified_lookup.keys():
-    if paper_id not in aggregate_citations:
-      print('S2 API missing verified citation: {}'.format(paper_id))
+    if paper_id in aggregate_citations:
+      pass
+    elif paper_id in unknown_citations:
+      print('Verified paper needs address: {}'.format(paper_id))
+    else:
+      print('S2 API missing new verified citation: {}'.format(paper_id))
 
   process_single_paper(row, 'verified', addresses, aggregate_citations, unknown_citations, verified_lookup.keys())
 
```
