author    Jules Laplace <julescarbon@gmail.com>    2019-02-25 17:07:47 +0100
committer Jules Laplace <julescarbon@gmail.com>    2019-02-25 17:07:47 +0100
commit    bc240ba2a4b5e30710d37af88eccd905209fc263 (patch)
tree      536281157e7a394edff02343926ae80c0d7daf90 /scraper/s2-final-report.py
parent    cd624bdcc5307713dca541f1be130450e86d62ea (diff)
update final report
Diffstat (limited to 'scraper/s2-final-report.py')
-rw-r--r--  scraper/s2-final-report.py  27
1 file changed, 23 insertions(+), 4 deletions(-)
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 197d5642..878640ac 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -8,7 +8,8 @@ import click
import subprocess
from util import *
-DIR_PUBLIC_CITATIONS = "../site/datasets/final"
+DIR_PUBLIC_CITATIONS = "../site/datasets/public"
+DIR_FINAL_CITATIONS = "../site/datasets/final"
DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
addresses = AddressBook()
@@ -22,6 +23,11 @@ def s2_final_report():
        if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
            items.append((item,))
    parallelize(process_paper, items)
+    subprocess.call([
+        "s3cmd", "put", "-P", "--recursive",
+        DIR_PUBLIC_CITATIONS + '/',
+        "s3://megapixels/v1/citations/",
+    ])
def process_paper(row):
    aggregate_citations = {}
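The new block shells out to s3cmd once all papers have been processed, pushing the public citations directory to S3. As a minimal standalone sketch of the same step, assuming s3cmd is installed and configured with credentials (upload_public_citations is a hypothetical wrapper name; the diff itself uses subprocess.call, which ignores a non-zero exit status, whereas check_call raises):

import subprocess

DIR_PUBLIC_CITATIONS = "../site/datasets/public"

def upload_public_citations():
    # -P (--acl-public) makes the uploaded objects world-readable;
    # --recursive walks the directory tree.
    # check_call raises CalledProcessError on a non-zero exit code,
    # so a failed upload is not silently ignored.
    subprocess.check_call([
        "s3cmd", "put", "-P", "--recursive",
        DIR_PUBLIC_CITATIONS + '/',
        "s3://megapixels/v1/citations/",
    ])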
@@ -38,11 +44,12 @@ def process_paper(row):
            address_list.append(res['address'])
    if not len(papers):
        return
-    with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+    paper = papers[0]
+    with open('{}/{}.json'.format(DIR_FINAL_CITATIONS, row['key']), 'w') as f:
        json.dump({
-            'id': papers[0]['paper_id'],
+            'id': paper['paper_id'],
            'dataset': row['dataset'],
-            'paper': papers[0],
+            'paper': paper,
            'addresses': address_list,
            'additional_papers': papers[1:],
            'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
@@ -52,6 +59,18 @@ def process_paper(row):
            'id': papers[0]['paper_id'],
            'citations': [unknown_citations[key] for key in unknown_citations.keys()],
        }, f)
+    with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+        json.dump({
+            'id': paper['paper_id'],
+            'paper': {
+                'key': row['key'],
+                'name': row['name'],
+                'title': paper['title'],
+                'year': paper['year'],
+            },
+            'address': address_list[0],
+            'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
+        }, f)
def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations):
    res = {
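Net effect of the last two hunks: the full record (paper, additional_papers, addresses, dataset, citations) now lands in DIR_FINAL_CITATIONS, while DIR_PUBLIC_CITATIONS gets a trimmed payload carrying only the id, a four-field paper summary, the first address, and the aggregated citations. A minimal sketch of reading one of the trimmed public files back, assuming only the keys written by the json.dump call above (load_public_citation is a hypothetical helper, not part of the scraper):

import json

def load_public_citation(path):
    # Reads one trimmed public citation file as written by process_paper.
    with open(path) as f:
        data = json.load(f)
    # Keys mirror the public json.dump call in the diff:
    # 'id', 'paper' (key/name/title/year), 'address', 'citations'.
    print(data['paper']['title'], data['paper']['year'])
    print('citations:', len(data['citations']))
    return data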