1 files changed, 25 insertions, 7 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 4b74750a..ec4ad25b 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -8,7 +8,8 @@ import click
 import subprocess
 from util import *
 
-DIR_PUBLIC_CITATIONS = "../site/datasets/final"
+DIR_PUBLIC_CITATIONS = "../site/datasets/citations"
+DIR_FINAL_CITATIONS = "../site/datasets/final"
 DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
 
 addresses = AddressBook()
@@ -19,9 +20,14 @@ def s2_final_report():
   megapixels = load_megapixels_lookup()
   items = []
   for key, item in megapixels.items():
-    if item['dataset']['ft_share'] == '1':
+    if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
       items.append((item,))
   parallelize(process_paper, items)
+  subprocess.call([
+    "s3cmd", "put", "-P", "--recursive",
+    DIR_PUBLIC_CITATIONS + '/',
+    "s3://megapixels/v1/citations/",
+  ])
 
 def process_paper(row):
   aggregate_citations = {}
@@ -38,12 +44,12 @@ def process_paper(row):
         address_list.append(res['address'])
   if not len(papers):
     return
-  with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+  paper = papers[0]
+  with open('{}/{}.json'.format(DIR_FINAL_CITATIONS, row['key']), 'w') as f:
     json.dump({
-      'id': papers[0]['paper_id'],
+      'id': paper['paper_id'],
       'dataset': row['dataset'],
-      'statistics': row['statistics'],
-      'paper': papers[0],
+      'paper': paper,
       'addresses': address_list,
       'additional_papers': papers[1:],
       'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
@@ -53,6 +59,18 @@ def process_paper(row):
       'id': papers[0]['paper_id'],
       'citations': [unknown_citations[key] for key in unknown_citations.keys()],
     }, f)
+  with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
+    json.dump({
+      'id': paper['paper_id'],
+      'paper': {
+        'key': row['key'],
+        'name': row['name'],
+        'title': paper['title'],
+        'year': paper['year'],
+      },
+      'address': address_list[0] if len(address_list) else {},
+      'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
+    }, f)
 
 def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations):
   res = {
@@ -184,7 +202,7 @@ def load_megapixels_lookup():
       lookup[paper_key]['dataset'] = dataset_lookup[paper_key]
     else:
       print("not in datasets lookup:", paper_key)
-    # recs.append(rec)
+      lookup[paper_key]['dataset'] = {}
   return lookup
 
 if __name__ == '__main__':