diff options
| -rw-r--r-- | s2-dump-db-pdf-urls.py | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/s2-dump-db-pdf-urls.py b/s2-dump-db-pdf-urls.py index 0fd84fd1..473e90af 100644 --- a/s2-dump-db-pdf-urls.py +++ b/s2-dump-db-pdf-urls.py @@ -13,11 +13,27 @@ def s2_dump_pdf_urls(): # store it and the paper id # another script will fetch the urls from this process rows = [] + pdf_count = 0 + ieee_count = 0 + extra_count = 0 + empty_count = 0 for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True): row = process_paper(fn) if row is not None: rows.append(row) + if row[1] is not None: + pdf_count += 1 + if row[2] is not None: + ieee_count += 1 + if row[3] is not None: + extra_count += 1 + if row[1] is None and row[2] is None and row[3] is None: + empty_count += 1 print("Wrote {} rows".format(len(rows))) + print("pdf count: {}".format(pdf_count)) + print("ieee count: {}".format(ieee_count)) + print("extra count: {}".format(extra_count)) + print("empty count: {}".format(empty_count)) write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'Extra URL'], rows=rows) def process_paper(fn): |
