s2-dump-db-pdf-urls.py

author: Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:42:24 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:42:24 +0100
commit: aff1e8ed7bfbfc93a98e900e1781ad1fe5e3d47d (patch)
tree: 4e0078f03fc27b3fd8ae6642143228bc5b674022 /s2-dump-db-pdf-urls.py
parent: 2278adead1ff16115f8b989dc316bdf9efe9e37d (diff)
1 file changed, 8 insertions, 1 deletion
diff --git a/s2-dump-db-pdf-urls.py b/s2-dump-db-pdf-urls.py
index 520b513e..0ac9d0aa 100644
--- a/s2-dump-db-pdf-urls.py
+++ b/s2-dump-db-pdf-urls.py
@@ -12,12 +12,19 @@ def s2_dump_pdf_urls():
   # get all the PDF urls, pick the best one
   # store it and the paper id
   # another script will fetch the urls from this process
-  rows = [process_paper(fn) for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True)]
+  rows = []
+  for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True):
+    row = process_paper(fn)
+    if row is not None:
+      rows.append(row)
   print("Wrote {} rows".format(len(rows)))
   write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'Extra URL'], rows=rows)
 
 def process_paper(fn, lookups):
   paper = read_json(fn)
+  print(fn)
+  if paper is None:
+    return None
   paper_id = paper['id']
   pdf_url = None
   ieee_url = None
author	Jules Laplace <julescarbon@gmail.com>	2018-11-03 18:42:24 +0100
committer	Jules Laplace <julescarbon@gmail.com>	2018-11-03 18:42:24 +0100
commit	aff1e8ed7bfbfc93a98e900e1781ad1fe5e3d47d (patch)
tree	4e0078f03fc27b3fd8ae6642143228bc5b674022 /s2-dump-db-pdf-urls.py
parent	2278adead1ff16115f8b989dc316bdf9efe9e37d (diff)