summary refs log tree commit diff
path: root/s2-dump-db-pdf-urls.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:42:24 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:42:24 +0100
commit aff1e8ed7bfbfc93a98e900e1781ad1fe5e3d47d (patch)
tree 4e0078f03fc27b3fd8ae6642143228bc5b674022 /s2-dump-db-pdf-urls.py
parent 2278adead1ff16115f8b989dc316bdf9efe9e37d (diff)
s2-dump-db-pdf-urls.py
Diffstat (limited to 's2-dump-db-pdf-urls.py')
-rw-r--r-- s2-dump-db-pdf-urls.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/s2-dump-db-pdf-urls.py b/s2-dump-db-pdf-urls.py
index 520b513e..0ac9d0aa 100644
--- a/s2-dump-db-pdf-urls.py
+++ b/s2-dump-db-pdf-urls.py
@@ -12,12 +12,19 @@ def s2_dump_pdf_urls():
# get all the PDF urls, pick the best one
# store it and the paper id
# another script will fetch the urls from this process
- rows = [process_paper(fn) for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True)]
+ rows = []
+ for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True):
+ row = process_paper(fn)
+ if row is not None:
+ rows.append(row)
print("Wrote {} rows".format(len(rows)))
write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'Extra URL'], rows=rows)
def process_paper(fn, lookups):
paper = read_json(fn)
+ print(fn)
+ if paper is None:
+ return None
paper_id = paper['id']
pdf_url = None
ieee_url = None