summary | refs | log | tree | commit | diff
path: root/s2-dump-db-pdf-urls.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-11-03 19:08:06 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-11-03 19:08:06 +0100
commit: 8299124ecc34fd4885e7b525b849a44083ab334b (patch)
tree: 77192f6095f7af6bb6c16ab2df594ebc4951d62c /s2-dump-db-pdf-urls.py
parent: 7f385e46dce654405fc965668a8104e876c8aa6d (diff)
s2-dump-db-pdf-urls.py
Diffstat (limited to 's2-dump-db-pdf-urls.py')
-rw-r--r-- s2-dump-db-pdf-urls.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/s2-dump-db-pdf-urls.py b/s2-dump-db-pdf-urls.py
index 0fd84fd1..473e90af 100644
--- a/s2-dump-db-pdf-urls.py
+++ b/s2-dump-db-pdf-urls.py
@@ -13,11 +13,27 @@ def s2_dump_pdf_urls():
# store it and the paper id
# another script will fetch the urls from this process
rows = []
+ pdf_count = 0
+ ieee_count = 0
+ extra_count = 0
+ empty_count += 1
for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True):
row = process_paper(fn)
if row is not None:
rows.append(row)
+ if row[1] is not None:
+ pdf_count += 1
+ if row[2] is not None:
+ ieee_count += 1
+ if row[3] is not None:
+ extra_count += 1
+ if row[1] is None and row[2] is None and row[3] is None:
+ empty_count += 1
print("Wrote {} rows".format(len(rows)))
+ print("pdf count: {}".format(pdf_count))
+ print("ieee count: {}".format(ieee_count))
+ print("url count: {}".format(url_count))
+ print("empty count: {}".format(empty_count))
write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'Extra URL'], rows=rows)
def process_paper(fn):