summary | refs | log | tree | commit | diff
path: root/s2-dump-db-pdf-urls.py
diff options
context:
space:
mode:
Diffstat (limited to 's2-dump-db-pdf-urls.py')
-rw-r--r-- s2-dump-db-pdf-urls.py 18
1 files changed, 9 insertions, 9 deletions
diff --git a/s2-dump-db-pdf-urls.py b/s2-dump-db-pdf-urls.py
index 406bfeea..dbcb91d8 100644
--- a/s2-dump-db-pdf-urls.py
+++ b/s2-dump-db-pdf-urls.py
@@ -6,7 +6,7 @@ from urllib.parse import urlparse
import operator
from util import *
-PAPER_JSON_DIR = 'datasets/s2/raw_papers'
+PAPER_JSON_DIR = 'datasets/s2/db_papers'
@click.command()
def s2_dump_pdf_urls():
@@ -25,9 +25,9 @@ def s2_dump_pdf_urls():
doi = []
for fn in glob.iglob('{}/**/paper.json'.format(PAPER_JSON_DIR), recursive=True):
# if 'db_paper' in fn:
- # row = process_db_paper(fn)
+ row = process_db_paper(fn)
# elif 'raw_paper' in fn:
- row = process_raw_paper(fn)
+ # row = process_raw_paper(fn)
if row is not None:
rows.append(row)
if row[1] is not None:
@@ -58,12 +58,12 @@ def s2_dump_pdf_urls():
for domain, count in sorted(domains.items(), key=operator.itemgetter(1)):
print(" -- {} - {}".format(domain, count))
print("empty count: {}".format(empty_count))
- # write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'DOI URL', 'Extra URL'], rows=rows)
- # write_csv('db_paper_pdf.csv', keys=None, rows=pdf)
- # write_csv('db_paper_doi.csv', keys=None, rows=doi)
- write_csv('raw_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'DOI URL', 'Extra URL'], rows=rows)
- write_csv('raw_paper_pdf.csv', keys=None, rows=pdf)
- write_csv('raw_paper_doi.csv', keys=None, rows=doi)
+ write_csv('db_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'DOI URL', 'Extra URL'], rows=rows)
+ write_csv('db_paper_pdf.csv', keys=None, rows=pdf)
+ write_csv('db_paper_doi.csv', keys=None, rows=doi)
+ # write_csv('raw_paper_pdf_list.csv', keys=['Paper ID', 'PDF URL', 'IEEE URL', 'DOI URL', 'Extra URL'], rows=rows)
+ # write_csv('raw_paper_pdf.csv', keys=None, rows=pdf)
+ # write_csv('raw_paper_doi.csv', keys=None, rows=doi)
def process_db_paper(fn):
# print(fn)