summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2019-02-08 23:24:07 +0100
committer: jules@lens <julescarbon@gmail.com> 2019-02-08 23:24:07 +0100
commit: d2cbf2f7cb64f36c04612e3a7d996ed1b8ce7228 (patch)
tree: a4c84dd9065f29e75897d4f1e9ba4f07b839a8f3
parent: 410b7c88aaaccb2ceaf778015cb5a696b561e03c (diff)
nosleep
-rw-r--r-- scraper/s2-fetch-pdf.py 2
-rw-r--r-- scraper/s2-pdf-first-pages.py 2
2 files changed, 1 insertion, 3 deletions
diff --git a/scraper/s2-fetch-pdf.py b/scraper/s2-fetch-pdf.py
index 5477cbd5..30bc5a40 100644
--- a/scraper/s2-fetch-pdf.py
+++ b/scraper/s2-fetch-pdf.py
@@ -31,10 +31,8 @@ def fetch_pdf(paper_id, url):
size = s2.fetch_file(url, pdf_fn)
if size is None:
print("{} empty?".format(paper_id))
- time.sleep(random.randint(5, 10))
return None
print("{} {} kb".format(paper_id, int(size / 1024)))
- time.sleep(random.randint(5, 10))
return
# return paper
diff --git a/scraper/s2-pdf-first-pages.py b/scraper/s2-pdf-first-pages.py
index 0a6b20bd..6f1d81e3 100644
--- a/scraper/s2-pdf-first-pages.py
+++ b/scraper/s2-pdf-first-pages.py
@@ -30,7 +30,7 @@ def report_first_pages():
write_report('reports/first_pages.html', title='First pages', keys=None, rows=rows)
write_report('reports/institutions.html', title='Institutions', keys=None, rows=sorted(institutions, key=lambda x: x[1]))
write_report('reports/institutions_missing.html', title='Institutions', keys=None, rows=no_institutions)
- write_csv('reports/institution_names.csv', keys=None, rows=[(name,) for name in deduped_institutions])
+ write_csv('reports/institution_names_extracted.csv', keys=None, rows=[(name,) for name in deduped_institutions])
print("{} deduped institutions".format(len(deduped_institutions)))
def dedupe(a):