summaryrefslogtreecommitdiff
path: root/scraper/s2-scrape.sh
blob: e1df67c5321cc675a872bff9b537fa64fbd92987 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/bin/bash

# These are the pertinent scripts for updating an existing S2 citation scrape.
#
# The stages below run strictly in order. Afterwards their outputs are bundled
# into s2-final-all-papers.tar and copied to the remote host.
#
# All paths are relative, so run this from the directory that holds the
# s2-*.py scripts and the datasets/ tree.

# Abort on the first failing stage (and on unset variables or failed pipeline
# members) so a broken run never archives and uploads stale or partial output.
# NOTE(review): if any stage is expected to exit non-zero on a "soft" failure,
# append `|| true` to that line specifically.
set -euo pipefail

python s2-papers.py
python s2-dump-ids.py
# Stage intentionally disabled; kept for reference.
#python s2-extract-papers.py
python s2-dump-missing-paper-ids.py
python s2-raw-papers.py
python s2-dump-db-pdf-urls.py
python s2-fetch-pdf.py
python s2-fetch-doi.py
python s2-extract-pdf-txt.py
python s2-citation-report.py
python s2-final-report.py

# -f: a missing archive (e.g. on the first run) is not an error.
rm -f s2-final-all-papers.tar
# Archive the paper records, the raw paper records, and the .txt files found
# two directory levels below datasets/s2/pdf/.
tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
# Publish the archive to the remote host.
scp s2-final-all-papers.tar lens@neural.garden:www/files/