summaryrefslogtreecommitdiff
path: root/scraper/s2-scrape.sh
blob: 8c5632a0528bbf12bf2863ff1b3d22ee08c7ccd0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/bin/bash

# Runs the pertinent scripts for updating an existing S2 citation scrape.
#
# The scripts are started one after another, in the order listed in `steps`.
# Before each one starts, a ">> <script>" marker line is written to stdout.
# No exit status is checked here, so a step that fails does not prevent the
# following steps from running.

steps=(
  s2-papers.py
  s2-dump-ids.py
  # s2-extract-papers.py  -- currently disabled
  s2-dump-missing-paper-ids.py
  s2-raw-papers.py
  s2-dump-db-pdf-urls.py
  s2-fetch-pdf.py
  s2-fetch-doi.py
  s2-extract-pdf-txt.py
  s2-citation-report.py
  s2-final-report.py
)

for step in "${steps[@]}"; do
  echo ">> ${step}"
  python "${step}"
done

# Disabled: rebuild the tarball of the final papers and copy it to the remote host.
#rm s2-final-all-papers.tar
#tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
#scp s2-final-all-papers.tar lens@neural.garden:www/files/