summaryrefslogtreecommitdiff
path: root/scraper/s2-scrape.sh
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-20 17:19:57 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-20 17:19:57 +0100
commit: b744e9a307dbcaefc1b95957124aed0e96f29e14 (patch)
tree: 66408b3a1f84440cd59f1b2574b16fd14a3a407a /scraper/s2-scrape.sh
parent: 16f7583dbdb2928c557d0ee3766f809779ae9b39 (diff)
parent: 078574a7b827106ff1130b7c12320c69202394a9 (diff)
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'scraper/s2-scrape.sh')
-rw-r--r-- scraper/s2-scrape.sh 27
1 files changed, 24 insertions, 3 deletions
diff --git a/scraper/s2-scrape.sh b/scraper/s2-scrape.sh
index e1df67c5..3f548e6e 100644
--- a/scraper/s2-scrape.sh
+++ b/scraper/s2-scrape.sh
@@ -2,19 +2,40 @@
# These are the pertinent scripts for updating an existing S2 citation scrape.
+echo ">> s2-papers.py"
python s2-papers.py
+
+echo ">> s2-dump-ids.py"
python s2-dump-ids.py
+
+echo ">> s2-extract-papers.py (skipped)"
#python s2-extract-papers.py
+
+echo ">> s2-dump-missing-paper-ids.py"
python s2-dump-missing-paper-ids.py
+
+echo ">> s2-raw-papers.py"
python s2-raw-papers.py
+
+echo ">> s2-dump-db-pdf-urls.py"
python s2-dump-db-pdf-urls.py
+
+echo ">> s2-fetch-pdf.py"
python s2-fetch-pdf.py
+
+echo ">> s2-fetch-doi.py"
python s2-fetch-doi.py
+
+echo ">> s2-extract-pdf-txt.py"
python s2-extract-pdf-txt.py
+
+echo ">> s2-citation-report.py"
python s2-citation-report.py
+
+echo ">> s2-final-report.py"
python s2-final-report.py
-rm s2-final-all-papers.tar
-tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
-scp s2-final-all-papers.tar lens@neural.garden:www/files/
+#rm s2-final-all-papers.tar
+#tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
+#scp s2-final-all-papers.tar lens@neural.garden:www/files/