diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:19:57 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 17:19:57 +0100 |
| commit | b744e9a307dbcaefc1b95957124aed0e96f29e14 (patch) | |
| tree | 66408b3a1f84440cd59f1b2574b16fd14a3a407a /scraper/s2-scrape.sh | |
| parent | 16f7583dbdb2928c557d0ee3766f809779ae9b39 (diff) | |
| parent | 078574a7b827106ff1130b7c12320c69202394a9 (diff) | |
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'scraper/s2-scrape.sh')
| -rw-r--r-- | scraper/s2-scrape.sh | 27 |
1 files changed, 24 insertions, 3 deletions
```diff
diff --git a/scraper/s2-scrape.sh b/scraper/s2-scrape.sh
index e1df67c5..3f548e6e 100644
--- a/scraper/s2-scrape.sh
+++ b/scraper/s2-scrape.sh
@@ -2,19 +2,40 @@
 
 # These are the pertinent scripts for updating an existing S2 citation scrape.
 
+echo ">> s2-papers.py"
 python s2-papers.py
+
+echo ">> s2-dump-ids.py"
 python s2-dump-ids.py
+
+echo ">> s2-dump-ids.py"
 #python s2-extract-papers.py
+
+echo ">> s2-dump-missing-paper-ids.py"
 python s2-dump-missing-paper-ids.py
+
+echo ">> s2-raw-papers.py"
 python s2-raw-papers.py
+
+echo ">> s2-dump-db-pdf-urls.py"
 python s2-dump-db-pdf-urls.py
+
+echo ">> s2-fetch-pdf.py"
 python s2-fetch-pdf.py
+
+echo ">> s2-fetch-doi.py"
 python s2-fetch-doi.py
+
+echo ">> s2-extract-pdf-txt.py"
 python s2-extract-pdf-txt.py
+
+echo ">> s2-citation-report.py"
 python s2-citation-report.py
+
+echo ">> s2-final-report.py"
 python s2-final-report.py
 
-rm s2-final-all-papers.tar
-tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
-scp s2-final-all-papers.tar lens@neural.garden:www/files/
+#rm s2-final-all-papers.tar
+#tar cf s2-final-all-papers.tar datasets/s2/papers/ datasets/s2/raw_papers/ datasets/s2/pdf/*/*/*.txt
+#scp s2-final-all-papers.tar lens@neural.garden:www/files/
 
```
