Diffstat (limited to 'bucky2/rest/sendspace.pl')
-rwxr-xr-x    bucky2/rest/sendspace.pl    71
1 files changed, 71 insertions, 0 deletions
diff --git a/bucky2/rest/sendspace.pl b/bucky2/rest/sendspace.pl
new file mode 100755
index 0000000..4ee5041
--- /dev/null
+++ b/bucky2/rest/sendspace.pl
@@ -0,0 +1,71 @@
+#!/usr/bin/perl
+use lib "../lib";
+use Rest;
+my $browser = new Rest;
+my $BIN_WGET = "/usr/bin/wget";
+my ($WGET_SINGLE, $WGET_WEBPAGE) = & make_wget_commands;
+
+my @sendspace_urls = & lemmingtrail_urls;
+foreach my $sendspace_url (@sendspace_urls)
+  {
+  my ($url_dl) = & sendspace_get($sendspace_url);
+  & dl($url_dl);
+  }
+exit;
+sub dl
+  {
+  my ($url) = @_;
+  return unless $url;
+  system($WGET_SINGLE . " " . $url);
+  }
+sub lemmingtrail_urls
+  {
+  my $lemmingtrail_url = "http://www.lemmingtrail.com/mb/207771/";
+  my $content = $browser->rest_get_raw($lemmingtrail_url);
+  my @urls = & html_scrape_urls($content, "sendspace.com");
+  return @urls;
+  }
+sub sendspace_get
+  {
+  my ($sendspace_url) = @_;
+  return unless $sendspace_url;
+  my $content = $browser->rest_post_raw($sendspace_url, {download=>" REGULAR DOWNLOAD "});
+  print "got content: " . length($content) . " bytes\n";
+  my @urls = & html_scrape_urls($content, "sendspace.com/dl");
+  return @urls;
+  }
+sub html_scrape_urls
+  {
+  my ($content, $valid_url_match) = @_;
+  my @lines = split "<a", $content;
+  my @urls = ();
+  foreach my $line (@lines)
+    {
+    next unless $line =~ /href=/;
+    $line =~ /href="([^"]+)"/;
+    # http://fs05n5.sendspace.com/dl/181e8d00c2955c7862d9a0d559c12cf1/4ade166e37a8af6c/bwbz9y/Sporelec.zip
+    my $url = $1;
+    next unless $url =~ /$valid_url_match/;
+    print "URL: " . $url ."\n";
+    push @urls, $url;
+    }
+  return @urls;
+  }
+sub make_wget_commands
+  {
+  my $ua = ("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705)");
+  my $dp = $PROJECT || "wgetdir";
+
+  # -E = --html-extension
+  # -H = --span-hosts
+  # -k = --convert-links
+  # -K = --backup-converted
+  # -p = --page-requisite
+
+  my $SINGLE = "$BIN_WGET -erobots=off --user-agent='$ua' --directory-prefix=$dp";
+  my $WEBPAGE = "$BIN_WGET -erobots=off -d -o wgetlog " .
+                "--user-agent='$ua' -E -H -K -k -p --no-directories " .
+                "--directory-prefix=$dp";
+  return ($SINGLE, $WEBPAGE);
+  }
+1;
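The script loads a Rest module from ../lib that is not part of this commit, so the diff alone does not run. Below is a minimal sketch of what ../lib/Rest.pm presumably provides, assuming an LWP::UserAgent backend; only the constructor and the two method names (rest_get_raw, rest_post_raw) are taken from the calls in sendspace.pl above, everything else is an assumption.

package Rest;
use strict;
use warnings;
use LWP::UserAgent;

# Constructor: wrap a single LWP::UserAgent instance.
# (Assumed shape; the real ../lib/Rest.pm is not in this commit.)
sub new
  {
  my ($class) = @_;
  my $self = { ua => LWP::UserAgent->new };
  return bless $self, $class;
  }

# GET a URL and return the raw response body, or an empty string on failure.
sub rest_get_raw
  {
  my ($self, $url) = @_;
  my $res = $self->{ua}->get($url);
  return $res->is_success ? $res->decoded_content : "";
  }

# POST form fields (hashref) to a URL and return the raw response body.
sub rest_post_raw
  {
  my ($self, $url, $fields) = @_;
  my $res = $self->{ua}->post($url, $fields);
  return $res->is_success ? $res->decoded_content : "";
  }

1;

With a module of this shape on ../lib, the script can be run from bucky2/rest as ./sendspace.pl (the relative use lib path assumes that working directory); since $PROJECT is unset, downloads land in the wgetdir directory created by wget's --directory-prefix option.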
