summaryrefslogtreecommitdiff
path: root/bucky2/rest/lemmingtrail-sendspace.pl
blob: 4ee5041a3672622671ce3bfde674d0a1c7effe04 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/perl
use lib "../lib";
use Rest;
# Shared client object used for every page fetch and form post below.
my $browser = Rest->new;
my $BIN_WGET = "/usr/bin/wget";
# Prebuilt wget command prefixes (see make_wget_commands).
my ($WGET_SINGLE, $WGET_WEBPAGE) = make_wget_commands();

# Main flow: collect the sendspace links found on the lemmingtrail page,
# resolve each one to a direct download URL, and hand that URL to wget.
my @sendspace_urls = lemmingtrail_urls();
foreach my $sendspace_url (@sendspace_urls)
	{
	# sendspace_get returns a list; only its first URL is downloaded.
	my ($url_dl) = sendspace_get($sendspace_url);
	dl($url_dl);
	}
exit;
# Download a single URL with wget, using the $WGET_SINGLE command prefix.
#
# Arguments:
#   $url - the URL to fetch; nothing happens when it is empty or undefined.
#
# Returns the status reported by system() (0 on success), or nothing when
# no URL was given.  A warning is emitted when wget cannot be run or exits
# with a non-zero status.
sub dl
	{
	my ($url) = @_;
	return unless $url;

	# The URL comes from scraped HTML, so it must not reach the shell
	# unquoted: characters such as "&" or ";" would otherwise split or
	# extend the command.  Wrap it in single quotes and escape any single
	# quote it contains.
	(my $quoted_url = $url) =~ s/'/'\\''/g;

	my $status = system($WGET_SINGLE . " '" . $quoted_url . "'");
	if ($status == -1)
		{
		warn "could not run wget for $url: $!\n";
		}
	elsif ($status != 0)
		{
		warn "wget failed for $url (exit status " . ($status >> 8) . ")\n";
		}
	return $status;
	}
# Fetch the lemmingtrail page and return every link on it whose URL
# matches "sendspace.com".  Takes no arguments.
sub lemmingtrail_urls
	{
	my $page = $browser->rest_get_raw("http://www.lemmingtrail.com/mb/207771/");
	my @sendspace_links = html_scrape_urls($page, "sendspace.com");
	return @sendspace_links;
	}
# Turn a sendspace page URL into its download links.
#
# Posts the "download" form field to the given URL, prints the size of the
# response to STDOUT, and returns every link in the response whose URL
# matches "sendspace.com/dl".  Returns nothing when no URL is given.
sub sendspace_get
	{
	my ($sendspace_url) = @_;
	$sendspace_url or return;

	my %form = (download => " REGULAR DOWNLOAD ");
	my $page = $browser->rest_post_raw($sendspace_url, \%form);

	my $size = length($page);
	print "got content: $size bytes\n";

	my @download_links = html_scrape_urls($page, "sendspace.com/dl");
	return @download_links;
	}
# Extract link targets from the anchor tags of an HTML string.
#
# Arguments:
#   $content         - HTML text to scan; undef is treated as empty.
#   $valid_url_match - pattern a URL must match to be kept.  It is
#                      interpolated as a regular expression, so "." matches
#                      any character.  When undef or empty, every URL is kept.
#
# Returns the list of kept href values in document order (duplicates are
# not removed).  Each kept URL is also printed to STDOUT as "URL: ...".
sub html_scrape_urls
	{
	my ($content, $valid_url_match) = @_;
	$content = "" unless defined $content;

	# In Perl an empty pattern means "reuse the last successful pattern",
	# so a missing filter is handled explicitly rather than interpolated.
	my $filter = (defined $valid_url_match && length $valid_url_match)
		? qr/$valid_url_match/
		: undef;

	# Matches only real anchor tags (not <abbr>, <area>, ...), in any
	# letter case, with the href value in single or double quotes.
	my $anchor_href = qr{
		<a\b [^>]*? \s href \s* = \s*   # opening of the tag, up to the href attribute
		(["']) (.*?) \1                 # quoted attribute value
	}xis;

	my @urls = ();
	while ($content =~ /$anchor_href/g)
		{
		# http://fs05n5.sendspace.com/dl/181e8d00c2955c7862d9a0d559c12cf1/4ade166e37a8af6c/bwbz9y/Sporelec.zip
		# Copy the capture immediately: the filter match below resets it.
		my $url = $2;
		next unless length $url;
		next if $filter && $url !~ $filter;
		print "URL: " . $url ."\n";
		push @urls, $url;
		}
	return @urls;
	}
# Build the wget command-line prefixes used by this script.
#
# Reads the file-level $BIN_WGET for the wget binary path and the package
# variable $PROJECT for the download directory, falling back to "wgetdir"
# when $PROJECT is unset or false.
#
# Returns ($SINGLE, $WEBPAGE): shell command strings to which a URL is
# appended.  $SINGLE fetches one file; $WEBPAGE additionally writes debug
# output to "wgetlog" and passes the page-mirroring flags listed below.
sub make_wget_commands
	{
	# Declared explicitly so the lookup of the package variable is visible
	# (and stays valid if strictures are ever enabled for this file).
	our $PROJECT;

	my $ua = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705)";
	my $dp = $PROJECT || "wgetdir";

	# The directory name ends up inside a shell command, so single-quote it
	# and escape any single quote it contains.
	(my $dp_quoted = $dp) =~ s/'/'\\''/g;

	# -E = --html-extension
	# -H = --span-hosts
	# -k = --convert-links
	# -K = --backup-converted
	# -p = --page-requisites

	my $SINGLE = "$BIN_WGET -erobots=off --user-agent='$ua' --directory-prefix='$dp_quoted'";
	my $WEBPAGE = "$BIN_WGET -erobots=off -d -o wgetlog " .
						"--user-agent='$ua' -E -H -K -k -p --no-directories " .
						"--directory-prefix='$dp_quoted'";
	return ($SINGLE, $WEBPAGE);
	}
1;