blob: 4ee5041a3672622671ce3bfde674d0a1c7effe04 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#!/usr/bin/perl
use lib "../lib";
use Rest;
# Shared HTTP client from the project-local Rest module; the scraping subs
# in this file issue their requests through it.
my $browser = Rest->new;

# Path of the wget binary. It must be assigned before make_wget_commands()
# is called, because that sub interpolates it into the command lines.
my $BIN_WGET = "/usr/bin/wget";
my ($WGET_SINGLE, $WGET_WEBPAGE) = make_wget_commands();

# Main flow: collect the sendspace links from the lemmingtrail page, resolve
# each one to its download URL, and hand that URL to wget.
my @sendspace_urls = lemmingtrail_urls();
foreach my $sendspace_url (@sendspace_urls)
{
    # sendspace_get returns a list; only its first URL is downloaded.
    my ($url_dl) = sendspace_get($sendspace_url);
    dl($url_dl);
}

# End of the main program.
exit;
# dl($url)
#
# Downloads a single URL by running the prebuilt $WGET_SINGLE command line
# through the shell. Does nothing when $url is empty or undefined.
#
# The URL originates from scraped HTML, so it is single-quoted for the shell
# before being appended. Without that, characters such as '&', ';' or spaces
# in a link would be interpreted by the shell instead of reaching wget.
#
# Returns the raw status of system() (0 on success), or nothing when no URL
# was supplied.
sub dl
{
    my ($url) = @_;
    return unless $url;

    # POSIX shell quoting: close the quote, emit an escaped quote, reopen it.
    (my $quoted_url = $url) =~ s/'/'\\''/g;

    my $status = system($WGET_SINGLE . " '" . $quoted_url . "'");
    warn "wget failed for $url (status $status)\n" if $status != 0;
    return $status;
}
# lemmingtrail_urls()
#
# Fetches the hard-coded lemmingtrail page through $browser and returns the
# links found on it whose URL matches the pattern "sendspace.com".
sub lemmingtrail_urls
{
    my $page_html = $browser->rest_get_raw("http://www.lemmingtrail.com/mb/207771/");

    my @found = html_scrape_urls($page_html, "sendspace.com");
    return @found;
}
# sendspace_get($sendspace_url)
#
# POSTs the "REGULAR DOWNLOAD" form field to the given sendspace page via
# $browser, reports the size of the response on STDOUT, and returns the links
# in that response whose URL matches the pattern "sendspace.com/dl".
# Returns nothing when no URL is given.
sub sendspace_get
{
    my ($sendspace_url) = @_;
    if (!$sendspace_url)
    {
        return;
    }

    my %form = (download => " REGULAR DOWNLOAD ");
    my $page_html = $browser->rest_post_raw($sendspace_url, \%form);

    my $size = length($page_html);
    print "got content: " . $size . " bytes\n";

    my @download_links = html_scrape_urls($page_html, "sendspace.com/dl");
    return @download_links;
}
# html_scrape_urls($content, $valid_url_match)
#
# Extracts link targets from a string of HTML.
#
#   $content         - HTML text to scan; undef or empty yields no URLs.
#   $valid_url_match - regular expression (as a string) that a URL must match
#                      to be kept. An empty or undefined filter keeps every
#                      URL.
#
# The content is cut at every "<a" and the first double-quoted href="..."
# value in each piece is taken as that piece's URL. Pieces without a
# double-quoted href are skipped. Each accepted URL is also printed to STDOUT
# as "URL: <url>".
#
# Returns the list of accepted URLs in document order.
sub html_scrape_urls
{
    my ($content, $valid_url_match) = @_;
    my @urls = ();

    # Nothing to scan, e.g. the fetch produced no body.
    return @urls unless defined $content && length $content;

    # Compile the filter once. Wrapping it in (?:...) keeps an empty filter
    # from hitting Perl's "empty pattern repeats the last successful match"
    # special case.
    my $filter = defined $valid_url_match ? $valid_url_match : "";
    my $filter_re = qr/(?:$filter)/;

    foreach my $chunk (split /<a/, $content)
    {
        # Capture in list context so a failed match gives undef rather than
        # whatever $1 happened to hold.
        # Example target:
        # http://fs05n5.sendspace.com/dl/181e8d00c2955c7862d9a0d559c12cf1/4ade166e37a8af6c/bwbz9y/Sporelec.zip
        my ($url) = $chunk =~ /href="([^"]+)"/;
        next unless defined $url;
        next unless $url =~ $filter_re;

        print "URL: " . $url . "\n";
        push @urls, $url;
    }
    return @urls;
}
# make_wget_commands()
#
# Builds the two wget command-line prefixes used by this script and returns
# them as the list ($single, $webpage):
#
#   $single  - fetches one URL into the download directory.
#   $webpage - same, plus debug logging to "wgetlog" and the page-mirroring
#              switches listed below.
#
# The download directory is $PROJECT when that is set to a true value,
# otherwise "wgetdir". The wget binary path is taken from $BIN_WGET.
sub make_wget_commands
{
    my $user_agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705)";
    my $target_dir = $PROJECT ? $PROJECT : "wgetdir";

    # Options shared by both command lines.
    my $ua_opt  = "--user-agent='$user_agent'";
    my $dir_opt = "--directory-prefix=$target_dir";

    my $single = join " ", $BIN_WGET, "-erobots=off", $ua_opt, $dir_opt;

    # -E = --html-extension
    # -H = --span-hosts
    # -k = --convert-links
    # -K = --backup-converted
    # -p = --page-requisites
    my $webpage = join " ",
        $BIN_WGET, "-erobots=off", "-d", "-o", "wgetlog",
        $ua_opt, "-E", "-H", "-K", "-k", "-p", "--no-directories",
        $dir_opt;

    return ($single, $webpage);
}
# Conventional true value at end of file. The main program calls exit before
# the sub definitions, so this statement is never executed.
1;
|