package Rest::Dailyrotten;
use base 'Rest';
# my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
# $self->write_data("../tmp/topsy_call", $topsy_data);
# exit;
# Build the per-day post index for one year of the Daily Rotten archive.
#
# Fetches the year's archive calendar page, collects the file name of every
# daily page linked from it, loads each daily page (from the local raw cache
# under ../tmp/dr/raw/ when present, otherwise over HTTP after a 5-second
# pause), parses it with dailyrotten_posts() and hands the combined list of
# { file => ..., post => ... } records to write_xml().
#
# Parameters (both optional, so existing zero-argument calls keep working):
#   $year    - archive year used in the URL and the output file name;
#              defaults to 2009.
#   $link_re - regex whose first capture group is the daily page's file name
#              as linked from the calendar page.
#
# Returns the value of write_xml(), or nothing when the calendar page could
# not be fetched (in which case no XML file is written).
sub dailyrotten_get
{
    my ($self, $year, $link_re) = @_;
    $year = 2009 unless defined $year;

    # NOTE(review): the pattern that used to be here was an empty regex
    # (`//`), which in Perl means "reuse the last successful pattern" and
    # never sets $1.  The default below is a reconstruction that assumes the
    # calendar links daily pages as relative "*.html" hrefs -- confirm
    # against the real archive markup, or pass $link_re explicitly.
    $link_re = qr/<a\s[^>]*href="([^"\/?#]+\.html?)"/i unless defined $link_re;

    my $archive_url = "http://www.dailyrotten.com/archive/$year/";
    my $calendar    = $self->rest_get_raw($archive_url);
    if (!defined $calendar || $calendar eq '')
    {
        # Bail out instead of replacing an existing index with an empty one.
        warn "dailyrotten_get: no calendar data from $archive_url\n";
        return;
    }

    my @files;
    foreach my $line (split /\n/, $calendar)
    {
        next unless $line =~ $link_re;
        my $file = $1;
        next unless defined $file && length $file;

        # The name comes from remote HTML and is used in a local path below:
        # refuse absolute paths and ".." components.
        next if $file =~ m{^/} || $file =~ m{(?:^|/)\.\.(?:/|$)};

        push @files, $file;
    }

    # skip the last day so we can get accurate forum count later
    pop @files;

    my @days;
    foreach my $file (@files)
    {
        my $cache_path = "../tmp/dr/raw/$file";
        my $raw_data   = $self->read_data($cache_path);
        if (!$raw_data)
        {
            sleep 5;    # pause before each uncached request
            $raw_data = $self->rest_get_raw($archive_url . $file);

            # Do not cache a failed/empty download.
            if (defined $raw_data && length $raw_data)
            {
                $self->write_data($cache_path, $raw_data);
            }
        }

        # Scalar assignment keeps the parser's result a single hash value.
        my $posts = $self->dailyrotten_posts($raw_data);
        push @days, { file => $file, post => $posts };
    }

    return $self->write_xml("../tmp/dr/$year.xml", \@days);
}
# Load a stored year index through read_xml().
#
# Parameters:
#   $year - optional archive year selecting "../tmp/dr/$year.xml";
#           defaults to 2009, the file this method always read before.
#
# Returns whatever read_xml() returns for that path.
sub dailyrotten_load
{
    my ($self, $year) = @_;
    $year = 2009 unless defined $year;
    return $self->read_xml("../tmp/dr/$year.xml");
}
# Extract post records from the raw HTML of one daily archive page.
#
# The page is scanned line by line.  A record collects:
#   url      - href of the "Read article..." link
#   title    - text of the anchor carrying class="newslink"
#   comments - the number N from "Comments (N)"
# The "Comments (N)" line closes the current record: it is appended to the
# result and a fresh record is started.  Fields seen after the last
# "Comments" line are therefore dropped.
#
# Parameters:
#   $raw_data - page HTML as one string; undef is treated as an empty page.
#
# Returns an array reference of hash references (possibly empty).
sub dailyrotten_posts
{
    my ($self, $raw_data) = @_;
    my $recs = [];
    return $recs unless defined $raw_data;

    my $rec = {};
    foreach my $line (split /\n/, $raw_data)
    {
        # Disabled header match kept from the original.  Its pattern was
        # broken across several lines in the file (markup apparently lost);
        # NOTE(review): restore the real pattern before re-enabling.
        ## if ($line =~ /Daily Rotten Archives<\/font>(.*)/)
        # {
        # }

        # NOTE(review): the pattern found in the file had no capture group
        # (its opening <a href="..."> part appears to have been stripped), so
        # $1 was never set by it.  Reconstructed here -- confirm against the
        # real page markup.
        if ($line =~ m{^\s*<a\s[^>]*href="([^"]*)"[^>]*>Read article\.\.\.</a>})
        {
            $rec->{'url'} = $1;
        }

        # Non-greedy so the title stops at the first closing tag even when
        # the line contains further anchors.
        if ($line =~ m{class="newslink">(.*?)</a>})
        {
            $rec->{'title'} = $1;
        }

        if ($line =~ /Comments \((\d+)\)/)
        {
            $rec->{'comments'} = $1;
            push @$recs, $rec;
            $rec = {};
        }
    }

    return $recs;
}
1;