package Rest::Dailyrotten;

use strict;
use warnings;

use base 'Rest';

# Scraper for the dailyrotten.com yearly archive.
#
# All I/O goes through methods that are not defined in this file and are
# expected to come from the 'Rest' base class: rest_get_raw, read_data,
# write_data, read_xml and write_xml.

# Disabled debugging snippet, kept as found:
# my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
# $self->write_data("../tmp/topsy_call", $topsy_data);
# exit;

# Year used when the caller does not pass one (the value that used to be
# hard-coded throughout this module).
my $DEFAULT_YEAR = 2009;

# NOTE(review): the pattern that picked day files out of the archive calendar
# was lost (the source contained an empty pattern, which in Perl does not mean
# "match this link" and leaves $1 stale or undefined). This is a best-effort
# reconstruction: it captures a relative, single-segment *.htm/*.html href,
# which is what the later "$archive_url . $file" and "../tmp/dr/raw/$file"
# uses imply. Confirm against the real calendar markup.
my $DAY_LINK_RE = qr{<a\s[^>]*href="([^"/]+\.html?)"}i;

# NOTE(review): the original "Read article..." pattern had lost its capture
# group, so the article URL could never be extracted. Reconstructed here as
# "the href of the anchor whose text is 'Read article...'". The original was
# anchored at the start of the line; that anchor is dropped because the text
# that followed it is unknown. Confirm against the real page markup.
my $ARTICLE_LINK_RE = qr{<a\s[^>]*href="([^"]*)"[^>]*>\s*Read article\.\.\.</a>};

# dailyrotten_get($self [, $year])
#
# Fetches the archive calendar for $year (default 2009), collects the day
# files linked from it, downloads each day page (using ../tmp/dr/raw/ as a
# cache), parses the posts on every page and writes the combined structure to
# ../tmp/dr/$year.xml.
#
# Returns whatever $self->write_xml returns.
sub dailyrotten_get {
    my ($self, $year) = @_;
    $year = $DEFAULT_YEAR unless defined $year;

    my $archive_url = "http://www.dailyrotten.com/archive/$year/";

    my $dailyrotten_calendar = $self->rest_get_raw($archive_url);
    $dailyrotten_calendar = '' unless defined $dailyrotten_calendar;

    # At most one day file is taken per calendar line, as in the original.
    my @day_files;
    foreach my $line (split /\n/, $dailyrotten_calendar) {
        push @day_files, $1 if $line =~ $DAY_LINK_RE;
    }

    # skip the last day so we can get accurate forum count later
    pop @day_files;

    my @xml_data;
    foreach my $file (@day_files) {
        my $cache_path = "../tmp/dr/raw/$file";
        my $raw_data   = $self->read_data($cache_path);

        if (!$raw_data) {
            # Not cached yet: pause before each network fetch, then cache
            # the page. An empty response is not cached; it would be
            # re-fetched on the next run anyway because it tests false above.
            sleep 5;
            $raw_data = $self->rest_get_raw($archive_url . $file);
            $self->write_data($cache_path, $raw_data)
                if defined $raw_data && length $raw_data;
        }

        push @xml_data, {
            file => $file,
            post => $self->dailyrotten_posts($raw_data),
        };
    }

    return $self->write_xml("../tmp/dr/$year.xml", \@xml_data);
}

# dailyrotten_load($self [, $year])
#
# Returns the result of reading ../tmp/dr/$year.xml (default year 2009), the
# file produced by dailyrotten_get.
sub dailyrotten_load {
    my ($self, $year) = @_;
    $year = $DEFAULT_YEAR unless defined $year;

    return $self->read_xml("../tmp/dr/$year.xml");
}

# dailyrotten_posts($self, $raw_data)
#
# Scans one day page line by line and returns an array reference of post
# records. Each record is a hash reference that may hold 'url', 'title' and
# 'comments'. A record is emitted only when a "Comments (N)" line is seen;
# fields collected after the last such line are discarded.
#
# Returns an empty array reference when $raw_data is undefined.
sub dailyrotten_posts {
    my ($self, $raw_data) = @_;

    my @recs;
    return \@recs unless defined $raw_data;

    my $rec = {};
    foreach my $line (split /\n/, $raw_data) {
        # Disabled check kept from the original (its pattern is incomplete):
        ## if ($line =~ /Daily Rotten Archives<\/font> (.*) /)
        # {
        # }

        if ($line =~ $ARTICLE_LINK_RE) {
            $rec->{'url'} = $1;
        }

        # Non-greedy so a second anchor on the same line is not swallowed
        # into the title.
        if ($line =~ /class="newslink">(.*?)<\/a>/) {
            $rec->{'title'} = $1;
        }

        # The comment count closes the current post: store it and start a
        # fresh record.
        if ($line =~ /Comments \((\d+)\)/) {
            $rec->{'comments'} = $1;
            push @recs, $rec;
            $rec = {};
        }
    }

    return \@recs;
}

1;