diff options
Diffstat (limited to 'bucky2/lib/Rest/Dailyrotten.pm')
| -rw-r--r-- | bucky2/lib/Rest/Dailyrotten.pm | 76 |
1 files changed, 76 insertions, 0 deletions
package Rest::Dailyrotten;
use strict;
use warnings;
use base 'Rest';

# Scraper for the dailyrotten.com yearly archive.
#
# This package calls rest_get_raw, read_data, write_data, write_xml and
# read_xml on $self. None of them are defined here; they are presumably
# inherited from the Rest base class -- confirm against Rest.pm.

# Year used when a caller does not pass one (the original hard-coded value).
my $DEFAULT_YEAR = 2009;

# dailyrotten_get([$year])
#
# Fetches the archive calendar page for $year, collects the per-day page
# names linked from it, loads each day page (from the raw cache under
# ../tmp/dr/raw/ when present, otherwise over HTTP), extracts the posts of
# each day with dailyrotten_posts, and writes the collected list to
# ../tmp/dr/$year.xml through write_xml.
#
#   $year - optional; defaults to 2009.
#
# Returns whatever write_xml returns.
sub dailyrotten_get {
    my ($self, $year) = @_;
    $year = $DEFAULT_YEAR unless defined $year;

    my $archive_url = "http://www.dailyrotten.com/archive/$year/";
    my $calendar    = $self->rest_get_raw($archive_url);
    $calendar = '' unless defined $calendar;    # failed fetch: nothing to scan

    # Collect the day pages linked from the calendar (first link per line).
    # \Q...\E keeps the dots in the URL literal instead of "any character".
    my @files;
    for my $line (split /\n/, $calendar) {
        if ($line =~ m{<a href="\Q$archive_url\E(_\Q$year\E-\d+-\d+\.html)">}) {
            push @files, $1;
        }
    }

    # skip the last day so we can get accurate forum count later
    pop @files;

    my @days;
    for my $file (@files) {
        my $cache_path = "../tmp/dr/raw/$file";
        my $raw_data   = $self->read_data($cache_path);
        if (!$raw_data) {
            sleep 5;    # presumably throttles requests to the remote site
            $raw_data = $self->rest_get_raw($archive_url . $file);

            # Only cache real content; an empty file would be treated as a
            # cache miss on the next run anyway.
            if (defined $raw_data && length $raw_data) {
                $self->write_data($cache_path, $raw_data);
            }
        }
        push @days, { file => $file, post => $self->dailyrotten_posts($raw_data) };
    }

    return $self->write_xml("../tmp/dr/$year.xml", \@days);
}

# dailyrotten_load([$year])
#
# Reads back the file written by dailyrotten_get for $year (default 2009)
# and returns whatever read_xml returns for ../tmp/dr/$year.xml.
sub dailyrotten_load {
    my ($self, $year) = @_;
    $year = $DEFAULT_YEAR unless defined $year;
    return $self->read_xml("../tmp/dr/$year.xml");
}

# dailyrotten_posts($raw_data)
#
# Scans the HTML of one day page line by line and builds one record per
# post. A record accumulates 'url' and 'title' as their lines are seen and
# is closed (pushed) when a "Comments (N)" line is found, which supplies
# 'comments'. Keys whose line never appeared are simply absent.
#
#   $raw_data - page HTML as a single string; undef yields an empty list.
#
# Returns an array reference of hash references.
sub dailyrotten_posts {
    my ($self, $raw_data) = @_;

    my @recs;
    return \@recs unless defined $raw_data;

    my $rec = {};
    for my $line (split /\n/, $raw_data) {

        # TODO: the original carried a disabled match for the page heading,
        #   /Daily Rotten Archives<\/font><br>(.*)<br>/
        # Nothing is extracted from that line at present.

        # [^"]* stops at the closing quote of the href attribute.
        if ($line =~ m{^<a href="([^"]*)" target="_blank">Read article\.\.\.</a>}) {
            $rec->{'url'} = $1;
        }

        # Non-greedy so a second anchor on the same line is not swallowed.
        if ($line =~ m{class="newslink">(.*?)</a>}) {
            $rec->{'title'} = $1;
        }

        # The comment count is the last field of a post: close the record.
        if ($line =~ m{Comments \((\d+)\)}) {
            $rec->{'comments'} = $1;
            push @recs, $rec;
            $rec = {};
        }
    }

    return \@recs;
}

1;
