summaryrefslogtreecommitdiff
path: root/bucky2/lib/Rest/Dailyrotten.pm
diff options
context:
space:
mode:
Diffstat (limited to 'bucky2/lib/Rest/Dailyrotten.pm')
-rw-r--r--bucky2/lib/Rest/Dailyrotten.pm76
1 files changed, 76 insertions, 0 deletions
diff --git a/bucky2/lib/Rest/Dailyrotten.pm b/bucky2/lib/Rest/Dailyrotten.pm
new file mode 100644
index 0000000..93a41a7
--- /dev/null
+++ b/bucky2/lib/Rest/Dailyrotten.pm
@@ -0,0 +1,76 @@
+package Rest::Dailyrotten;
+use base 'Rest';
+
+# my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
+# $self->write_data("../tmp/topsy_call", $topsy_data);
+# exit;
+
+
+# Scrape one year of the Daily Rotten archive into ../tmp/dr/$year.xml.
+#
+# Fetches the archive calendar page for the year, collects the per-day
+# page names linked from it, obtains each day page (reusing a copy under
+# ../tmp/dr/raw/ when read_data() returns one, otherwise downloading and
+# saving it), extracts the posts with dailyrotten_posts() and hands the
+# collected records to write_xml().
+#
+# Parameters:
+#   $year - optional four-digit year; defaults to 2009.
+#
+# Returns whatever write_xml() returns, or nothing when the calendar
+# fetch produced no content.  Dies if $year is not exactly four digits.
+sub dailyrotten_get {
+    my ($self, $year) = @_;
+    $year = 2009 unless defined $year;
+
+    # $year is placed in a URL, a regex and a file path: keep it numeric.
+    die "invalid year '$year'\n" unless $year =~ /\A\d{4}\z/;
+
+    my $archive_url = "http://www.dailyrotten.com/archive/$year/";
+    my $dailyrotten_calendar = $self->rest_get_raw($archive_url);
+
+    # Without a calendar there is nothing to process; returning here avoids
+    # replacing an existing output file with an empty record list.
+    return unless defined($dailyrotten_calendar)
+        && length($dailyrotten_calendar);
+
+    # \Q...\E makes the URL match literally (its dots are not wildcards),
+    # and the dot before "html" is escaped for the same reason.
+    my @valid;
+    foreach my $line (split "\n", $dailyrotten_calendar) {
+        if ($line =~ /<a href="\Q$archive_url\E(_$year-\d+-\d+\.html)">/) {
+            push @valid, $1;
+        }
+    }
+
+    # skip the last day so we can get accurate forum count later
+    pop @valid;
+
+    my @xml_data;
+    foreach my $file (@valid) {
+        my $cache_path = "../tmp/dr/raw/$file";
+        my $raw_data = $self->read_data($cache_path);
+        if (!$raw_data) {
+            # Pause before each remote request (presumably to go easy on
+            # the site), then keep the page so later runs can reuse it.
+            sleep 5;
+            $raw_data = $self->rest_get_raw($archive_url . $file);
+            $self->write_data($cache_path, $raw_data);
+        }
+        my $posts = $self->dailyrotten_posts($raw_data);
+        push @xml_data, { file => $file, post => $posts };
+    }
+
+    return $self->write_xml("../tmp/dr/$year.xml", \@xml_data);
+}
+# Load the records previously saved for one year.
+#
+# Parameters:
+#   $year - optional four-digit year; defaults to 2009.
+#
+# Returns whatever read_xml() returns for ../tmp/dr/$year.xml.
+# Dies if $year is not exactly four digits.
+sub dailyrotten_load {
+    my ($self, $year) = @_;
+    $year = 2009 unless defined $year;
+
+    # $year becomes part of a file path: keep it strictly numeric.
+    die "invalid year '$year'\n" unless $year =~ /\A\d{4}\z/;
+
+    return $self->read_xml("../tmp/dr/$year.xml");
+}
+# Extract the post records from the raw HTML of one archive day page.
+#
+# The page is scanned line by line.  A line starting with a
+# "Read article..." link supplies the record's url, a class="newslink"
+# anchor supplies its title, and a "Comments (N)" marker supplies its
+# comment count and completes the record, after which a new one begins.
+#
+# Parameters:
+#   $raw_data - page HTML as a single string; undef is treated as empty.
+#
+# Returns an array ref of hash refs.  Each has a 'comments' key, plus
+# 'url' and 'title' when those lines were seen before the comments marker.
+sub dailyrotten_posts {
+    my ($self, $raw_data) = @_;
+    my $recs = [];
+    return $recs unless defined $raw_data;
+
+    my $rec = {};
+    foreach my $line (split "\n", $raw_data) {
+        # [^"]* keeps the capture inside the href attribute value.
+        if ($line =~ /^<a href="([^"]*)" target="_blank">Read article\.\.\.<\/a>/) {
+            $rec->{'url'} = $1;
+        }
+        # Non-greedy so the title ends at the first closing </a>, even
+        # when further links follow on the same line.
+        if ($line =~ /class="newslink">(.*?)<\/a>/) {
+            $rec->{'title'} = $1;
+        }
+        if ($line =~ /Comments \((\d+)\)/) {
+            $rec->{'comments'} = $1;
+            push @$recs, $rec;
+            $rec = {};
+        }
+    }
+    return $recs;
+}
+
+1;