summaryrefslogtreecommitdiff
path: root/bucky2/lib/Rest/Dailyrotten.pm
blob: 93a41a7262b97bbb53f3f91f43db3f69c641e039 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package Rest::Dailyrotten;
use base 'Rest';

#	my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
#	$self->write_data("../tmp/topsy_call", $topsy_data);
#	exit;


# Build the per-year index of Daily Rotten posts and store it as XML.
#
# Arguments:
#   $year - optional archive year; defaults to 2009 (the value this
#           method was originally hard-coded to).
#
# Steps:
#   1. Request the archive calendar for $year through rest_get_raw and
#      collect every per-day page name of the form "_YEAR-N-N.html"
#      that is linked under the archive URL.
#   2. Drop the last collected day (see the comment at the pop below).
#   3. For each remaining day, take the page from ../tmp/dr/raw/ via
#      read_data, or request it and store it via write_data when
#      read_data yields nothing.
#   4. Parse each page with dailyrotten_posts and hand the list of
#      { file => ..., post => ... } records to write_xml.
#
# rest_get_raw, read_data, write_data and write_xml are not defined in
# this file -- presumably inherited from the Rest base class; confirm
# their failure behaviour there.
#
# Returns whatever write_xml returns.
sub dailyrotten_get
	{
	my ($self, $year) = @_;
	$year = 2009 unless defined $year;
	my $archive_url = "http://www.dailyrotten.com/archive/$year/";
	my $dailyrotten_calendar = $self->rest_get_raw($archive_url);
	# Treat a missing response as an empty calendar instead of splitting undef.
	$dailyrotten_calendar = '' unless defined $dailyrotten_calendar;
	# \Q...\E keeps the dots in the URL literal, and "\.html" only matches a
	# real dot, so look-alike hosts or paths are no longer accepted.
	my $day_link = qr/<a href="\Q$archive_url\E(_\Q$year\E-\d+-\d+\.html)">/;
	my $valid = [];
	foreach my $line (split "\n", $dailyrotten_calendar)
		{
		if ($line =~ $day_link)
			{
			push @$valid, $1;
			}
		}
	# skip the last day so we can get accurate forum count later
	pop @$valid;
	my $xml_data = [];
	foreach my $file (@$valid)
		{
		my $raw_data = $self->read_data("../tmp/dr/raw/$file");
		if (!$raw_data)
			{
			# Pause before every remote request so the site is not hammered.
			sleep 5;
			$raw_data = $self->rest_get_raw($archive_url . $file);
			$self->write_data("../tmp/dr/raw/$file", $raw_data);
			}
		my $posts = $self->dailyrotten_posts($raw_data);
		push @$xml_data, { file => $file, post => $posts };
		}
	return $self->write_xml("../tmp/dr/${year}.xml", $xml_data);
	}
# Load a stored per-year post index.
#
# Arguments:
#   $year - optional archive year; defaults to 2009, the only year this
#           method could read before it took a parameter.
#
# Returns whatever $self->read_xml returns for "../tmp/dr/YEAR.xml".
# read_xml is not defined in this file -- presumably inherited from the
# Rest base class.
sub dailyrotten_load
	{
	my ($self, $year) = @_;
	$year = 2009 unless defined $year;
	return $self->read_xml("../tmp/dr/${year}.xml");
	}
# Extract the posts listed on one Daily Rotten archive page.
#
# Arguments:
#   $raw_data - HTML of the page as one string; undef or '' yields no posts.
#
# The page is scanned line by line. A "Read article..." link sets the
# record's 'url', a link with class "newslink" sets its 'title', and a
# "Comments (N)" marker sets 'comments' and closes the record. A record
# therefore only carries 'url' or 'title' if those lines appeared before
# its comments marker.
#
# Returns an array ref of hash refs, in page order.
sub dailyrotten_posts
	{
	my ($self, $raw_data) = @_;
	my $recs = [];
	return $recs unless defined $raw_data;
	my $rec = {};
	foreach my $line (split "\n", $raw_data)
		{
		# An attribute value cannot contain a double quote, so stop there.
		if ($line =~ m{^<a href="([^"]*)" target="_blank">Read article\.\.\.</a>})
			{
			$rec->{'url'} = $1;
			}
		# Non-greedy: stop at the first closing tag so a later link on the
		# same line (e.g. the comments link) is not swallowed into the title.
		if ($line =~ m{class="newslink">(.*?)</a>})
			{
			$rec->{'title'} = $1;
			}
		if ($line =~ /Comments \((\d+)\)/)
			{
			$rec->{'comments'} = $1;
			push @$recs, $rec;
			$rec = {};
			}
		}
	return $recs;
	}

1;