3 files changed, 390 insertions, 0 deletions
diff --git a/bucky2/lib/Rest/Dailyrotten.pm b/bucky2/lib/Rest/Dailyrotten.pm
new file mode 100644
index 0000000..93a41a7
--- /dev/null
+++ b/bucky2/lib/Rest/Dailyrotten.pm
@@ -0,0 +1,76 @@
+package Rest::Dailyrotten;
+use base 'Rest';
+
+#	my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
+#	$self->write_data("../tmp/topsy_call", $topsy_data);
+#	exit;
+
+
+# Scrape one year of the Daily Rotten archive into ../tmp/dr/$year.xml.
+# $year is optional and defaults to 2009. Day pages are looked up with
+# read_data under ../tmp/dr/raw/ and only fetched (then stored) when missing.
+sub dailyrotten_get
+	{
+	my ($self, $year) = @_;
+	$year ||= 2009;
+	my $archive_url = "http://www.dailyrotten.com/archive/$year/";
+	my $dailyrotten_calendar = $self->rest_get_raw($archive_url);
+	my $valid = [];
+	foreach my $line (split "\n", $dailyrotten_calendar)
+		{
+		# \Q..\E keeps the dots of the URL literal instead of "any character"
+		if ($line =~ /<a href="\Q$archive_url\E(_$year-\d+-\d+\.html)">/)
+			{ push @$valid, $1; }
+		}
+	# skip the last day so we can get accurate forum count later
+	pop(@$valid);
+	my $xml_data = [];
+	foreach my $file (@$valid)
+		{
+		my $raw_data = $self->read_data("../tmp/dr/raw/$file");
+		if (!$raw_data)
+			{
+			sleep 5;	# pause before every uncached fetch
+			$raw_data = $self->rest_get_raw($archive_url . $file);
+			$self->write_data("../tmp/dr/raw/$file", $raw_data);
+			}
+		my $posts = $self->dailyrotten_posts($raw_data);
+		push @$xml_data, { file => $file, post => $posts };
+		}
+	$self->write_xml("../tmp/dr/$year.xml", $xml_data);
+	}
+# Hand back whatever read_xml returns for ../tmp/dr/2009.xml.
+sub dailyrotten_load {
+	my $self = shift;
+	return $self->read_xml("../tmp/dr/2009.xml");
+}
+# Extract the posts from one raw Daily Rotten day page. Returns an array ref
+# of hashes ('url', 'title', 'comments'); each "Comments (N)" line closes the
+# current record, so url/title lines are expected before it.
+sub dailyrotten_posts
+	{
+	my ($self, $raw_data) = @_;
+	my $recs = [];
+	my $rec = {};
+	foreach my $line (split "\n", $raw_data)
+		{
+		# [^"]* and .*? stop at the first delimiter instead of the last one
+		if ($line =~ /^<a href="([^"]*)" target="_blank">Read article\.\.\.<\/a>/)
+			{
+			$rec->{'url'} = $1;
+			}
+		if ($line =~ /class="newslink">(.*?)<\/a>/)
+			{
+			$rec->{'title'} = $1;
+			}
+		if ($line =~ /Comments \((\d+)\)/)
+			{
+			$rec->{'comments'} = $1;
+			push @$recs, $rec;
+			$rec = {};
+			}
+		}
+	return $recs;
+	}
+
+1;
diff --git a/bucky2/lib/Rest/Topsy.pm b/bucky2/lib/Rest/Topsy.pm
new file mode 100644
index 0000000..3ef045e
--- /dev/null
+++ b/bucky2/lib/Rest/Topsy.pm
@@ -0,0 +1,185 @@
+package Rest::Topsy;
+use base 'Rest';
+use Data::Dumper;
+use XML::Simple;
+
+# Run a Topsy search for $query and return a hash ref mapping each lowercased
+# "label" span to the "count" span that preceded it. The raw response is also
+# saved to ../tmp/nndb/topsy/<first two word chars of query>/<query>.txt.
+sub topsy_search
+	{
+	my ($self, $query) = @_;
+	my $topsy_data = $self->rest_get_raw("http://topsy.com/s", { q => $query });
+	my $initials = $query;
+	$initials =~ s/\W//g;
+	$initials = substr($initials, 0, 2);
+	if (! -e "../tmp/nndb/topsy/$initials")
+		{ system("/bin/mkdir", "../tmp/nndb/topsy/$initials"); }
+	$self->write_data("../tmp/nndb/topsy/$initials/$query.txt", $topsy_data);
+	my $value = undef;
+	my $rank = {};
+	foreach my $line (split "\n", $topsy_data)
+		{
+		next unless $line =~ /<span class="(count|label)">(.*)<\/span>/;
+		my ($token, $text) = ($1, $2);
+		if ($token eq "count")
+			{
+			$value = $text;
+			# strip every thousands separator, not just the first one
+			$value =~ s/\,//g;
+			}
+		elsif ($token eq "label")
+			{
+			# a label without a preceding true count is ignored
+			$rank->{lc $text} = $value if $value;
+			undef $value;
+			}
+		}
+	return $rank;
+	}
+# Resume the paginated Topsy concept listing and return all collected entries.
+# A previously saved response (../tmp/topsy_call) supplies the page count and
+# the initial entries; entries already saved in ../tmp/topsy_entries.xml win
+# over those. The XML file is rewritten after every page that gets fetched.
+sub topsy_get
+	{
+	my ($self) = @_;
+
+	# To (re)create the saved response:
+	#	my $topsy_data = $self->rest_get_raw($self->topsy_query($page));
+	#	$self->write_data("../tmp/topsy_call", $topsy_data);
+	my $topsy_data = $self->read_data("../tmp/topsy_call");
+	my $topsy_script_data = $self->topsy_script_data($topsy_data);
+	my $topsy_entries = $self->topsy_entries($topsy_data);
+	my $xml_entries = $self->topsy_load;
+	if (scalar(@$xml_entries))
+		{
+		$topsy_entries = $xml_entries;
+		}
+
+	# continue after the last complete page already held (assumes 10 per page)
+	my $page = int(scalar(@$topsy_entries) / 10) || 1;
+	my $last_page = $topsy_script_data->{'pages'};
+	print "$last_page pages\n";
+
+	while ($page < $last_page)
+		{
+		$page++;
+		print $page . "...";
+		my $page_data = $self->rest_get_raw($self->topsy_query($page));
+		push @$topsy_entries, @{ $self->topsy_entries($page_data) };
+		$self->write_xml("../tmp/topsy_entries.xml", $topsy_entries);
+		sleep 10 + (int rand 5);
+		}
+
+	print "Expected " . $topsy_script_data->{'total'} . ", got " . scalar(@$topsy_entries)."\n";
+	$self->write_xml("../tmp/topsy_entries.xml", $topsy_entries);
+	# hand back the accumulated list (a per-page list only exists inside the loop)
+	return @$topsy_entries;
+	}
+# Fetch the saved entries from ../tmp/topsy_entries.xml via read_xml, print
+# how many there are, and return them unchanged.
+sub topsy_load {
+	my $saved = shift->read_xml("../tmp/topsy_entries.xml");
+	print "Loaded ".scalar(@$saved)." entries\n";
+	return $saved;
+}
+
+# Split a Topsy concept page into one hash per '<div class="concept-rank">'
+# chunk and return the non-empty hashes as an array ref, in page order.
+# Chunks matching /concept-list-re/ are skipped entirely.
+#
+# Inside a chunk, lines are trimmed and scanned top to bottom:
+#   - class="name" makes "name" the pending key
+#   - url(...) is stored under 'tile'
+#   - a true value found while a key is pending is stored under that key,
+#     which is then cleared until the next class="..." shows up
+# Scanning of a chunk ends with the first line containing 'script type="text'.
+sub topsy_entries {
+	my ($self, $data) = @_;
+	my $entries = [];
+	foreach my $chunk (split '<div class="concept-rank">', $data) {
+		next if $chunk =~ /concept-list-re/;
+		my %found;
+		my $key = "";
+LINE:	foreach my $raw (split "\n", $chunk) {
+			my $line = $self->trim($raw);
+			next unless $line;
+			$key = $1 if $line =~ /class\=\"(\w+)\"/;
+			$found{'tile'} = $1 if $line =~ /url\((.*)\)/;
+
+			# value sources in priority order: digits for "total", then any
+			# >...< content, then the &ldquo;quoted&rdquo; text of a "description"
+			my $value = "";
+			if ($key eq "total" && $line =~ />(\d+)</) {
+				$value = $1;
+			}
+			elsif ($line =~ />(.*)</) {
+				$value = $1;
+			}
+			elsif ($key eq "description" && $line =~ /\&ldquo\;(.*)\&rdquo\;/) {
+				$value = $1;
+				$value =~ s/ http.*$//;	# cut everything from the first " http"
+			}
+
+			if ($key && $value) {
+				$found{$key} = $value;
+				# key consumed: wait for the next class="..." before storing again
+				undef $key;
+			}
+
+			# nothing past the script tag is scanned
+			last LINE if $line =~ /script type\=\"text/;
+		}
+		if (scalar keys %found) {
+			push @$entries, \%found;
+		}
+	}
+	return $entries;
+}
+# Recipes.re= {
+#    page:    2,
+#    total:   964,
+#    perpage: 10,
+#    pages:   97
+# };
+# Pull the page/total/perpage/pages numbers out of the inline script block
+# shown above. Returns a hash ref, or undef when none of them were found.
+sub topsy_script_data
+	{
+	my ($self, $data) = @_;
+	my $script_data = {};
+	foreach my $line (split "\n", $data)
+		{
+		next unless $line =~ /\:/;
+		$line =~ s/[\s,]+//g;
+		my ($k, $v) = split ":", $line;
+		# whole-key match only: "pagefoo" or "totals" must not slip through
+		next unless $k && $v && $k =~ /^(?:page|total|perpage|pages)$/;
+		$script_data->{$k} = $v;
+		}
+	return scalar(keys %$script_data) ? $script_data : undef;
+	}
+# Build the (endpoint, parameter hash ref) pair that topsy_get passes on to
+# rest_get_raw, asking for page $page of the listing for $self->url.
+sub topsy_query {
+	my ($self, $page) = @_;
+	my $endpoint = "http://topsy.com/concept";
+	my %params = (
+		"page" => $page,
+		"sort_method" => "",
+		"url" => $self->url,
+		"class" => "UB::Concept::List::Re",
+	);
+	return ($endpoint, \%params);
+}
+# Accessor for the 'url' slot of the object hash.
+# Called with a true value it stores that value first; either way the
+# current slot content is returned. False values ('', 0, undef) never
+# overwrite what is already stored.
+sub url {
+	my ($self, $url) = @_;
+	$self->{'url'} = $url if $url;
+	return $self->{'url'};
+}
+1;
diff --git a/bucky2/lib/Rest/Twitter.pm b/bucky2/lib/Rest/Twitter.pm
new file mode 100644
index 0000000..00220a6
--- /dev/null
+++ b/bucky2/lib/Rest/Twitter.pm
@@ -0,0 +1,129 @@
+package Rest::Twitter;
+use base 'Rest';
+use Data::Dumper;
+my $twitter_status_uri	= "http://twitter.com/statuses/mentions.xml";	# read by tweet_get
+my $twitter_update_uri	= "http://twitter.com/statuses/update.xml";	# posted to by tweet_post
+my $twitter_dm_uri		= "http://twitter.com/direct_messages.xml";	# read by dm_get
+my $twitter_dm_new_uri	= "http://twitter.com/direct_messages/new.xml";	# posted to by dm_post
+
+# DM $tweet (whitespace runs squeezed) to $user; no-op if either is false.
+sub dm_post {
+	my ($self, $user, $tweet) = @_;
+	return if !$user || !$tweet;
+	$tweet =~ s/\s+/ /g;
+	print ">>> D $user: $tweet\n";
+	return $self->rest_post($twitter_dm_new_uri, {text => $tweet, user => $user});
+}
+# Post $status (whitespace runs squeezed), passing $replyid as the reply id.
+sub tweet_post {
+	my ($self, $status, $replyid) = @_;
+	$status =~ s/\s+/ /g;
+	print ">>> $status\n";
+	return $self->rest_post($twitter_update_uri, {status => $status, in_reply_to_status_id => $replyid});
+}
+# Fetch direct messages newer than the stored "dm" since-id.
+# Returns an array ref of { id, tweet, user, type => "dm" } hashes in
+# ascending id order and advances the stored since-id to the highest id seen.
+sub dm_get
+	{
+	my ($self) = @_;
+	my $twitter_since_id = $self->since_id("dm");
+	my $dm_data = $self->rest_get($twitter_dm_uri, {since_id => $twitter_since_id});
+	# DEFAULT: plural behavior (treated as a hash keyed by message id)
+	my $status = $dm_data->{'direct_message'};
+	# CATCH: singular behavior (one bare message hash): re-key it by its id
+	if (exists($status->{'id'}))
+		{
+		my $id = $status->{'id'};
+		$status = { $id => $status };
+		}
+	my $dms = [];
+	my $last_id = 0;
+	# numeric sort: hash order is arbitrary, this gives callers a stable sequence
+	foreach my $id (sort { $a <=> $b } keys %{ $status })
+		{
+		$last_id = $id if $id > $last_id;
+		my $dm = $status->{$id}->{'text'};
+		my $user = $status->{$id}->{'sender_screen_name'};
+		push @$dms, {id => $id, tweet => $dm, user => $user, type => "dm"};
+		}
+	if ($last_id > $twitter_since_id)
+		{ $self->since_id("dm", $last_id); }
+	return $dms;
+	}
+# Fetch mentions newer than the stored "status" since-id.
+# Returns an array ref of { id, tweet, user, type => "tweet" } hashes in
+# ascending id order and advances the stored since-id to the highest id.
+# NOTE: with no 'status' element the return is (undef, undef), not [].
+sub tweet_get
+	{
+	my ($self) = @_;
+	my $twitter_since_id = $self->since_id("status");
+	my $tweet_data = $self->rest_get($twitter_status_uri, {since_id => $twitter_since_id});
+	return (undef, undef) unless exists($tweet_data->{'status'});
+	# DEFAULT: plural behavior (treated as a hash keyed by status id)
+	my $status = $tweet_data->{'status'};
+	# CATCH: singular behavior (one bare status hash): re-key it by its id
+	if (exists($status->{'id'}))
+		{ $status = { $status->{'id'} => $status }; }
+	my $tweets = [];
+	my $last_id = 0;
+	# numeric sort: hash order is arbitrary, this gives callers a stable sequence
+	foreach my $id (sort { $a <=> $b } keys %{ $status })
+		{
+		$last_id = $id if $id > $last_id;
+		my $tweet = $status->{$id}->{'text'};
+		my $user = $status->{$id}->{'user'}->{'screen_name'};
+		push @$tweets, {id => $id, tweet => $tweet, user => $user, type => "tweet"};
+		}
+	if ($last_id > $twitter_since_id)
+		{ $self->since_id("status", $last_id); }
+	return $tweets;
+	}
+# Get or advance the remembered "since" id for $key ("dm" or "status").
+# The value is loaded through _since_id_read on first use. A true $id
+# replaces it, in memory and via _since_id_write, only when it is larger
+# than the current value. Returns the resulting id (nothing without $key).
+sub since_id
+	{
+	my ($self, $key, $id) = @_;
+	return unless $key;
+	my $since = $self->{'since'} ||= {};
+	# load first, so a new id is always compared with the persisted value
+	if (! exists($since->{$key}))
+		{ $since->{$key} = $self->_since_id_read($key); }
+	if ($id && $since->{$key} < $id)
+		{
+		$since->{$key} = $id;
+		$self->_since_id_write($key, $id);
+		}
+	return $since->{$key};
+	}
+# Read the persisted since-id for $key (first line of its file, trimmed).
+sub _since_id_read
+	{
+	my ($self, $key) = @_;
+	open(my $fh, '<', $self->_since_id_file($key)) or die "can't read since-id file for $key: $!";
+	my $line = $self->trim(scalar <$fh>);	# scalar: hand trim one line, not the whole file
+	close $fh;
+	return $line;
+	}
+# Persist $id as the since-id for $key, replacing the file's previous content.
+sub _since_id_write
+	{
+	my ($self, $key, $id) = @_;
+	open(my $fh, '>', $self->_since_id_file($key)) or die "can't write since-id file for $key: $!";
+	print {$fh} $id."\n";
+	close($fh) or die "can't write since-id file for $key: $!";	# buffered write errors surface here
+	}
+# Path of the since-id file for $key: ../tmp/twitter-<user>-<key>.
+# Dies unless both ../tmp and that file already exist.
+sub _since_id_file {
+	my ($self, $key) = @_;
+	my $dir = "../tmp";
+	my $path = "$dir/twitter-" . $self->user . "-$key";
+	-e $_ or die ("can't find $_") foreach ($dir, $path);
+	return $path;
+}
+1;