6 files changed, 738 insertions, 0 deletions
diff --git a/bucky2/lib/Bucky/DB.pm b/bucky2/lib/Bucky/DB.pm
new file mode 100644
index 0000000..bca92b7
--- /dev/null
+++ b/bucky2/lib/Bucky/DB.pm
@@ -0,0 +1,147 @@
+package Bucky::DB;
+
+use base 'Bucky';
+
+use Data::Dumper;
+use DBI;
+
+# Maps the logical record type accepted by insert/select/select_by_id to the
+# SQL table that stores it. 'bucky' maps to the empty string, which those
+# methods treat as false and therefore reject.
+my $DB_LOOKUP = {
+	bucky         => '',
+	user          => 'users',
+	keyword       => 'keywords',
+	thread        => 'threads',
+	file          => 'files',
+	comment       => 'comments',
+	family        => 'family',
+	search_log    => 'search_log',
+	poetaster_log => 'poetaster_log',
+	svn           => 'svn',
+};
+# Insert one row into the table registered for $type.
+#
+#   $type   - logical record type; must be a key of $DB_LOOKUP
+#   $record - hashref of column name => value; must not be empty
+#
+# Returns the id reported by lastinsertid on success. Returns nothing when
+# the type is unknown, the record is empty, a column name is not a plain
+# identifier, or the statement could not be prepared or executed.
+sub insert
+	{
+	my ($self, $type, $record) = @_;
+	$type = $DB_LOOKUP->{$type};
+	return unless $type && ref($record) eq "HASH" && scalar keys %$record;
+
+	# Column names cannot be sent as bind parameters, so anything that is
+	# not a bare identifier is refused rather than spliced into the SQL.
+	# Sorting keeps the generated statement stable between calls.
+	my @columns = sort keys %$record;
+	return if grep { $_ !~ /\A\w+\z/ } @columns;
+
+	# Values travel as bind parameters instead of quoted literals.
+	my $key_string = join ",", @columns;
+	my $value_string = join ",", ("?") x @columns;
+	my $sql = "INSERT INTO $type ($key_string) VALUES($value_string)";
+
+	# Only report an id when the statement really ran; otherwise the id
+	# handed back could belong to an earlier, unrelated insert.
+	my $sth = $self->dbh->prepare($sql);
+	return unless $sth && $sth->execute(@$record{@columns});
+	return $self->lastinsertid($sql);
+	}
+sub update
+	{ # Not implemented yet: the body is empty, so calling this does nothing.
+	}
+# Fetch the rows of the table registered for $type.
+#
+#   $type     - logical record type; must be a key of $DB_LOOKUP
+#   $criteria - passed unchanged to criteria(); whatever clause that returns
+#               is appended to the SELECT
+#
+# Returns an arrayref of row hashrefs, or nothing for an unknown type.
+sub select
+	{
+	my ($self, $type, $criteria) = @_;
+	my $table = $DB_LOOKUP->{$type};
+	return unless $table;
+
+	my $clause = $self->criteria($criteria);
+	my $sql = "SELECT * FROM $table";
+	$sql = $sql . " " . $clause if $clause;
+
+	my $sth = $self->execute($sql);
+	my @found;
+	while (my $row = $sth->fetchrow_hashref)
+		{
+		push @found, $row;
+		}
+	return \@found;
+	}
+# Fetch rows of the table registered for $type by primary key.
+#
+#   $type     - logical record type; must be a key of $DB_LOOKUP
+#   $id_array - non-empty arrayref of ids
+#
+# Returns a hashref of id => row hashref. Returns nothing when the type is
+# unknown or $id_array is not a non-empty arrayref.
+sub select_by_id
+	{
+	my ($self, $type, $id_array) = @_;
+	$type = $DB_LOOKUP->{$type};
+	return unless $type and ref($id_array) eq "ARRAY" and scalar @$id_array;
+	my $rows = {};
+
+	# Each id is quoted by the driver before it reaches the statement, so a
+	# malformed id cannot change the shape of the query. Undefined entries
+	# are dropped; if nothing is left there is nothing to look up.
+	my $ids = join ",", map { $self->quote($_) } grep { defined($_) } @$id_array;
+	return $rows unless length $ids;
+
+	my $sql = "SELECT * FROM $type";
+	$sql .= " WHERE id IN ($ids)";
+	my $sth = $self->execute($sql);
+	while (my $row = $sth->fetchrow_hashref)
+		{
+		$rows->{ $row->{'id'} } = $row;
+		}
+	return $rows;
+	}
+# Turn $criteria into the tail of a SELECT statement.
+#
+#   $criteria - either a string, used verbatim, or a hashref. In a hashref a
+#               key with a true value becomes "key=<quoted value>"; a key
+#               with a false value is used verbatim, which lets callers pass
+#               ready-made expressions such as "id > 5".
+#
+# A plain column name with a defined-but-false value (0 or '') is still
+# rendered as a comparison: treating it as a bare expression would turn
+# { private => 0 } into a test for private rows, or into a table alias.
+#
+# Returns the clause, prefixed with "WHERE " when it contains a comparison,
+# or undef when there is nothing to add.
+sub criteria
+	{
+	my ($self, $criteria) = @_;
+
+	my $criteria_list = [];
+
+	if ($self->is_string($criteria))
+		{
+		push @$criteria_list, $criteria;
+		}
+	elsif (ref $criteria eq "HASH")
+		{
+		# sorted so the same criteria always yield the same SQL
+		foreach my $key (sort keys %$criteria)
+			{
+			my $value = $criteria->{$key};
+			my $is_comparison = $value
+				|| (defined $value && $key =~ /\A\w+\z/);
+			my $criterion = $key;
+			if ($is_comparison)
+				{ $criterion .= "=" . $self->quote($value); }
+			push @$criteria_list, $criterion;
+			}
+		}
+	# explicit undef (not a bare return) so list-context callers still
+	# receive exactly one element, as they always have
+	return undef unless scalar @$criteria_list;
+
+	my $criteria_string = join(" AND ", @$criteria_list);
+	# clauses without a comparison (e.g. "ORDER BY ...") must not get WHERE
+	$criteria_string = "WHERE " . $criteria_string if $criteria_string =~ /[=<>]|( (IS|IN) )/;
+	return $criteria_string;
+	}
+# Prepare and run one SQL statement.
+#
+#   $sql  - statement text
+#   @bind - optional values for any "?" placeholders in $sql
+#
+# Returns the statement handle so the caller can fetch from it. Dies with
+# the driver's error text when the statement cannot be prepared; before,
+# that case surfaced as an unexplained "method on undefined value" crash.
+sub execute
+	{
+	my ($self, $sql, @bind) = @_;
+	my $dbh = $self->dbh;
+	my $sth = $dbh->prepare($sql)
+		or die "Bucky::DB: cannot prepare [$sql]: " . ($dbh->errstr || "unknown error") . "\n";
+	$sth->execute(@bind);
+	return $sth;
+	}
+# Quote $string as an SQL literal by delegating to the database handle.
+sub quote
+	{
+	my $self = shift;
+	my ($string) = @_;
+	my $dbh = $self->dbh;
+	return $dbh->quote($string);
+	}
+# Ask the database handle for the id generated by the most recent insert.
+# Any arguments after the invocant are ignored.
+sub lastinsertid
+	{
+	my $self = $_[0];
+	my $dbh = $self->dbh;
+	my @unscoped = (undef, undef, undef);
+	return $dbh->last_insert_id(0, @unscoped);
+	}
+# Return this object's database handle, creating it on first use.
+#
+#   $parent - optional object whose class name contains "Bucky"; when given
+#             and no handle is cached yet, its handle is borrowed instead
+#             of opening a new connection
+#
+# The handle is cached in $self->{_dbh}.
+sub dbh
+	{
+	my ($self, $parent) = @_;
+	my $can_borrow = $parent && ref($parent) =~ /Bucky/;
+	if ($can_borrow && ! $self->{_dbh})
+		{
+		$self->{_dbh} = $parent->dbh;
+		}
+	$self->{_dbh} = DBI->connect($self->dsn) unless $self->{_dbh};
+	return $self->{_dbh};
+	}
+# Build the DBI data source name from db_name, db_host and my_cnf.
+# The string is assembled once and cached in $self->{_dsn}.
+sub dsn
+	{
+	my ($self) = @_;
+	return $self->{_dsn} if $self->{_dsn};
+
+	my $database = $self->db_name;
+	my $host = $self->db_host;
+	my $defaults_file = $self->my_cnf;
+	$self->{_dsn} = "DBI:mysql:database=" . $database . ":" . $host
+		. ";mysql_read_default_file=" . $defaults_file;
+	return $self->{_dsn};
+	}
+# Connection settings read by dsn().
+sub db_name
+	{ return 'bucky'; }
+sub db_host
+	{ return 'localhost'; }
+sub my_cnf
+	{ return '/var/www/vhosts/carbonpictures.com/.my.cnf'; }
+
+1;
diff --git a/bucky2/lib/Bucky/Keyword.pm b/bucky2/lib/Bucky/Keyword.pm
new file mode 100644
index 0000000..8c52256
--- /dev/null
+++ b/bucky2/lib/Bucky/Keyword.pm
@@ -0,0 +1,26 @@
+package Bucky::Keyword;
+
+use base 'Bucky';
+
+# Record type tag: the current value of the package variable $Bucky::Keyword.
+# NOTE(review): nothing visible here assigns that variable -- confirm it is
+# set elsewhere, or that a literal type name was intended.
+sub type
+	{
+	return $Bucky::Keyword;
+	}
+
+# Field names of a keyword record, returned as a fresh arrayref.
+# NOTE(review): 'owner' appears twice. The list is left untouched because
+# its consumers are not visible here -- confirm whether that is intended.
+sub fields
+	{
+	my @names = qw(
+		id keyword threads owner createdate
+		owner ops public
+		agglutinate
+		color display
+		);
+	return \@names;
+	}
+
+# Read accessors, one per stored field. _username reads the 'owner' field.
+sub _id				{ return $_[0]->{id} }
+sub _keyword		{ return $_[0]->{keyword} }
+sub _threads		{ return $_[0]->{threads} }
+sub _username		{ return $_[0]->{owner} }
+sub _ops			{ return $_[0]->{ops} }
+sub _public			{ return $_[0]->{public} }
+sub _createdate		{ return $_[0]->{createdate} }
+sub _agglutinate	{ return $_[0]->{agglutinate} }
+sub _color			{ return $_[0]->{color} }
+sub _display		{ return $_[0]->{display} }
+
+1;
+
diff --git a/bucky2/lib/Bucky/SVN.pm b/bucky2/lib/Bucky/SVN.pm
new file mode 100644
index 0000000..ef04464
--- /dev/null
+++ b/bucky2/lib/Bucky/SVN.pm
@@ -0,0 +1,103 @@
+package Bucky::SVN;
+use base "Bucky";
+use Bucky::Session;
+# The value of the "SVN_SECRET" configuration entry; query_incoming compares
+# the request's "secret" parameter against it.
+sub svn_secret
+	{
+	my ($self) = @_;
+	return $self->config("SVN_SECRET");
+	}
+# Return the most recent svn log rows, newest first.
+#
+#   $count - how many rows to fetch; defaults to 7
+#
+# $count ends up inside the LIMIT clause, so anything that is not a positive
+# whole number falls back to the default instead of reaching the SQL.
+sub list
+	{
+	my ($self, $count) = @_;
+	$count = 7 unless defined $count && $count =~ /\A\d+\z/ && $count > 0;
+	return $self->db->select("svn", "ORDER BY date DESC LIMIT $count");
+	}
+# CGI entry point for the svn hook.
+#
+# Rejects the request (via error(), which prints and exits) unless it has
+# parameters and its "secret" parameter is non-empty and equal to
+# svn_secret. A request with a "user" parameter is handed to query_add;
+# any other request gets the HTML listing from query_list.
+sub query_incoming
+	{
+	my ($self) = @_;
+	# direct method call instead of the ambiguous "new Class" form
+	my $session = Bucky::Session->new;
+	my $q = $session->q;
+	my $secret = $q->param("secret");
+	error() unless scalar $q->param
+		&& defined $secret && length $secret
+		&& $secret eq $self->svn_secret();
+	if ($q->param("user"))
+		{
+		print $self->query_add($session);
+		}
+	else
+		{
+		print "Content-type: text/html\n\n";
+		print $self->query_list;
+		}
+	}
+# Render the rows returned by list() as an HTML table, one row per commit,
+# with the committer's thumbnail linked to their profile and the revision
+# and comment beside it. Returns the markup as a string.
+sub query_list
+	{
+	my ($self) = @_;
+	my $svns = $self->list;
+
+	# User, revision and comment are stored from request parameters by
+	# query_add, so they are escaped before being placed in markup.
+	my $escape = sub
+		{
+		my ($text) = @_;
+		$text = '' unless defined $text;
+		$text =~ s/&/&amp;/g;
+		$text =~ s/</&lt;/g;
+		$text =~ s/>/&gt;/g;
+		$text =~ s/"/&quot;/g;
+		$text =~ s/'/&#39;/g;
+		return $text;
+		};
+
+	my $out = <<__HEAD__;
+<table cellpadding=0 cellspacing=0 style="border: 1px solid #333;">
+__HEAD__
+	my $r = 0;
+	foreach my $svn (@{ $svns || [] })
+		{
+		# alternates 1, 0, 1, ... and selects the r0/r1 row class
+		$r = $r ? 0 : 1;
+		my $user = $svn->{'user'};
+		# commits by root are shown with the "default" profile
+		$user = "default" if defined $user && $user eq "root";
+		$user = $escape->($user);
+		my $user_profile = "/cgi-bin/bucky/profile/$user";
+		my $user_img = "/bucky/data/profile/.thumb/am.$user.jpg";
+		my $revision = $escape->($svn->{'revision'});
+		my $comment = $escape->($svn->{'comment'});
+		$out .= <<__SVN__;
+<tr>
+<td style="border: 1px solid #333;" align="center">
+<a href="$user_profile"><img src="$user_img" border=0></a><!--<br><small>$user</small>-->
+</td>
+<td style="border: 1px solid #333; padding: 3px" class="r$r">
+<small>$revision: $comment</small>
+</td>
+</tr>
+__SVN__
+		}
+	$out .= <<__FOOT__;
+</table>
+__FOOT__
+	return $out;
+	}
+# Record one commit from the request carried by $session.
+#
+# Reads the "user", "revision" and "comment" parameters; all three must be
+# present and the revision must satisfy is_number, otherwise error() reports
+# the problem and exits. On a successful insert success() reports the new
+# row id and exits; a failed insert is reported through error().
+sub query_add
+	{
+	my ($self, $session) = @_;
+
+	my ($user, $revision, $comment) =
+		map { scalar $session->q->param($_) } qw(user revision comment);
+	my $date = time;
+
+	my $complete = $user && $comment && $revision
+		&& $self->is_number($revision) && length($user) && length($comment);
+	error("missing some parameters\npossible: secret, user, revision, comment\n")
+		unless $complete;
+
+	my $id = $self->db->insert("svn",
+		{
+		user => $user,
+		comment => $comment,
+		revision => $revision,
+		date => $date,
+		});
+	if ($id)
+		{ success("Successfully inserted $id"); }
+	else
+		{ error("Unable to insert!"); }
+	}
+# Print a plain-text success response and end the process.
+# $success is the message; a false value is replaced by a stock phrase.
+sub success
+	{
+	my $success = shift || "NICE ONE";
+	print "Content-type: text/plain\n\nSUCCESS: $success";
+	exit;
+	}
+# Print a plain-text error response and end the process.
+# $error is the message; a false value is replaced by a stock phrase.
+sub error
+	{
+	my $error = shift || "SORRY GUY";
+	print "Content-type: text/plain\n\nERROR: $error";
+	exit;
+	}
+
+1;
diff --git a/bucky2/lib/Bucky/Search.pm b/bucky2/lib/Bucky/Search.pm
new file mode 100644
index 0000000..43c23f7
--- /dev/null
+++ b/bucky2/lib/Bucky/Search.pm
@@ -0,0 +1,413 @@
+package Bucky::Search;
+
+use base 'Bucky';
+
+use Data::Dumper;
+use DB_File;
+
+# Get or set the term index (a hashref, normally tied to the index file).
+#
+#   $index - optional replacement; when given it becomes the current index
+#
+# Without an argument the cached index is reused for as long as it is still
+# tied; before, the cache was ignored and the file was reopened on every
+# call. A cached value that is not a tied hash is replaced by a fresh
+# index_read, which matches the old behaviour for that case.
+sub index
+	{
+	my ($self, $index) = @_;
+	if ($index)
+		{
+		$self->{'_index'} = $index;
+		}
+	else
+		{
+		my $cached = $self->{'_index'};
+		my $still_open = $cached && ref($cached) eq "HASH" && tied %$cached;
+		$self->{'_index'} = $self->index_read unless $still_open;
+		}
+	return $self->{'_index'};
+	}
+# Open the term index file read-only and return a hashref tied to it.
+# If the file cannot be opened a warning is emitted and the returned hash
+# is an ordinary empty one, so lookups simply find nothing.
+sub index_read
+	{
+	my ($self) = @_;
+	my %index;
+	my $file = $self->index_filename;
+	tie %index, "DB_File", $file, O_RDONLY, 0666, $DB_HASH
+		or warn "Bucky::Search: cannot open $file for reading: $!\n";
+	return \%index;
+	}
+# Open the term index file for reading and writing, creating it if it does
+# not exist, and return a hashref tied to it. If the file cannot be opened
+# a warning is emitted; the returned hash is then an ordinary one and
+# anything stored in it is not written to disk.
+sub index_write
+	{
+	my ($self) = @_;
+	my %index;
+	my $file = $self->index_filename;
+	tie %index, "DB_File", $file, O_CREAT|O_RDWR, 0666, $DB_HASH
+		or warn "Bucky::Search: cannot open $file for writing: $!\n";
+	return \%index;
+	}
+# Untie the given index hashref; without one, the hashref returned by
+# $self->index is untied instead.
+sub index_close
+	{
+	my ($self, $index) = @_;
+	$index = $self->index unless $index;
+	untie %$index;
+	}
+# File name of the term index, as given to tie (a relative path).
+sub index_filename
+	{ return "gross.db"; }
+# Get or set the autocomplete index (a hashref, normally tied to its file).
+#
+#   $auto_index - optional replacement; when given it becomes current
+#
+# Without an argument the cached index is reused for as long as it is still
+# tied; before, the cache was ignored and the file was reopened on every
+# call. A cached value that is not a tied hash is replaced by a fresh
+# auto_index_read, which matches the old behaviour for that case.
+sub auto_index
+	{
+	my ($self, $auto_index) = @_;
+	if ($auto_index)
+		{
+		$self->{'_auto_index'} = $auto_index;
+		}
+	else
+		{
+		my $cached = $self->{'_auto_index'};
+		my $still_open = $cached && ref($cached) eq "HASH" && tied %$cached;
+		$self->{'_auto_index'} = $self->auto_index_read unless $still_open;
+		}
+	return $self->{'_auto_index'};
+	}
+# Open the autocomplete index file read-only and return a hashref tied to
+# it. If the file cannot be opened a warning is emitted and the returned
+# hash is an ordinary empty one, so lookups simply find nothing.
+sub auto_index_read
+	{
+	my ($self) = @_;
+	my %auto_index;
+	my $file = $self->auto_index_filename;
+	tie %auto_index, "DB_File", $file, O_RDONLY, 0666, $DB_HASH
+		or warn "Bucky::Search: cannot open $file for reading: $!\n";
+	return \%auto_index;
+	}
+# Open the autocomplete index file for reading and writing, creating it if
+# it does not exist, and return a hashref tied to it. If the file cannot be
+# opened a warning is emitted; the returned hash is then an ordinary one
+# and anything stored in it is not written to disk.
+sub auto_index_write
+	{
+	my ($self) = @_;
+	my %auto_index;
+	my $file = $self->auto_index_filename;
+	tie %auto_index, "DB_File", $file, O_CREAT|O_RDWR, 0666, $DB_HASH
+		or warn "Bucky::Search: cannot open $file for writing: $!\n";
+	return \%auto_index;
+	}
+# Untie the given autocomplete index hashref; without one, the hashref
+# returned by $self->auto_index is untied instead.
+sub auto_index_close
+	{
+	my ($self, $auto_index) = @_;
+	$auto_index = $self->auto_index unless $auto_index;
+	untie %$auto_index;
+	}
+# File name of the autocomplete index, as given to tie (a relative path).
+sub auto_index_filename
+	{ return "auto.db"; }
+# Write a lexicon into the term index.
+#
+#   $lexicon - hashref of term => matches, where matches is whatever
+#              serialize_matches accepts
+#
+# Stopwords and terms whose matches serialize to nothing are skipped. The
+# index is opened with index_write and closed again before returning.
+sub lexicon_store
+	{
+	my ($self, $lexicon) = @_;
+	my $index = $self->index_write;
+	for my $term (keys %$lexicon)
+		{
+		next if $self->is_stopword($term);
+		my $serialized = $self->serialize_matches($lexicon->{$term})
+			or next;
+		$index->{$term} = $serialized;
+		}
+	$self->index_close($index);
+	}
+# Decode a string produced by serialize_matches.
+#
+# Matches are separated by commas; within a match the fields are separated
+# by whitespace, in the order thread, comment, file, strength.
+#
+# Returns an arrayref of hashrefs with exactly those four keys; a field
+# missing from the input is undef.
+sub unserialize_matches
+	{
+	my ($self, $serialized_string) = @_;
+	my @field_names = qw(thread comment file strength);
+	my @matches;
+	for my $chunk (split ",", $serialized_string)
+		{
+		my %match;
+		@match{@field_names} = split " ", $chunk;
+		push @matches, \%match;
+		}
+	return \@matches;
+	}
+# Encode matches for storage in the term index.
+#
+#   $matches - hashref whose values are match hashrefs with the keys
+#              thread, comment, file and strength; other values are skipped
+#
+# Each match becomes "thread comment file strength" and the matches are
+# joined with commas. Returns undef when there is nothing to store.
+#
+# A missing or empty field is written as 0. unserialize_matches splits on
+# runs of whitespace, so an empty slot would vanish and every later field
+# would be read back under the wrong name.
+sub serialize_matches
+	{
+	my ($self, $matches) = @_;
+	my @serialized_matches;
+	foreach my $match (values %$matches)
+		{
+		next unless $match && ref($match) eq "HASH";
+		my @fields = map { defined($_) && length($_) ? $_ : 0 }
+			@$match{qw(thread comment file strength)};
+		push @serialized_matches, join " ", @fields;
+		}
+	# explicit undef so list-context callers still get one element
+	return undef unless scalar @serialized_matches;
+	return join ",", @serialized_matches;
+	}
+my $STOPWORDS = {( map { lc $_, 1 } qw(
+a about above across adj after again against all almost alone along also 
+although always am among an and another any anybody anyone anything anywhere 
+apart are around as aside at away be because been before behind being below 
+besides between beyond both but by can cannot could deep did do does doing done 
+down downwards during each either else enough etc even ever every everybody 
+everyone except far few for forth from get gets got had hardly has have having 
+her here herself him himself his how however i if in indeed instead into inward 
+is it its itself just kept many maybe might mine more most mostly much must 
+myself near neither next no nobody none nor not nothing nowhere of off often on 
+only onto or other others ought our ours out outside over own p per please plus 
+pp quite rather really said seem self selves several shall she should since so 
+some somebody somewhat still such than that the their theirs them themselves 
+then there therefore these they this thorough thoroughly those through thus to 
+together too toward towards under until up upon v very was well were what 
+whatever when whenever where whether which while who whom whose will with
+within without would yet young your yourself s ) )}; # hashref of lowercased word => 1, used as a lookup set by is_stopword
+# True when $term, compared case-insensitively, is in the stopword table.
+sub is_stopword
+	{
+	my ($self, $term) = @_;
+	my $key = lc $term;
+	return exists $STOPWORDS->{$key};
+	}
+# Suggest a completion for the last word of $query.
+#
+# The query is split with parse_terms and its final term is looked up in the
+# autocomplete index. Returns a hashref:
+#
+#   term - the suggested word, or undef when the index has no entry
+#   tail - the part of the suggestion after what was already typed
+#   full - the earlier terms followed by the suggestion, space separated
+#
+# Returns nothing when the query is empty or has no usable last term.
+sub autocomplete
+	{
+	my ($self, $query) = @_;
+	return unless $query;
+	my $terms = parse_terms($query);
+	my $last_term = pop @$terms;
+	# An empty pattern would make s/// reuse the last successful regex, and
+	# an undefined term cannot be looked up, so stop here in both cases.
+	return unless defined $last_term && length $last_term;
+
+	my $auto_index = $self->auto_index;
+	my $guess_term = $auto_index->{$last_term};
+	my $completion = defined $guess_term ? $guess_term : '';
+	my $guess_full = join " ", @$terms, $completion;
+
+	# \Q...\E: the typed term is matched literally, never as a pattern
+	my $guess_tail = $completion;
+	$guess_tail =~ s/^\Q$last_term\E//;
+
+	my $guess = {};
+	$guess->{'full'} = $guess_full;
+	$guess->{'tail'} = $guess_tail;
+	$guess->{'term'} = $guess_term;
+	return $guess;
+	}
+# Lightweight search: like search(), but it does not skip private threads,
+# does not fetch comments and does not log the query.
+#
+#   $query - search string; split into terms with parse_terms
+#   $start - number of ranked matches to skip (default 0)
+#   $limit - maximum number of results to return (default 10)
+#
+# Returns nothing for an empty query, otherwise a hashref with:
+#
+#   start   - offset to pass for the next page ($start + $limit)
+#   limit   - the limit that was applied
+#   total   - number of distinct threads that matched any term
+#   results - arrayref of score records (thread, file, strength, count),
+#             best first; undef when the page holds no results
+#   threads - hashref of thread id => thread for the returned results
+#   files   - whatever files_by_id returns for the files involved
+#   terms   - the parsed query terms
+sub search_light
+	{
+	my ($self, $query, $start, $limit) = @_;
+	return unless $query;
+	$start ||= 0;
+	$limit ||= 10;
+	my $scores = {};
+	my $terms = parse_terms($query);
+	my $index = $self->index;
+
+	# accumulate one score record per thread across all query terms
+	foreach my $term (@$terms)
+		{
+		next if $self->is_stopword($term);
+		next unless my $serial = $index->{$term};
+		my $hits = $self->unserialize_matches($serial);
+		foreach my $hit (@$hits)
+			{
+			my $thread = $hit->{'thread'};
+			$scores->{$thread} ||= {};
+			$scores->{$thread}->{thread} ||= $hit->{'thread'};
+			$scores->{$thread}->{file} ||= $hit->{'file'};
+			$scores->{$thread}->{strength} += $hit->{'strength'};
+			$scores->{$thread}->{count}++;
+			}
+		}
+	my $total = scalar keys %$scores;
+	my $i = 0;
+	my $to_display = $limit;
+	my $threads = {};
+	my $files_to_get = [];
+
+	# Declared here on purpose: without a lexical, the pushes below went to
+	# a package global that kept growing across calls in one process. It is
+	# left undefined until the first push so an empty page still reports
+	# results => undef, as before.
+	my $results;
+
+	# rank by number of matching terms, then by strength weighted by count
+	foreach my $match (sort { $b->{count} <=> $a->{count} || $b->{strength} * $b->{count} <=> $a->{strength} * $a->{count} } values %$scores )
+		{
+		next if $i++ < $start;
+		my $thread = $self->thread( $match->{'thread'} );
+		next unless $thread;
+		# next if $thread->{'private'};
+		last if $to_display-- == 0;
+		push @$results, $match;
+		if ( $match->{'file'} )
+			{ push @$files_to_get, $match->{'file'}; }
+		if ( $thread->{'flagged'} )
+			{ push @$files_to_get, $thread->{'flagged'}; }
+		$threads->{ $thread->{'id'} } = $thread;
+		}
+	my $files = $self->files_by_id($files_to_get);
+	# $self->log_query($query, $total);
+	return
+		{
+		start => $start + $limit,
+		limit => $limit,
+		total => $total,
+		results => $results,
+		threads => $threads,
+		files => $files,
+		terms => $terms,
+		};
+	}
+# Full search over the term index.
+#
+#   $query - search string; split into terms with parse_terms
+#   $start - number of ranked matches to skip (default 0)
+#   $limit - maximum number of results to return (default 10)
+#
+# Private threads are left out of the results. The query and its total are
+# recorded with log_query.
+#
+# Returns nothing for an empty query, otherwise a hashref with:
+#
+#   start    - offset to pass for the next page ($start + $limit)
+#   limit    - the limit that was applied
+#   total    - number of distinct threads that matched any term
+#   results  - arrayref of score records (thread, comment, file, strength,
+#              count), best first; undef when the page holds no results
+#   threads  - hashref of thread id => thread for the returned results
+#   comments - whatever comments_by_id returns for the comments involved
+#   files    - whatever files_by_id returns for the files involved
+#   terms    - the parsed query terms
+sub search
+	{
+	my ($self, $query, $start, $limit) = @_;
+	return unless $query;
+	$start ||= 0;
+	$limit ||= 10;
+	my $scores = {};
+	my $terms = parse_terms($query);
+	my $index = $self->index;
+
+	# accumulate one score record per thread across all query terms
+	foreach my $term (@$terms)
+		{
+		next if $self->is_stopword($term);
+		next unless my $serial = $index->{$term};
+		my $hits = $self->unserialize_matches($serial);
+		foreach my $hit (@$hits)
+			{
+			my $thread = $hit->{'thread'};
+			$scores->{$thread} ||= {};
+			$scores->{$thread}->{thread} ||= $hit->{'thread'};
+			$scores->{$thread}->{comment} ||= $hit->{'comment'};
+			$scores->{$thread}->{file} ||= $hit->{'file'};
+			$scores->{$thread}->{strength} += $hit->{'strength'};
+			$scores->{$thread}->{count}++;
+			}
+		}
+	my $total = scalar keys %$scores;
+	my $i = 0;
+	my $to_display = $limit;
+	my $threads = {};
+	my $comments_to_get = [];
+	my $files_to_get = [];
+
+	# Declared here on purpose: without a lexical, the pushes below went to
+	# a package global that kept growing across calls in one process. It is
+	# left undefined until the first push so an empty page still reports
+	# results => undef, as before.
+	my $results;
+
+	# rank by number of matching terms, then by strength weighted by count
+	foreach my $match (sort { $b->{count} <=> $a->{count} || $b->{strength} * $b->{count} <=> $a->{strength} * $a->{count} } values %$scores )
+		{
+		next if $i++ < $start;
+		my $thread = $self->thread( $match->{'thread'} );
+		next unless $thread;
+		next if $thread->{'private'};
+		last if $to_display-- == 0;
+		push @$results, $match;
+		push @$comments_to_get, $match->{'comment'} if $match->{'comment'};
+		if ( $match->{'file'} )
+			{ push @$files_to_get, $match->{'file'}; }
+		if ( $thread->{'flagged'} )
+			{ push @$files_to_get, $thread->{'flagged'}; }
+		$threads->{ $thread->{'id'} } = $thread;
+		}
+	my $files = $self->files_by_id($files_to_get);
+	my $comments = $self->comments_by_id($comments_to_get);
+	$self->log_query($query, $total);
+	return
+		{
+		start => $start + $limit,
+		limit => $limit,
+		total => $total,
+		results => $results,
+		threads => $threads,
+		comments => $comments,
+		files => $files,
+		terms => $terms,
+		};
+	}
+# Format a score record as "<number of terms>x<count>".
+# Plain function: $obj is a hashref with a 'terms' hashref and a 'count'.
+sub score_display
+	{
+	my $obj = shift;
+	my $term_count = scalar keys %{ $obj->{terms} };
+	return $term_count . "x" . $obj->{count};
+	}
+sub display_object # Plain function: formats a "type:id" string as "type id<TAB>thread title".
+	{
+	my ($obj) = @_;
+	my ($type, $id) = split ":", $obj; # $obj is split on ":" into the type and the id
+	my $thread = $bucky->thread($id); # NOTE(review): $bucky is not declared in the visible source -- presumably a package global set elsewhere; confirm
+	my $title = $thread ? $thread->_title : "* * *"; # "* * *" stands in when the thread lookup returns nothing
+	return $type . " " . $id . "\t" . $title;
+	}
+# Wrap every whole-word occurrence of a search term in <b>...</b>.
+#
+#   $string - text to decorate; it is passed through strip_html first
+#   $terms  - arrayref of terms; undefined and empty entries are ignored
+#
+# Returns the decorated string (the stripped text when no term is usable).
+#
+# All terms are applied in one pass. Handling them one after another let a
+# later term such as "b" match inside the <b> tags added for an earlier
+# one. Terms are matched literally, never as regular expressions.
+sub bold_terms
+	{
+	my ($self, $string, $terms) = @_;
+	$string = $self->strip_html($string);
+	my @patterns = map { quotemeta($_) }
+		grep { defined($_) && length($_) } @{ $terms || [] };
+	return $string unless @patterns;
+	my $any_term = join "|", @patterns;
+	$string =~ s/\b($any_term)\b/<b>$1<\/b>/gi;
+	return $string;
+	}
+# Build a snippet of $string around the search terms (see snippet) and
+# return it with the terms highlighted (see bold_terms).
+sub bold_snippet
+	{
+	my ($self, $string, $terms) = @_;
+	return $self->bold_terms(scalar $self->snippet($string, $terms), $terms);
+	}
+# Extract the parts of $string that surround the search terms.
+#
+#   $string - text to excerpt; it is passed through strip_html first
+#   $terms  - arrayref of terms; undefined and empty entries are ignored
+#
+# For every word containing a non-stopword term, up to four words before it
+# and four words after it are kept. Each new run of context starts with
+# "..." and the result always ends with "...". Collection stops once more
+# than 30 entries have been gathered. Returns the snippet as one string.
+#
+# Terms are matched literally. An empty term used to produce a pattern that
+# matched every word, which turned the snippet into the start of the text.
+sub snippet
+	{
+	my ($self, $string, $terms) = @_;
+
+	# clean up the string we got
+	$string = $self->strip_html($string);
+
+	# create one regex out of the usable search terms
+	my @usable = map { quotemeta($_) }
+		grep { defined($_) && length($_) } @{ $terms || [] };
+	my $term_re = join "|", @usable;
+	my $matcher = @usable ? qr/\b($term_re)\b/i : undef;
+
+	# take the string to be snippetized and split it into words
+	my @words = split /\s+/, $string;
+
+	# deduper for matching @words indexes, so we don't add a word twice
+	my $index_matches = {};
+
+	# words in the eventual snippet
+	my @words_matched;
+
+	# counter for aggregating context after a match
+	my $aggr = 0;
+
+	# amount of context to show, in number of words surrounding a match
+	my $pad = 4;
+
+	# loop over each of the words in the string
+	for my $i (0 .. $#words)
+		{
+		# does this word contain a match?
+		if ($matcher && $words[$i] =~ $matcher && ! $self->is_stopword($1))
+			{
+			# if we aren't already aggregating, add an ellipsis
+			push @words_matched, "..." unless $aggr;
+
+			# look backward $pad words, ending with the match itself
+			for my $idx ($i - $pad .. $i)
+				{
+				# before the start of the text, or already collected?
+				next if $idx < 0;
+				next if exists $index_matches->{$idx};
+
+				# checks out, save this word and note it in the deduper
+				push @words_matched, $words[$idx];
+				$index_matches->{$idx} ++;
+				}
+			# enter aggregate mode -- add the next $pad words
+			$aggr = $pad;
+			}
+		# have we been told to aggregate?
+		elsif ($aggr)
+			{
+			# save this word and add its index to the deduper
+			push @words_matched, $words[$i];
+			$index_matches->{$i} ++;
+
+			# one less word to aggregate
+			$aggr--;
+			}
+		# keep snippets to a modest length
+		last if scalar @words_matched > 30;
+		}
+	# add a trailing ellipsis
+	push @words_matched, "...";
+
+	# create the snippet from the saved context words
+	return join " ", @words_matched;
+	}
+# Split a query string into lowercase search terms.
+# Plain function: $s is the raw query.
+#
+# Terms are the runs of word characters in $s. Returns an arrayref, empty
+# when $s is undefined or contains no word characters.
+#
+# Empty strings are never returned. A query starting with punctuation used
+# to yield a leading "" term, which callers then placed in regular
+# expressions where it matched everywhere.
+sub parse_terms
+	{
+	my ($s) = @_;
+	return [] unless defined $s;
+	return [ grep { length($_) } split /\W+/, lc($s) ];
+	}
+# Read the search log.
+#
+#   $date - optional; when true, only rows with that date are selected
+#
+# Returns whatever the database layer's select returns for "search_log".
+sub log
+	{
+	my ($self, $date) = @_;
+	my $criteria = $date ? { date => $date } : {};
+	return $self->db->select("search_log", $criteria);
+	}
+# Record a search in the search log.
+#
+#   $query - the search string; nothing is logged when it is empty
+#   $total - number of matches; a missing value is stored as 0
+#
+# The default used to be applied to an unrelated, undeclared variable, so a
+# missing total was stored as undef rather than 0.
+sub log_query
+	{
+	my ($self, $query, $total) = @_;
+	return unless $query;
+	$total ||= 0;
+	my $date = time;
+	$self->db->insert("search_log", { query => $query, date => $date, matches => $total });
+	}
+1;
diff --git a/bucky2/lib/Bucky/Session.pm b/bucky2/lib/Bucky/Session.pm
new file mode 100644
index 0000000..0cff753
--- /dev/null
+++ b/bucky2/lib/Bucky/Session.pm
@@ -0,0 +1,19 @@
+package Bucky::Session;
+
+use strict;
+use warnings;
+
+use base 'Bucky';
+
+use CGI;
+
+# Return the request parameter called $name, as reported by the CGI object.
+# Returns nothing when $name is undefined or empty; a name such as "0" is
+# a legitimate parameter name and is looked up.
+sub param
+	{
+	my ($self, $name) = @_;
+	return unless defined $name && length $name;
+	return $self->q->param($name);
+	}
+# Return the CGI object for this session, creating it on first use and
+# caching it in $self->{_q}.
+sub q
+	{
+	my ($self) = @_;
+	# direct method call instead of the ambiguous "new CGI" form
+	$self->{'_q'} ||= CGI->new;
+	return $self->{'_q'};
+	}
+1;
diff --git a/bucky2/lib/Bucky/Thread.pm b/bucky2/lib/Bucky/Thread.pm
new file mode 100644
index 0000000..dbd8ad0
--- /dev/null
+++ b/bucky2/lib/Bucky/Thread.pm
@@ -0,0 +1,30 @@
+package Bucky::Thread;
+
+use base 'Bucky';
+
+# Record type tag: the current value of the package variable $Bucky::Thread.
+# NOTE(review): nothing visible here assigns that variable -- confirm it is
+# set elsewhere, or that a literal type name was intended.
+sub type
+	{
+	return $Bucky::Thread;
+	}
+
+# Field names of a thread record, returned as a fresh arrayref.
+sub fields
+	{
+	my @names = qw(
+		id title username keyword private allowed
+		createdate lastmodified revision viewed
+		size color display flagged zipped
+		);
+	return \@names;
+	}
+
+# Read accessors, one per stored field.
+sub _id				{ return $_[0]->{id} }
+sub _title			{ return $_[0]->{title} }
+sub _username		{ return $_[0]->{username} }
+sub _keyword		{ return $_[0]->{keyword} }
+sub _private		{ return $_[0]->{private} }
+sub _allowed		{ return $_[0]->{allowed} }
+sub _createdate		{ return $_[0]->{createdate} }
+sub _lastmodified	{ return $_[0]->{lastmodified} }
+sub _revision		{ return $_[0]->{revision} }
+sub _viewed			{ return $_[0]->{viewed} }
+sub _size			{ return $_[0]->{size} }
+sub _color			{ return $_[0]->{color} }
+sub _display		{ return $_[0]->{display} }
+sub _flagged		{ return $_[0]->{flagged} }
+sub _zipped			{ return $_[0]->{zipped} }
+
+1;
+