diff options
| -rwxr-xr-x | search/bin/build-autocomplete | 53 | ||||
| -rwxr-xr-x | search/bin/build-index | 112 | ||||
| -rwxr-xr-x | search/bin/build-splashes | 112 | ||||
| -rw-r--r-- | search/bin/fix-files | 41 | ||||
| -rw-r--r-- | search/bin/gross.db | bin | 0 -> 3158016 bytes | |||
| -rwxr-xr-x | search/bin/listener.pl | 65 | ||||
| -rwxr-xr-x | search/bin/poetaster | 33 | ||||
| -rwxr-xr-x | search/bin/watch-index.pl | 8 | ||||
| -rw-r--r-- | search/lib/Bucky.pm | 159 | ||||
| -rw-r--r-- | search/lib/Bucky/DB.pm | 175 | ||||
| -rw-r--r-- | search/lib/Bucky/Keyword.pm | 26 | ||||
| -rw-r--r-- | search/lib/Bucky/SVN.pm | 103 | ||||
| -rw-r--r-- | search/lib/Bucky/Search.pm | 413 | ||||
| -rw-r--r-- | search/lib/Bucky/Session.pm | 19 | ||||
| -rw-r--r-- | search/lib/Bucky/Thread.pm | 30 | ||||
| -rw-r--r-- | search/lib/Common.pm | 46 | ||||
| -rw-r--r-- | search/lib/Time/Stopwatch.pm | 13 | ||||
| -rw-r--r-- | search/lib/localbucky.pm | 12 |
18 files changed, 1420 insertions, 0 deletions
diff --git a/search/bin/build-autocomplete b/search/bin/build-autocomplete new file mode 100755 index 0000000..d9a62fd --- /dev/null +++ b/search/bin/build-autocomplete @@ -0,0 +1,53 @@ +#!/usr/bin/perl +use lib "../lib"; +use Bucky; +use Bucky::Search; +use Data::Dumper; +use DB_File; + +my $file = "auto.db"; + +my $search = new Bucky::Search; + +my $index = $search->index; +my $auto_index = $search->auto_index_write; + +my $partials = {}; +my $partials_with_media = {}; +foreach my $word (keys %$index) + { + # goatse operator + my $count =()= $index->{$word} =~ /,/g; + + next unless $word; + + my $par = ''; + my @letters = split "", $word; + for (my $i = 0; $i < scalar @letters; $i++) + { + $par .= $letters[$i]; + if (! exists $partials->{$par} || $partials->{$par}->[0] < $count) + { + $partials->{$par} = [$count,$word]; + } + } + } +# don't autocomplete if we match a word +foreach my $word (keys %$index) + { + $partials->{$word}->[1] = $word + } + +foreach my $par (sort keys %$partials) + { + $auto_index->{$par} = $partials->{$par}->[1]; + } +$search->auto_index_close; + +print "NEW: " ; system("/bin/ls", "-l", "./$file"); +print "OLD: " ; system("/bin/ls", "-l", "../cgi-bin/$file"); +system("/bin/mv", "../cgi-bin/$file", "../cgi-bin/$file.1"); +system("/bin/mv", "./$file", "../cgi-bin/$file"); + +exit; + diff --git a/search/bin/build-index b/search/bin/build-index new file mode 100755 index 0000000..b838924 --- /dev/null +++ b/search/bin/build-index @@ -0,0 +1,112 @@ +#!/usr/bin/perl +use strict; +use lib "../lib"; +use Bucky; +use DB_File; +#require Time::Stopwatch; +tie my $timer, 'Time::Stopwatch'; + +print_timer($timer, "Initialized"); + +my $bucky = new Bucky::Search; + +my $keywords = $bucky->db->select("keyword"); +my $threads = $bucky->db->select("thread", {"id > 1"}); +my $files = $bucky->db->select("file"); +my $comments = $bucky->db->select("comment", {"thread > 1"}); + +print_timer($timer, "Loaded mysql"); + +my $lexicon = {}; +my $total = 0; +#foreach my $keyword (@$keywords) +# { +# my $id = $keyword->{$id}; +# $lexicon->{ $keyword->{'keyword'} }++; +# $total++; +# } +foreach my $thread (@$threads) + { + $total += parse_terms({ string => $thread->{'title'}, thread => $thread->{'id'} }); + } +foreach my $comment (@$comments) + { + $total += parse_terms({ string => $comment->{'comment'}, thread => $comment->{'thread'}, comment => $comment->{'id'} }); + } +foreach my $file (@$files) + { + $total += parse_terms({ string => $file->{'filename'}, thread => $file->{'thread'}, file => $file->{'id'} }); + } + +print_timer($timer, "Created index"); + +my $unique = scalar keys %$lexicon; +print "--- WORD COUNT: " . $total . "\n"; +print "--- UNIQUE WORDS: " . $unique . "\n"; + +$bucky->lexicon_store($lexicon); + +my $file = $bucky->index_filename; + +print_timer($timer, "Dumped $file"); + +print "NEW: " ; system("/bin/ls", "-l", "./$file"); +print "OLD: " ; system("/bin/ls", "-l", "../cgi-bin/$file"); +system("/bin/mv", "../cgi-bin/$file", "../cgi-bin/$file.1"); +system("/bin/cp", "./$file", "../cgi-bin/$file"); +system("/usr/bin/perl", "./build-autocomplete"); +exit; + +sub parse_terms + { + my ($args) = @_; + my $thread = $args->{'thread'} || return; + my $comment = $args->{'comment'} || '0'; + my $file = $args->{'file'} || '0'; + my $string = $args->{'string'}; + $string =~ s/_/ /g; + my @terms = split /(\W+)/, $string; + my $count = 0; + foreach my $term (@terms) + { + if ( $term !~ /\W/ ) + { + my $t = lc($term); + $lexicon->{$t} ||= {}; + $lexicon->{$t}->{$thread} ||= {}; + $lexicon->{$t}->{$thread}->{'thread'} ||= $thread; + $lexicon->{$t}->{$thread}->{'comment'} ||= $comment; + $lexicon->{$t}->{$thread}->{'file'} ||= $file; + # give terms in title an extra bump + if ($comment eq '0' && $file eq '0') + { $lexicon->{$t}->{$thread}->{'strength'} += 2; } + else + { $lexicon->{$t}->{$thread}->{'strength'} += 1; } + $count++; + } + } + return $count; + } + +sub print_timer + { print STDERR sprintf "%3.2f s %s\n", shift, shift; } + +################################################3 + +package Time::Stopwatch; +my $VERSION = '1.00'; + +use strict; +use constant HIRES => eval { local $SIG{__DIE__}; require Time::HiRes }; + +sub TIESCALAR { + my $pkg = shift; + my $time = (HIRES ? Time::HiRes::time() : time()) - (@_ ? shift() : 0); + bless \$time, $pkg; +} + +sub FETCH { (HIRES ? Time::HiRes::time() : time()) - ${$_[0]}; } +sub STORE { ${$_[0]} = (HIRES ? Time::HiRes::time() : time()) - $_[1]; } + +1; + diff --git a/search/bin/build-splashes b/search/bin/build-splashes new file mode 100755 index 0000000..d5fd5c1 --- /dev/null +++ b/search/bin/build-splashes @@ -0,0 +1,112 @@ +#!/usr/bin/perl + +my $TEMPLATE_DIR = "../template"; +my $BASE_DIR = "/var/www/vhosts/carbonpictures.com/httpdocs/splash"; +my $OUT_FILE = "/var/www/vhosts/carbonpictures.com/httpdocs/splash/index.html"; +my @ab = qw[b c d f g h j k l m n p q r s t v w x y z]; +unshift @ab, undef; + +# disabled for now +# build_site(read_splashes()); + +sub build_site + { + my ($years) = @_; + carp("Building site!"); + my $t_page = slurp_template("splash_list_page"); + my $t_year = slurp_template("splash_year"); + my $t_month = slurp_template("splash_month"); + my $t_day = slurp_template("splash_day"); + my $yeas = ""; + foreach my $year (sort keys %$years) + { + carp($year); + my $mons = ""; + foreach my $month (sort keys %{ $years->{$year} }) + { + my $days = ""; + foreach my $day (sort {$a cmp $b} keys %{ $years->{$year}->{$month} }) + { + my $d = $years->{$year}->{$month}->{$day}; + $days .= templatize($t_day, $d); + } + $mons .= templatize($t_month, { month => $month, days => $days }); + } + $yeas .= templatize($t_year, { year => "20".$year, months => $mons }); + } + my $page = templatize($t_page, { years => $yeas } ); + vomit($OUT_FILE, $page); + } +sub templatize + { + my ($t, $o) = @_; + while ($t =~ /\%\%([^\%]+)\%\%/) + { + my $val = $o->{lc $1}; + $t =~ s/\%\%$1\%\%/$val/g; + } + return $t; + } +sub read_splashes + { + my $years = {}; + foreach my $y (grep /^\d/, slurp_dir($BASE_DIR)) + { + my $months = {}; + $years->{$y} = $months; + foreach my $m (grep /^\d/, slurp_dir("$BASE_DIR/$y")) + { + my $days = {}; + $months->{$m} = $days; + foreach my $d (grep /^\d/, slurp_dir("$BASE_DIR/$y/$m")) + { + my $abi = 0; + foreach my $f (sort grep /html$/, slurp_dir("$BASE_DIR/$y/$m/$d")) + { + my $is_index = $f eq "index.html"; + if ($is_index) + { $k = $d; } + else + { $k = "$d".$ab[$abi++]; } + my $d = + { + key => $k, + url => "/splash/$y/$m/$d/" . ($is_index ? undef : $f), + }; + $days->{$k} = $d; + } + } + } + } + return $years; + } + +sub slurp_dir + { + my ($d) = @_; + my @files; + opendir D, $d; + while (my $f = readdir(D)) + { push(@files,$f) if $f !~ /^\./; } + closedir D; + return @files; + } +sub slurp_template + { return slurp(join "/", $TEMPLATE_DIR, @_); } +sub slurp + { + my ($f) = @_; + open F, $f; my @lines = <F>; close F; + return join "", @lines; + } +sub vomit + { + my ($f, $t) = @_; + carp("Writing $f"); + open F, ">", $f || die "couldn't open $f : $!"; print F $t; close F; + } +sub carp + { + my ($m) = @_; + print STDERR $m . "\n"; + } diff --git a/search/bin/fix-files b/search/bin/fix-files new file mode 100644 index 0000000..b56d377 --- /dev/null +++ b/search/bin/fix-files @@ -0,0 +1,41 @@ +#!/usr/bin/perl +use strict; +use lib "../lib"; +use Bucky; + +my $bucky = new Bucky; +my $file_list = $bucky->db->select("file", {'thread = 2833'}); +my $file_map = {}; +foreach my $f (@$file_list) { + $file_map->{ $f->{'filename'} } = $f->{'id'}; +} + +my $base = "/var/www/vhosts/carbonpictures.com/bucky/data/"; +opendir(DIR, $base) or die $!; +my @dirs = readdir(DIR); +closedir(DIR); + +print scalar @dirs; + +foreach my $thread_id (@dirs) { + my $dir = $base . $thread_id; + + next unless (-d $dir && $thread_id !~ /^\./); + + opendir (THREAD, $dir); + my @local_files = readdir(THREAD); + closedir (THREAD); + + foreach my $filename (@local_files) { + next unless exists($file_map->{$filename}); + + my $file_id = $file_map->{$filename}; + + $bucky->db->update_by_id('file', { + "id" => $file_id, + "record" => { + "thread" => $thread_id + } + }); + } +}
\ No newline at end of file diff --git a/search/bin/gross.db b/search/bin/gross.db Binary files differnew file mode 100644 index 0000000..410d4e8 --- /dev/null +++ b/search/bin/gross.db diff --git a/search/bin/listener.pl b/search/bin/listener.pl new file mode 100755 index 0000000..0f0f2d9 --- /dev/null +++ b/search/bin/listener.pl @@ -0,0 +1,65 @@ +#!/usr/bin/perl + use IO::Socket; + use IO::Select; + + # Create a socket to listen on. + # + my $listener = + IO::Socket::INET->new( LocalPort => 8008, Listen => 5, Reuse => 1 ); + + die "Can't create socket for listening: $!" unless $listener; + print "Listening for connections on port 8008\n"; + + my $readable = IO::Select->new; # Create a new IO::Select object + $readable->add($listener); # Add the listener to it + + while(1) { + + # Get a list of sockets that are ready to talk to us. + # + my ($ready) = IO::Select->select($readable, undef, undef, undef); + foreach my $s (@$ready) { + + # Is it a new connection? + # + if($s == $listener) { + + # Accept the connection and add it to our readable list. + # + my $new_sock = $listener->accept; + $readable->add($new_sock) if $new_sock; + + print $new_sock "Welcome!\r\n"; + print "connection :-o\n\n"; + + } else { # It's an established connection + + my $buf = <$s>; # Try to read a line + + # Was there anyone on the other end? + # + if( defined $buf ) { + + # If they said goodbye, close the socket. If not, + # echo what they said to us. + # + if ($buf =~ /(good)?bye/i) { + print $s "See you later!\n"; + $readable->remove($s); + $s->close; + } else { + print $s "You said: $buf\n"; + print "$buf\n"; + } + + } else { # The client disconnected. + + $readable->remove($s); + $s->close; + print STDERR "Client Connection closed\n"; + + } + } + } + } + diff --git a/search/bin/poetaster b/search/bin/poetaster new file mode 100755 index 0000000..fe973a5 --- /dev/null +++ b/search/bin/poetaster @@ -0,0 +1,33 @@ +#!/usr/bin/perl +use lib "../lib"; +use Rest; +use CGI; +use Poetaster; + +my $file = $ARGV[0] || "twain"; +my $data = load_data($file); +$data =~ s/<[^>]+>//g; +$data =~ s/\r\n/ /g; +$data =~ s/\s+/ /g; +$data =~ s/\. \. \. /... /g; +my $self = new Poetaster; +my $poem = $self->poem($data); +print $poem; + +sub load_data + { + my ($file) = @_; + my $data = ''; + if ($file =~ /^http/) + { + $data = Rest->new->rest_get_raw($file); + } + else + { + local $/; + open F, $file; + $data = <F>; + close F; + } + return $data; + } diff --git a/search/bin/watch-index.pl b/search/bin/watch-index.pl new file mode 100755 index 0000000..c9d950b --- /dev/null +++ b/search/bin/watch-index.pl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +while (1) + { + system("./build-index"); + sleep(60*60); + } + diff --git a/search/lib/Bucky.pm b/search/lib/Bucky.pm new file mode 100644 index 0000000..181c1ae --- /dev/null +++ b/search/lib/Bucky.pm @@ -0,0 +1,159 @@ +package Bucky; + +use strict 'vars'; + +use Data::Dumper; +use Common; + +use localbucky qw("config"); + +use Bucky::DB; +use Bucky::Keyword; +use Bucky::Search; +use Bucky::Session; +use Bucky::Thread; +# use Bucky::Comment; +# use Bucky::File; +use Bucky::SVN; + +our $TYPE_Bucky = "bucky"; +our $TYPE_Keyword = "keyword"; +our $TYPE_Thread = "thread"; +our $TYPE_Comment = "comment"; +our $TYPE_File = "file"; + +our $VALID_TYPES = + { + $TYPE_Bucky => 1, + $TYPE_Keyword => 1, + $TYPE_Thread => 1, + $TYPE_Comment => 1, + $TYPE_File => 1, + }; +sub valid_types + { return $VALID_TYPES; } +sub is_valid_type + { + my ($self, $type) = @_; +# print "TYPE $type IS VALID? >>> " . $self->valid_types->{$type} . "\n"; + return $self->valid_types->{$type}; + } +sub bucky_data_path + { return "/var/www/vhosts/carbonpictures.com" } +sub new + { + my ($class, $self) = @_; + $self ||= {}; + bless $self, $class; + return $self; + } +sub type + { return $TYPE_Bucky; } +sub inherit + { + my ($self, $parent) = @_; + if ($parent && ref($parent) =~ /Bucky/) + { + $self->db($parent); + $self->bucky($parent); + } + } +sub bucky + { + my ($self, $parent) = @_; + return $self->{_bucky} if $self->{_bucky}; + if ($parent && ref($parent) eq "Bucky") + { $self->{_bucky} = $parent; } + elsif ($parent && ref($parent) =~ /Bucky/) + { $self->{_bucky} = $parent->bucky; } + elsif ($self && ref($self) eq "Bucky") + { $self->{_bucky} = $self; } + else + { $self->{_bucky} = new Bucky; } + return $self->{_bucky}; + } +sub db + { + my ($self, $parent) = @_; + return $self->{_db} if $self->{_db}; + if ($parent && ref($parent) =~ /Bucky/) + { $self->{_db} = $parent->bucky->db; } + else + { $self->{_db} = new Bucky::DB; } + return $self->{_db}; + } +sub keywords + { + } +sub keyword ($) + { return shift->entity( $TYPE_Keyword, @_ ); } +sub thread ($) + { return shift->entity( $TYPE_Thread, @_ ); } +sub comment ($) + { return shift->entity( $TYPE_Comment, @_ ); } +sub file ($) + { return shift->entity( $TYPE_File, @_ ); } +# return unless my $criteria = $self->check_criteria($which); +sub check_criteria + { + my ($self, $type, $which) = @_; + my $accessor = "_" . $type; + if ($self->can($accessor)) + { $which ||= $self->$accessor; } + return undef unless $type && $which; + my $criteria = {}; + if ( $self->is_number($which) ) + { $criteria->{id} = $which; } + elsif ( length($which) ) + { $criteria->{keyword} = $which; } + return scalar keys %$criteria ? $criteria : undef; + } +# my $keyword = $self->entity( $TYPE_Keyword, $which ); +sub entity + { + my ($self, $type, $which) = @_; + return unless my $criteria = $self->check_criteria($type, $which); + my $entity_list = $self->db->select($type, $criteria); + foreach my $entity (@$entity_list) + { + # TODO: privacy check? + return $self->condone( $entity, $type ); + } + return undef; + } +# my $threads = $keyword->threads; +# my $files = $thread->files; +# my $children = $keyword->children; +# join " ", map { $_->type }, $keyword->children; +sub comments_by_id + { + my ($self, $comments_to_get) = @_; + return $self->db->select_by_id("comment", $comments_to_get); + } +sub files_by_id + { + my ($self, $files_to_get) = @_; + return $self->db->select_by_id("file", $files_to_get); + } +sub threads + { + my ($self) = @_; + return {}; + } +sub family + { + my ($self, $which) = @_; + my $type = $self->type || return; + my $entity_list = $self->db->select("family", { $type => $self->id }); + } +sub condone + { + my ($self, $ref, $type) = @_; + if ($type !~ /Bucky/ && $self->is_valid_type($type)) + { $type = "Bucky::" . ucfirst($type); } + bless $ref, $type; + $ref->inherit($self); + return $ref; + } +1; + diff --git a/search/lib/Bucky/DB.pm b/search/lib/Bucky/DB.pm new file mode 100644 index 0000000..1e33b32 --- /dev/null +++ b/search/lib/Bucky/DB.pm @@ -0,0 +1,175 @@ +package Bucky::DB; + +use base 'Bucky'; + +use Data::Dumper; +use DBI; + +my $DB_LOOKUP = + { + bucky => '', + user => 'users', + keyword => 'keywords', + thread => 'threads', + file => 'files', + comment => 'comments', + family => 'family', + search_log => 'search_log', + poetaster_log => 'poetaster_log', + svn => 'svn', + }; +sub insert + { + my ($self, $type, $record) = @_; + $type = $DB_LOOKUP->{$type}; + return unless $type && ref($record) eq "HASH" && scalar keys %$record; + my $keys = []; + my $values = []; + foreach my $key (keys %$record) + { + push @$keys, $key; + push @$values, $self->quote($record->{$key}); + } + my $key_string = join ",", @$keys; + my $value_string = join ",", @$values; + return unless length $key_string && length $value_string; + my $sql = "INSERT INTO $type ($key_string) VALUES($value_string)"; + $self->execute($sql); + return $self->lastinsertid($sql); + } +sub update + { + my ($self, $type, $opt) = @_; + my $criteria = $opt->{'criteria'}; + my $record = $opt->{'record'}; + + $type = $DB_LOOKUP->{$type}; + return unless $type && ref($record) eq "HASH" && scalar keys %$record; + my $key_values = []; + foreach my $key (keys %$record) + { + push @$key_values, $key . "=" . $self->quote($record->{$key}); + } + my $key_value_string = join ",", @$key_values; + return unless length $key_value_string; + + my $criteria_string = $self->criteria($criteria); + return unless length $criteria_string; + + my $sql = "UPDATE $type SET $key_value_string $criteria_string"; + + $self->execute($sql); + } +sub update_by_id + { + my ($self, $type, $opt) = @_; + my $id = $opt->{'id'} + 0; + $opt->{'criteria'} = "id=$id"; + + $self->update($type, $opt) + } +sub select + { + my ($self, $type, $criteria) = @_; + $type = $DB_LOOKUP->{$type}; + return unless $type; + my $criteria_string = $self->criteria($criteria); + my $rows = []; + my $sql = "SELECT * FROM $type"; + $sql .= " " . $criteria_string if $criteria_string; + my $sth = $self->execute($sql); + while (my $row = $sth->fetchrow_hashref) + { + push @$rows, $row; + } + return $rows; + } +sub select_by_id + { + my ($self, $type, $id_array) = @_; + $type = $DB_LOOKUP->{$type}; + return unless $type and ref($id_array) eq "ARRAY" and scalar @$id_array; + my $rows = {}; + my $ids = join ",", @$id_array; + my $sql = "SELECT * FROM $type"; + $sql .= " WHERE id IN ($ids)"; + my $sth = $self->execute($sql); + while (my $row = $sth->fetchrow_hashref) + { + $rows->{ $row->{'id'} } = $row; + } + return $rows; + } +sub criteria + { + my ($self, $criteria) = @_; + + my $criteria_list = []; + + if ($self->is_string($criteria)) + { + push @$criteria_list, $criteria; + } + elsif (ref $criteria eq "HASH") + { + foreach my $key (keys %$criteria) + { + my $criterion = $key; + if ($criteria->{$key}) + { $criterion .= "=" . $self->quote($criteria->{$key}); } + push @$criteria_list, $criterion; + } + } + return undef unless scalar @$criteria_list; + + my $criteria_string = join(" AND ", @$criteria_list); + $criteria_string = "WHERE " . $criteria_string if $criteria_string =~ /[=<>]|( (IS|IN) )/; + return $criteria_string; + } +sub execute + { + my ($self, $sql) = @_; + my $sth = $self->dbh->prepare($sql); + $sth->execute; + return $sth; + } +sub quote + { + my ($self, $string) = @_; + return $self->dbh->quote($string); + } +sub lastinsertid + { + my ($self) = @_; + return $self->dbh->last_insert_id(0, undef, undef, undef); + } +sub dbh + { + my ($self, $parent) = @_; + if ($parent && ref($parent) =~ /Bucky/) + { + $self->{_dbh} ||= $parent->dbh; + } + if (! $self->{_dbh}) + { + $self->{_dbh} ||= DBI->connect($self->dsn); + } + return $self->{_dbh}; + } +sub dsn + { + my ($self) = @_; + $self->{_dsn} ||= + "DBI:mysql:database=" . $self->db_name . + ":" . $self->db_host . + ";mysql_read_default_file=" . $self->my_cnf; + return $self->{_dsn}; + } +sub db_name + { 'bucky' } +sub db_host + { 'localhost' } +sub my_cnf + { '/var/www/vhosts/carbonpictures.com/.my.cnf' } + +1; diff --git a/search/lib/Bucky/Keyword.pm b/search/lib/Bucky/Keyword.pm new file mode 100644 index 0000000..8c52256 --- /dev/null +++ b/search/lib/Bucky/Keyword.pm @@ -0,0 +1,26 @@ +package Bucky::Keyword; + +use base 'Bucky'; + +sub type { $Bucky::Keyword } +sub fields + {[qw[ + id keyword threads owner createdate + owner ops public + agglutinate + color display + ]]} + +sub _id { shift->{id} } +sub _keyword { shift->{keyword} } +sub _threads { shift->{threads} } +sub _username { shift->{owner} } +sub _ops { shift->{ops} } +sub _public { shift->{public} } +sub _createdate { shift->{createdate} } +sub _agglutinate { shift->{agglutinate} } +sub _color { shift->{color} } +sub _display { shift->{display} } + +1; + diff --git a/search/lib/Bucky/SVN.pm b/search/lib/Bucky/SVN.pm new file mode 100644 index 0000000..ef04464 --- /dev/null +++ b/search/lib/Bucky/SVN.pm @@ -0,0 +1,103 @@ +package Bucky::SVN; +use base "Bucky"; +use Bucky::Session; +sub svn_secret + { return shift->config("SVN_SECRET"); } +sub list + { + my ($self, $count) = @_; + $count ||= 7; + return $self->db->select("svn", "ORDER BY date DESC LIMIT $count"); + } +sub query_incoming + { + my ($self) = @_; + my $session = new Bucky::Session; + error() unless scalar $session->q->param && length $session->q->param("secret") && $session->q->param("secret") eq $self->svn_secret(); + if ($session->q->param("user")) + { + print $self->query_add($session); + } + else + { + print "Content-type: text/html\n\n"; + print $self->query_list; + } + } +sub query_list + { + my ($self) = @_; + my $svns = $self->list; + my $out .= <<__HEAD__; +<table cellpadding=0 cellspacing=0 style="border: 1px solid #333;"> +__HEAD__ + my $r = 0; + foreach my $svn (@$svns) + { + $r = $r ? 0 : 1; + my $user = $svn->{'user'}; + $user = "default" if $user eq "root"; + my $user_profile = "/cgi-bin/bucky/profile/$user"; + my $user_img = "/bucky/data/profile/.thumb/am.$user.jpg"; + my $date = $self->show_date($svn->{'date'}); + my $revision = $svn->{'revision'}; + my $comment = $svn->{'comment'}; + $out .= <<__SVN__; +<tr> +<td style="border: 1px solid #333;" align="center"> +<a href="$user_profile"><img src="$user_img" border=0></a><!--<br><small>$user</small>--> +</td> +<td style="border: 1px solid #333; padding: 3px" class="r$r"> +<small>$revision: $comment</small> +</td> +</small> +</tr> +__SVN__ + } + $out .= <<__FOOT__; +</table> +__FOOT__ + return $out; + } +sub query_add + { + my ($self, $session) = @_; + + my $user = $session->q->param("user"); + my $revision = $session->q->param("revision"); + my $comment = $session->q->param("comment"); + my $date = time; + + error("missing some parameters\npossible: secret, user, revision, comment\n") + unless $user && $comment && $revision + && $self->is_number($revision) && length $user && length $comment; + + my $query = + { + user => $user, + comment => $comment, + revision => $revision, + date => $date, + }; + + if (my $id = $self->db->insert("svn", $query)) + { success("Successfully inserted $id"); } + else + { error("Unable to insert!"); } + } +sub success + { + my ($success) = @_; + $success ||= "NICE ONE"; + print "Content-type: text/plain\n\nSUCCESS: $success"; + exit; + } +sub error + { + my ($error) = @_; + $error ||= "SORRY GUY"; + print "Content-type: text/plain\n\nERROR: $error"; + exit; + } + +1; diff --git a/search/lib/Bucky/Search.pm b/search/lib/Bucky/Search.pm new file mode 100644 index 0000000..e01d55a --- /dev/null +++ b/search/lib/Bucky/Search.pm @@ -0,0 +1,413 @@ +package Bucky::Search; + +use base 'Bucky'; + +use Data::Dumper; +use DB_File; + +sub index + { + my ($self, $index) = @_; + $self->{'_index'} = $index || $self->index_read; + return $self->{'_index'}; + } +sub index_read + { + my ($self) = @_; + my %index; + tie %index, "DB_File", $self->index_filename, O_RDONLY, 0666, $DB_HASH ; + return \%index; + } +sub index_write + { + my ($self) = @_; + my %index; + tie %index, "DB_File", $self->index_filename, O_CREAT|O_RDWR, 0666, $DB_HASH ; + return \%index; + } +sub index_close + { + my ($self, $index) = @_; + $index ||= $self->index; + untie %$index; + } +sub index_filename + { "gross.db" } +sub auto_index + { + my ($self, $auto_index) = @_; + $self->{'_auto_index'} = $auto_index || $self->auto_index_read; + return $self->{'_auto_index'}; + } +sub auto_index_read + { + my ($self) = @_; + my %auto_index; + tie %auto_index, "DB_File", $self->auto_index_filename, O_RDONLY, 0666, $DB_HASH ; + return \%auto_index; + } +sub auto_index_write + { + my ($self) = @_; + my %auto_index; + tie %auto_index, "DB_File", $self->auto_index_filename, O_CREAT|O_RDWR, 0666, $DB_HASH ; + return \%auto_index; + } +sub auto_index_close + { + my ($self, $auto_index) = @_; + $auto_index ||= $self->auto_index; + untie %$auto_index; + } +sub auto_index_filename + { "auto.db" } +sub lexicon_store + { + my ($self, $lexicon) = @_; + my $index = $self->index_write; + foreach my $term (keys %$lexicon) + { + next if $self->is_stopword($term); + my $serialized = $self->serialize_matches($lexicon->{$term}); + next unless $serialized; + $index->{$term} = $serialized; + } + $self->index_close($index); + } +sub unserialize_matches + { + my ($self, $serialized_string) = @_; + my @serialized_matches = split ",", $serialized_string; + my @matches; + foreach my $serialized_match (@serialized_matches) + { + my ($thread, $comment, $file, $strength) = split " ", $serialized_match; + my $match = {}; + $match->{'thread'} = $thread; + $match->{'comment'} = $comment; + $match->{'file'} = $file; + $match->{'strength'} = $strength; + push @matches, $match; + } + return \@matches; + } +sub serialize_matches + { + my ($self, $matches) = @_; + my @serialized_matches; + foreach my $match (values %$matches) + { + next unless $match && ref($match) eq "HASH"; + my $string = join " ", + $match->{'thread'}, + $match->{'comment'}, + $match->{'file'}, + $match->{'strength'}; + next unless $string; + push @serialized_matches, $string if $string; + } + return undef unless scalar @serialized_matches; + return join ",", @serialized_matches; + } +my $STOPWORDS = {( map { lc $_, 1 } qw( +a about above across adj after again against all almost alone along also +although always am among an and another any anybody anyone anything anywhere +apart are around as aside at away be because been before behind being below +besides between beyond both but by can cannot could deep did do does doing done +down downwards during each either else enough etc even ever every everybody +everyone except far few for forth from get gets got had hardly has have having +her here herself him himself his how however i if in indeed instead into inward +is it its itself just kept many maybe might mine more most mostly much must +myself near neither next no nobody none nor not nothing nowhere of off often on +only onto or other others ought our ours out outside over own p per please plus +pp quite rather really said seem self selves several shall she should since so +some somebody somewhat still such than that the their theirs them themselves +then there therefore these they this thorough thoroughly those through thus to +together too toward towards under until up upon v very was well were what +whatever when whenever where whether which while who whom whose will with +within without would yet young your yourself s ) )}; +sub is_stopword + { + my ($self, $term) = @_; + return exists $STOPWORDS->{lc $term}; + } +sub autocomplete + { + my ($self, $query) = @_; + return unless $query; + my $terms = parse_terms($query); + my $last_term = pop @$terms; + + my $auto_index = $self->auto_index; + my $guess_term = $auto_index->{$last_term}; + my $guess_full = join " ", @$terms, $guess_term; + my $guess_tail = $guess_term; + $guess_tail =~ s/^$last_term//; + + my $guess = {}; + $guess->{'full'} = $guess_full; + $guess->{'tail'} = $guess_tail; + $guess->{'term'} = $guess_term; + return $guess; + } +sub search_light + { + my ($self, $query, $start, $limit) = @_; + return unless $query; + $start ||= 0; + $limit ||= 10; + my $scores = {}; + my $terms = parse_terms($query); + my $index = $self->index; + foreach my $term (@$terms) + { + next if $self->is_stopword($term); + next unless my $serial = $index->{$term}; + my $results = $self->unserialize_matches($serial); + foreach my $result (@$results) + { + my $thread = $result->{'thread'}; + $scores->{$thread} ||= {}; + $scores->{$thread}->{thread} ||= $result->{'thread'}; + $scores->{$thread}->{file} ||= $result->{'file'}; + $scores->{$thread}->{strength} += $result->{'strength'}; + $scores->{$thread}->{count}++; + } + } + my $total = scalar keys %$scores; + my $i = 0; + my $to_display = $limit; + my $threads = {}; + my $comments_to_get = []; + my $files_to_get = []; + foreach my $match (sort { $b->{count} <=> $a->{count} || $b->{strength} * $b->{count} <=> $a->{strength} * $a->{count} } values %$scores ) + { + next if $i++ < $start; + my $thread = $self->thread( $match->{'thread'} ); + next unless $thread; + next if $thread->{'private'}; + last if $to_display-- == 0; + push @$results, $match; + push @$comments_to_get, $match->{'comment'} if $match->{'comment'}; + if ( $match->{'file'} ) + { push @$files_to_get, $match->{'file'}; } + if ( $thread->{'flagged'} ) + { push @$files_to_get, $thread->{'flagged'}; } + $threads->{ $thread->{'id'} } = $thread; + } + my $files = $self->files_by_id($files_to_get); + # $self->log_query($query, $total); + return + { + start => $start + $limit, + limit => $limit, + total => $total, + results => $results, + threads => $threads, + files => $files, + terms => $terms, + }; + } +sub search + { + my ($self, $query, $start, $limit) = @_; + return unless $query; + $start ||= 0; + $limit ||= 10; + my $scores = {}; + my $terms = parse_terms($query); + my $index = $self->index; + foreach my $term (@$terms) + { + next if $self->is_stopword($term); + next unless my $serial = $index->{$term}; + my $results = $self->unserialize_matches($serial); + foreach my $result (@$results) + { + my $thread = $result->{'thread'}; + $scores->{$thread} ||= {}; + $scores->{$thread}->{thread} ||= $result->{'thread'}; + $scores->{$thread}->{comment} ||= $result->{'comment'}; + $scores->{$thread}->{file} ||= $result->{'file'}; + $scores->{$thread}->{strength} += $result->{'strength'}; + $scores->{$thread}->{count}++; + } + } + my $total = scalar keys %$scores; + my $i = 0; + my $to_display = $limit; + my $threads = {}; + my $comments_to_get = []; + my $files_to_get = []; + foreach my $match (sort { $b->{count} <=> $a->{count} || $b->{strength} * $b->{count} <=> $a->{strength} * $a->{count} } values %$scores ) + { + next if $i++ < $start; + my $thread = $self->thread( $match->{'thread'} ); + next unless $thread; + next if $thread->{'private'}; + last if $to_display-- == 0; + push @$results, $match; + push @$comments_to_get, $match->{'comment'} if $match->{'comment'}; + if ( $match->{'file'} ) + { push @$files_to_get, $match->{'file'}; } + if ( $thread->{'flagged'} ) + { push @$files_to_get, $thread->{'flagged'}; } + $threads->{ $thread->{'id'} } = $thread; + } + my $files = $self->files_by_id($files_to_get); + my $comments = $self->comments_by_id($comments_to_get); + $self->log_query($query, $total); + return + { + start => $start + $limit, + limit => $limit, + total => $total, + results => $results, + threads => $threads, + comments => $comments, + files => $files, + terms => $terms, + }; + } +sub score_display + { + my ($obj) = @_; + return scalar(keys %{$obj->{terms}}) . "x" . $obj->{count}; + } +sub display_object + { + my ($obj) = @_; + my ($type, $id) = split ":", $obj; + my $thread = $bucky->thread($id); + my $title = $thread ? $thread->_title : "* * *"; + return $type . " " . $id . "\t" . $title; + } +sub bold_terms + { + my ($self, $string, $terms) = @_; + $string = $self->strip_html($string); + foreach my $term (@$terms) + { + $string =~ s/\b($term)\b/<b>$1<\/b>/gi; + } + return $string; + } +sub bold_snippet + { + my ($self, $string, $terms) = @_; + my $snippet = $self->snippet($string, $terms); + return $self->bold_terms($snippet, $terms); + } +sub snippet + { + my ($self, $string, $terms) = @_; + + # clean up the string we got + $string = $self->strip_html($string); + + # create a regex out of the search terms + my $term_re = join "|", @$terms; + + # take the string to be snippetized and split it into words + my @words = split /\s+/, $string; + + # deduper for matching @words indexes, so we don't add a word twice + my $index_matches = {}; + + # words in the eventual snippet + my @words_matched; + + # the snippet itself + my $snippet = ''; + + # counter for aggregating context after a match + my $aggr = 0; + + # amount of context to show, in number of words surrounding a match + my $pad = 4; + + # loop over each of the words in the string + for (my $i = 0; $i < scalar @words; $i++) + { + # does this word contain a match? + if ($words[$i] =~ /\b($term_re)\b/i && ! $self->is_stopword($1)) + { + # if we aren't already aggregating, add an ellipsis + if (! $aggr) + { + push @words_matched, "..."; + } + # look backward $pad words + for (my $j = -$pad; $j < 1; $j++) + { + # create a new index from the offset + my $idx = $i + $j; + + # is this a valid index? has it already been encountered? + next if $idx < 0; + next if $idx > scalar @words; + next if exists $index_matches->{$i+$j}; + + # checks out, save this word + push @words_matched, $words[$i+$j]; + + # note the matching index in our deduper + $index_matches->{$i+$j} ++; + } + # enter aggregate mode -- add the next $pad words + $aggr = $pad; + } + # have we been told to aggregate? + elsif ($aggr) + { + # save this word + push @words_matched, $words[$i]; + + # add index to the deduper + $index_matches->{$i} ++; + + # one less word to aggregate + $aggr--; + } + # keep snippets to a modest length + last if scalar @words_matched > 30; + } + # add a trailing ellipsis + push @words_matched, "..."; + + # create the snippet from the saved context words + $snippet = join " ", @words_matched; + + return $snippet; + } +sub parse_terms + { + my ($s) = @_; + my @terms = split /(\W+)/, lc($s); + my $words = []; + my $count = 0; + foreach my $term (@terms) + { + if ( $term !~ /\W/ ) + { + push @$words, $term; + } + } + return $words; + } +sub log + { + my ($self, $date) = @_; + my $criteria = {}; + $criteria->{'date'} = $date if $date; + return $self->db->select("search_log", $criteria); + } +sub log_query + { + my ($self, $query, $total) = @_; + return unless $query; + my $date = time; + $matches ||= '0'; + $self->db->insert("search_log", { query => $query, date => $date, matches => $total }); + } +1; diff --git a/search/lib/Bucky/Session.pm b/search/lib/Bucky/Session.pm new file mode 100644 index 0000000..0cff753 --- /dev/null +++ b/search/lib/Bucky/Session.pm @@ -0,0 +1,19 @@ +package Bucky::Session; + +use base 'Bucky'; + +use CGI; + +sub param + { + my ($self, $name) = @_; + return unless $name; + return $self->q->param($name); + } +sub q + { + my ($self) = @_; + $self->{'_q'} ||= new CGI; + return $self->{'_q'}; + } +1; diff --git a/search/lib/Bucky/Thread.pm b/search/lib/Bucky/Thread.pm new file mode 100644 index 0000000..dbd8ad0 --- /dev/null +++ b/search/lib/Bucky/Thread.pm @@ -0,0 +1,30 @@ +package Bucky::Thread; + +use base 'Bucky'; + +sub type { $Bucky::Thread } +sub fields + {[qw[ + id title username keyword private allowed + createdate lastmodified revision viewed + size color display flagged zipped + ]]} + +sub _id { shift->{id} } +sub _title { shift->{title} } +sub _username { shift->{username} } +sub _keyword { shift->{keyword} } +sub _private { shift->{private} } +sub _allowed { shift->{allowed} } +sub _createdate { shift->{createdate} } +sub _lastmodified { shift->{lastmodified} } +sub _revision { shift->{revision} } +sub _viewed { shift->{viewed} } +sub _size { shift->{size} } +sub _color { shift->{color} } +sub _display { shift->{display} } +sub _flagged { shift->{flagged} } +sub _zipped { shift->{zipped} } + +1; + diff --git a/search/lib/Common.pm b/search/lib/Common.pm new file mode 100644 index 0000000..0797943 --- /dev/null +++ b/search/lib/Common.pm @@ -0,0 +1,46 @@ +## utility functions +package Common; +use base 'Exporter'; +our @EXPORT = qw[is_number is_string show_date get_age trim strip_html]; + +# promiscuous namespace +sub is_number + { my ($self, $s) = @_; return $s && $s !~ /\D/; } +sub is_string + { my ($self, $s) = @_; return length $s && ! length ref $s; } +sub show_date + { my ($self, $date) = @_; return scalar localtime($date); } +sub get_age + { + my ($self, $time) = @_; + if ($time < 0) + { return "old"; } + my $age = time - $time; + if ($age < 60) + { return int($age)."s"; } + $age /= 60; + if ($age < 60) + { return int($age)."m"; } + my $mins = $age % 60; + $age /= 60; + if ($age < 2) + { return int($age)."h".int($mins)."m"; } + elsif ($age < 24) + { return int($age)."h"; } + $age /= 24; + if ($age < 10000) + { return int($age)."d"; } + my $powers = qw[k m g t p e z y]; + foreach my $prefix (@$powers) + { + $age /= 1000; + if ($age < 10000) + { return int($age).$prefix."d"; } + } + } +sub trim + { my ($self, $s) = @_; $s =~ s/^\s+//; $s =~ s/\s+$//; return $s; } +sub strip_html + { my ($self, $s) = @_; $s =~ s/[<>]+/ /g; return $s; } + +1; diff --git a/search/lib/Time/Stopwatch.pm b/search/lib/Time/Stopwatch.pm new file mode 100644 index 0000000..b7cec26 --- /dev/null +++ b/search/lib/Time/Stopwatch.pm @@ -0,0 +1,13 @@ +################################################3 +package Time::Stopwatch; +my $VERSION = '1.00'; +use strict; +use constant HIRES => eval { local $SIG{__DIE__}; require Time::HiRes }; +sub TIESCALAR { my $pkg = shift; + my $time = (HIRES ? Time::HiRes::time() : time()) - (@_ ? shift() : 0); + bless \$time, $pkg; } +sub FETCH { (HIRES ? Time::HiRes::time() : time()) - ${$_[0]}; } +sub STORE { ${$_[0]} = (HIRES ? Time::HiRes::time() : time()) - $_[1]; } +################################################# +sub print_timer { print sprintf "%3.2f s %s\n", shift, shift; } +1; diff --git a/search/lib/localbucky.pm b/search/lib/localbucky.pm new file mode 100644 index 0000000..41c7553 --- /dev/null +++ b/search/lib/localbucky.pm @@ -0,0 +1,12 @@ +package Bucky; +my $LOCAL_CONFIG = + { + SVN_SECRET => '1s0c4h3dr0n', + }; +sub config + { + my ($self, $key) = @_; + $key ||= $self; + return $LOCAL_CONFIG->{$key} if $LOCAL_CONFIG->{$key}; + } +1; |
