diff options
Diffstat (limited to 'search/bin/build-index')
| -rwxr-xr-x | search/bin/build-index | 112 |
1 files changed, 0 insertions, 112 deletions
#!/usr/bin/perl
#
# build-index -- rebuild the search lexicon and install it as
# ./search/db/search.db, keeping the previous index as search.db.1.
#
# Recovered from the diff that deleted this script:
#   diff --git a/search/bin/build-index b/search/bin/build-index
#   deleted file mode 100755, index b7fa2fc..0000000, @@ -1,112 +0,0 @@
#
# Every path below is relative, so the script depends on the directory it
# is started from (it expects ./search/lib and ./search/db to resolve).
#
# Progress timings go to STDERR; the word counts and the ls listings of the
# old and new index files go to STDOUT.
use strict;
use warnings;
use lib "./search/lib";
use Bucky;
use DB_File;
#require Time::Stopwatch;

# $timer is tied to the Time::Stopwatch package defined at the bottom of
# this file: every read yields the seconds elapsed since the tie.
tie my $timer, 'Time::Stopwatch';

print_timer($timer, "Initialized");

my $bucky = Bucky::Search->new;

# The one-key hashes are exactly what the bare {"id > 1"} form built (a key
# with an undef value); spelling out the undef avoids the "Odd number of
# elements in anonymous hash" warning.
# NOTE(review): presumably select() treats each key as a WHERE fragment --
# confirm against the Bucky db class.
my $keywords = $bucky->db->select("keyword");
my $threads  = $bucky->db->select("thread",  { "id > 1"     => undef });
my $files    = $bucky->db->select("file");
my $comments = $bucky->db->select("comment", { "thread > 1" => undef });

print_timer($timer, "Loaded mysql");

# $lexicon maps term -> thread id -> { thread, comment, file, strength }.
# It is filled in by parse_terms(), which closes over it.
my $lexicon = {};
my $total   = 0;

# Keyword indexing is disabled; $keywords is still fetched above but not
# used anywhere else in this script.
#foreach my $keyword (@$keywords)
#  {
#  my $id = $keyword->{$id};
#  $lexicon->{ $keyword->{'keyword'} }++;
#  $total++;
#  }

foreach my $thread (@$threads) {
    $total += parse_terms({
        string => $thread->{'title'},
        thread => $thread->{'id'},
    });
}
foreach my $comment (@$comments) {
    $total += parse_terms({
        string  => $comment->{'comment'},
        thread  => $comment->{'thread'},
        comment => $comment->{'id'},
    });
}
foreach my $file (@$files) {
    $total += parse_terms({
        string => $file->{'filename'},
        thread => $file->{'thread'},
        file   => $file->{'id'},
    });
}

print_timer($timer, "Created index");

my $unique = scalar keys %$lexicon;
print "--- WORD COUNT: " . $total . "\n";
print "--- UNIQUE WORDS: " . $unique . "\n";

$bucky->lexicon_store($lexicon);

my $new_index = $bucky->index_filename;

print_timer($timer, "Dumped $new_index");

my $live_index   = "./search/db/search.db";
my $backup_index = "./search/db/search.db.1";

# Keep the previous index as a backup. This step is best-effort: there is
# nothing to back up on a first run, and a failed backup only warns.
if (-e $live_index) {
    system("/bin/mv", $live_index, $backup_index) == 0
        or warn "build-index: could not move $live_index to $backup_index (status $?)\n";
}

# Installing the fresh index is the whole point of the run, so a failure
# here is fatal instead of being reported as a successful rebuild.
system("/bin/mv", "$new_index", $live_index) == 0
    or die "build-index: could not move $new_index to $live_index (status $?)\n";

print "OLD: ";
system("/bin/ls", "-l", $backup_index) if -e $backup_index;
print "NEW: ";
system("/bin/ls", "-l", $live_index);
# system("/usr/bin/perl", "./build-autocomplete");

# Stop here so execution does not run on into the package code below.
exit;

# parse_terms(\%args) -- add the words of one piece of text to $lexicon.
#
# Arguments (hash reference):
#   string  - text to index; an undefined value is treated as empty text
#   thread  - id of the owning thread; nothing is indexed when it is false
#   comment - comment id when the text is a comment body (default '0')
#   file    - file id when the text is a file name (default '0')
#
# Underscores count as word separators. Terms are lower-cased and recorded
# per thread; for a given term and thread the first true comment/file id
# seen is kept. Text that carries neither a comment nor a file id (a thread
# title, as called above) adds 2 to the entry's strength, anything else 1.
#
# Returns the number of terms indexed, 0 when there was nothing to index.
sub parse_terms {
    my ($args) = @_;

    # Always hand back a number: callers add the result to a running total.
    my $thread = $args->{'thread'};
    return 0 unless $thread;

    my $comment = $args->{'comment'} || '0';
    my $file    = $args->{'file'}    || '0';
    my $string  = defined $args->{'string'} ? $args->{'string'} : '';
    $string =~ s/_/ /g;

    # give terms in title an extra bump
    my $bump = ($comment eq '0' && $file eq '0') ? 2 : 1;

    my $count = 0;

    # Pull out the word runs directly. Splitting on /(\W+)/ produced a
    # leading empty field whenever the text began with a non-word
    # character, and that empty string was indexed and counted as a term.
    foreach my $term ($string =~ /(\w+)/g) {
        my $t     = lc($term);
        my $entry = ($lexicon->{$t}->{$thread} ||= {});
        $entry->{'thread'}   ||= $thread;
        $entry->{'comment'}  ||= $comment;
        $entry->{'file'}     ||= $file;
        $entry->{'strength'}  += $bump;
        $count++;
    }
    return $count;
}

# print_timer($seconds, $label) -- write "<seconds> s <label>" to STDERR.
sub print_timer {
    my ($seconds, $label) = @_;
    printf STDERR "%3.2f s %s\n", $seconds, $label;
}

################################################3

# Time::Stopwatch -- tied scalar that reads as elapsed seconds.
#
# Reading the scalar returns the time since the stopwatch was started;
# assigning N restarts it so that it reads N at that moment. Time::HiRes is
# used when it can be loaded, otherwise the built-in time().
package Time::Stopwatch;
my $VERSION = '1.00';

use strict;
# HIRES is true only if Time::HiRes loaded; the local $SIG{__DIE__} keeps a
# caller-installed die handler from firing on a failed require.
use constant HIRES => eval { local $SIG{__DIE__}; require Time::HiRes };

# tie $var, 'Time::Stopwatch' [, $initial] -- start at $initial (default 0).
# What is stored is the start instant: now minus the initial reading.
sub TIESCALAR {
    my $pkg = shift;
    my $time = (HIRES ? Time::HiRes::time() : time()) - (@_ ? shift() : 0);
    bless \$time, $pkg;
}

# Read: now minus the stored start instant.
sub FETCH { (HIRES ? Time::HiRes::time() : time()) - ${$_[0]}; }
# Write: move the start instant so the stopwatch reads the assigned value.
sub STORE { ${$_[0]} = (HIRES ? Time::HiRes::time() : time()) - $_[1]; }

1;
