1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
#!/usr/bin/perl
use strict;
use lib "./search/lib";
use Bucky;
use DB_File;
#require Time::Stopwatch;
# Rebuild the full-text search index from the database tables and swap it
# into place, keeping the previous index as a one-generation backup.

# Stopwatch for progress reporting: every read of $timer yields the seconds
# elapsed since this tie (see the Time::Stopwatch package in this file).
tie my $timer, 'Time::Stopwatch';
print_timer($timer, "Initialized");

# Direct class-method call; the indirect-object form ("new Bucky::Search")
# is ambiguous to the parser and discouraged.
my $bucky = Bucky::Search->new;

# Load every source table up front.  The single-string hashes are handed to
# select() unchanged -- presumably a WHERE-style filter; confirm against the
# Bucky db API before touching them.
# NOTE: $keywords is currently only referenced by the disabled block below.
my $keywords = $bucky->db->select("keyword");
my $threads = $bucky->db->select("thread", {"id > 1"});
my $files = $bucky->db->select("file");
my $comments = $bucky->db->select("comment", {"thread > 1"});
print_timer($timer, "Loaded mysql");

# $lexicon is filled in by parse_terms(): term => { thread id => entry }.
# $total counts every indexed term occurrence (not unique terms).
my $lexicon = {};
my $total = 0;
#foreach my $keyword (@$keywords)
# {
# my $id = $keyword->{$id};
# $lexicon->{ $keyword->{'keyword'} }++;
# $total++;
# }

# Thread titles: no comment/file id, so parse_terms() weights them higher.
foreach my $thread (@$threads)
{
    $total += parse_terms({
        string => $thread->{'title'},
        thread => $thread->{'id'},
    });
}

# Comment bodies, attributed to the thread they belong to.
foreach my $comment (@$comments)
{
    $total += parse_terms({
        string  => $comment->{'comment'},
        thread  => $comment->{'thread'},
        comment => $comment->{'id'},
    });
}

# File names, attributed to the thread they belong to.
foreach my $file (@$files)
{
    $total += parse_terms({
        string => $file->{'filename'},
        thread => $file->{'thread'},
        file   => $file->{'id'},
    });
}
print_timer($timer, "Created index");

my $unique = scalar keys %$lexicon;
print "--- WORD COUNT: " . $total . "\n";
print "--- UNIQUE WORDS: " . $unique . "\n";

$bucky->lexicon_store($lexicon);
my $index_file = $bucky->index_filename;
print_timer($timer, "Dumped $index_file");

my $live_db = "./search/db/search.db";
my $backup_db = "./search/db/search.db.1";

# Rotate: keep the previous index as a backup.  A failure here is only a
# warning because there may be no live index yet (e.g. on a first build).
system("/bin/mv", $live_db, $backup_db) == 0
    or warn "WARNING: could not move $live_db to $backup_db (wait status $?)\n";

# Install the freshly built index.  If this fails there is no usable live
# index, so stop with a non-zero exit instead of reporting success.
system("/bin/mv", $index_file, $live_db) == 0
    or die "ERROR: could not install $index_file as $live_db (wait status $?)\n";

print "OLD: " ; system("/bin/ls", "-l", $backup_db);
print "NEW: " ; system("/bin/ls", "-l", $live_db);
# system("/usr/bin/perl", "./build-autocomplete");
exit;
# parse_terms(\%args) -> number of terms indexed
#
# Splits a piece of text into lower-cased word terms and records each one in
# the file-scoped $lexicon under the owning thread.
#
# Arguments (hash reference):
#   string  - text to index; underscores are treated as word separators
#   thread  - id of the owning thread (required; a false value makes the
#             sub return immediately without indexing anything)
#   comment - id of the comment the text came from (optional)
#   file    - id of the file the text came from (optional)
#
# Each $lexicon->{term}{thread} entry holds 'thread', 'comment', 'file' and
# 'strength'.  'comment' and 'file' are assigned with ||=, so a non-zero id
# is kept once recorded for a term/thread pair.  'strength' grows by 2 per
# occurrence when neither comment nor file is given (i.e. a thread title)
# and by 1 otherwise.
#
# Returns the number of term occurrences recorded (0 for undefined or
# word-free text).
sub parse_terms
{
    my ($args) = @_;
    my $thread = $args->{'thread'} || return;
    my $comment = $args->{'comment'} || '0';
    my $file = $args->{'file'} || '0';
    my $string = $args->{'string'};
    return 0 unless defined $string;

    # Underscores count as \w, so turn them into separators first.
    $string =~ tr/_/ /;

    # Match the word runs directly.  Splitting on /(\W+)/ produced an empty
    # leading field for text that starts with punctuation or whitespace, and
    # that empty string was indexed and counted as a term.
    my @terms = $string =~ /\w+/g;

    # give terms in title an extra bump
    my $weight = ($comment eq '0' && $file eq '0') ? 2 : 1;

    my $count = 0;
    foreach my $term (@terms)
    {
        my $entry = $lexicon->{ lc($term) }->{$thread} ||= {};
        $entry->{'thread'} ||= $thread;
        $entry->{'comment'} ||= $comment;
        $entry->{'file'} ||= $file;
        $entry->{'strength'} += $weight;
        $count++;
    }
    return $count;
}
# print_timer($seconds, $label)
#
# Writes one progress line of the form "<seconds> s <label>" to STDERR,
# with the seconds formatted to two decimal places.
sub print_timer
{
    my ($seconds, $label) = @_;
    my $line = sprintf("%3.2f s %s\n", $seconds, $label);
    print STDERR $line;
}
################################################
package Time::Stopwatch;
# Tied-scalar stopwatch: reading the tied variable yields the seconds elapsed
# since it was tied (or since the value last stored into it).

# NOTE: this is a lexical, so it is not visible as $Time::Stopwatch::VERSION.
my $VERSION = '1.00';
use strict;

# True when Time::HiRes could be loaded; decides which clock _now() uses.
use constant HIRES => eval { local $SIG{__DIE__}; require Time::HiRes };

# Current time in seconds: sub-second resolution when Time::HiRes is
# available, otherwise the built-in whole-second clock.
sub _now {
    return HIRES ? Time::HiRes::time() : time();
}

# tie $var, 'Time::Stopwatch' [, $initial_seconds]
# The object is a reference to the start time, back-dated by the optional
# initial value so the stopwatch begins at that reading.
sub TIESCALAR {
    my ($class, @initial) = @_;
    my $offset = @initial ? $initial[0] : 0;
    my $started = _now() - $offset;
    return bless \$started, $class;
}

# Reading the variable: seconds elapsed since the stored start time.
sub FETCH {
    my ($self) = @_;
    return _now() - $$self;
}

# Assigning to the variable: move the start time so the stopwatch now reads
# the assigned number of seconds.
sub STORE {
    my ($self, $seconds) = @_;
    return $$self = _now() - $seconds;
}
1;
|