summaryrefslogtreecommitdiff
path: root/bucky2/bin/build-index
blob: 437f146f6e2b4c7220e76f95c2e20d2504e1ba35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/perl
use strict;
use lib "../lib";
use Bucky;
use DB_File;
#require Time::Stopwatch;
# Build the search index.
#
# Loads threads, comments and files from the database through Bucky::Search,
# feeds their text through parse_terms() to fill $lexicon, stores the lexicon,
# and finally installs the resulting index file into ../cgi-bin, keeping the
# previous index as "<name>.1".  Phase timings go to STDERR via print_timer().
tie my $timer, 'Time::Stopwatch';

print_timer($timer, "Initialized");

my $bucky = Bucky::Search->new;

# NOTE(review): $keywords is only referenced by the disabled loop below; the
# query is kept because select() side effects cannot be ruled out from here.
my $keywords = $bucky->db->select("keyword");
# NOTE(review): {"id > 1"} is a one-element hash (condition string as key, no
# value) -- presumably how select() accepts a raw condition; confirm.
my $threads = $bucky->db->select("thread", {"id > 1"});
my $files = $bucky->db->select("file");
my $comments = $bucky->db->select("comment", {"thread > 1"});

print_timer($timer, "Loaded mysql");

# $lexicon is filled in by parse_terms(): term => { thread id => entry }.
my $lexicon = {};
my $total = 0;
#foreach my $keyword (@$keywords)
#	{
#	my $id = $keyword->{$id};
#	$lexicon->{ $keyword->{'keyword'} }++;
#	$total++;
#	}
foreach my $thread (@$threads)
	{
	# thread titles carry neither comment nor file id
	$total += parse_terms({ string => $thread->{'title'}, thread => $thread->{'id'} });
	}
foreach my $comment (@$comments)
	{
	$total += parse_terms({ string => $comment->{'comment'}, thread => $comment->{'thread'}, comment => $comment->{'id'} });
	}
foreach my $file (@$files)
	{
	$total += parse_terms({ string => $file->{'filename'}, thread => $file->{'thread'}, file => $file->{'id'} });
	}

print_timer($timer, "Created index");

my $unique = scalar keys %$lexicon;
print "--- WORD COUNT: " . $total . "\n";
print "--- UNIQUE WORDS: " . $unique . "\n";

$bucky->lexicon_store($lexicon);

my $index_name = $bucky->index_filename;
my $new_index = "./$index_name";
my $live_index = "../cgi-bin/$index_name";

print_timer($timer, "Dumped $index_name");

# Refuse to touch the live index unless a freshly built one is there to
# replace it; otherwise a failed build would leave cgi-bin without an index.
die "index file $new_index was not created; live index left untouched"
	unless -e $new_index;

print "NEW: " ; system("/bin/ls", "-l", $new_index);
print "OLD: " ; system("/bin/ls", "-l", $live_index);

# Keep the previous index as a one-generation backup (skipped on first run).
if (-e $live_index)
	{
	system("/bin/mv", $live_index, "$live_index.1") == 0
		or die "could not back up $live_index (exit status " . ($? >> 8) . ")";
	}
system("/bin/cp", $new_index, $live_index) == 0
	or die "could not install $new_index as $live_index (exit status " . ($? >> 8) . "); previous index, if any, is at $live_index.1";
# system("/usr/bin/perl", "./build-autocomplete");
exit;

# parse_terms(\%args) -- split a string into terms and record them in $lexicon.
#
# Arguments (hash ref):
#   string  => text to index; underscores are treated as spaces
#   thread  => id of the owning thread (required; nothing is indexed without it)
#   comment => id of the comment the text belongs to (optional, defaults to '0')
#   file    => id of the file the text belongs to (optional, defaults to '0')
#
# Each lower-cased term gets an entry at $lexicon->{term}->{thread id} holding
# 'thread', 'comment', 'file' and an accumulated 'strength'.  An existing
# true 'comment' or 'file' id in an entry is never overwritten.
#
# Returns the number of terms indexed (0 if there is no thread id or no string).
sub parse_terms
	{
	my ($args) = @_;
	# return an explicit 0 so callers can safely accumulate with +=
	my $thread = $args->{'thread'} or return 0;
	my $comment = $args->{'comment'} || '0';
	my $file = $args->{'file'} || '0';
	my $string = $args->{'string'};
	return 0 unless defined $string;
	$string =~ s/_/ /g;
	# give terms in title an extra bump
	my $weight = ($comment eq '0' && $file eq '0') ? 2 : 1;
	my $count = 0;
	# Match the word runs directly instead of splitting on \W+: split yields a
	# leading empty field when the string starts with a non-word character,
	# and that empty string would otherwise be indexed and counted as a term.
	while ($string =~ /(\w+)/g)
		{
		my $t = lc($1);
		my $entry = $lexicon->{$t}->{$thread} ||= {};
		$entry->{'thread'} ||= $thread;
		$entry->{'comment'} ||= $comment;
		$entry->{'file'} ||= $file;
		$entry->{'strength'} += $weight;
		$count++;
		}
	return $count;
	}

# print_timer($seconds, $label) -- write one timing line to STDERR,
# formatted as "<seconds to two decimals> s <label>".
sub print_timer
	{
	my ($elapsed, $label) = @_;
	my $line = sprintf("%3.2f s %s\n", $elapsed, $label);
	print STDERR $line;
	}

################################################

package Time::Stopwatch;
# Tied-scalar stopwatch.  Reading the tied variable yields the number of
# seconds elapsed since it was tied (or since the moment implied by the last
# value assigned to it); assigning N makes it read as N and keep counting.
#
#   tie my $timer, 'Time::Stopwatch';        # starts at 0
#   tie my $timer, 'Time::Stopwatch', 10;    # starts at 10

# Package variable (not a lexical) so Time::Stopwatch->VERSION can see it.
# Assigned in BEGIN because this package sits below the script's "exit", so
# ordinary run-time statements at this point are never executed.
our $VERSION;
BEGIN { $VERSION = '1.00'; }

use strict;
# True when Time::HiRes could be loaded; selects sub-second timing.
use constant HIRES => eval { local $SIG{__DIE__}; require Time::HiRes };

# Current clock reading: high resolution when available, whole seconds otherwise.
sub _now { return HIRES ? Time::HiRes::time() : time(); }

# tie $var, 'Time::Stopwatch' [, $initial]
# Stores the (virtual) start time, shifted back by $initial seconds if given.
sub TIESCALAR {
    my ($pkg, $initial) = @_;
    my $start = _now() - (defined $initial ? $initial : 0);
    return bless \$start, $pkg;
}

# Reading the variable: seconds elapsed since the stored start time.
sub FETCH { return _now() - ${$_[0]}; }

# Assigning N: move the start time so the stopwatch currently reads N.
sub STORE { ${$_[0]} = _now() - $_[1]; }

1;