summaryrefslogtreecommitdiff
path: root/lib/search/snippet.js
blob: de71911478e480e91792811a93e784d9f27c5ab0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
var util = require('../util/util')

// Placeholder for wrapping search terms in a string with bold markup.
// The body is still empty, so a call currently returns undefined.
// NOTE(review): presumably the in-progress JavaScript port of the Perl
// `bold_terms` sub that follows in this file -- confirm before relying on it.
function bold_terms (s, terms) {
  
}
# bold_terms($string, $terms)
#
# Wraps every whole-word, case-insensitive occurrence of a search term in
# <b>...</b> tags.
#
#   $string - text to mark up; it is first passed through $self->strip_html
#             (presumably to remove existing markup -- defined elsewhere)
#   $terms  - array ref of search terms, treated as literal text
#
# Returns the marked-up string. If there are no usable terms the stripped
# string is returned unchanged.
sub bold_terms
	{
	my ($self, $string, $terms) = @_;
	$string = $self->strip_html($string);

	# drop undefined/empty terms: an empty alternative would match at every
	# word boundary and litter the output with "<b></b>"
	my @usable = grep { defined $_ && length $_ } @{ $terms || [] };
	return $string unless @usable;

	# escape regex metacharacters so terms such as "c++" or "(" cannot break
	# or alter the pattern; try longer terms first so that a longer term is
	# preferred over a shorter one that is its prefix
	my $alternation = join "|",
		map { quotemeta($_) }
		sort { length($b) <=> length($a) } @usable;

	# one pass over the string for all terms: substituting term by term would
	# let a later term (e.g. "b") match inside the <b> tags inserted for an
	# earlier one
	$string =~ s/\b($alternation)\b/<b>$1<\/b>/gi;

	return $string;
	}
# bold_snippet($string, $terms)
#
# Convenience wrapper: builds a snippet of $string around the search terms
# via $self->snippet, then returns that snippet with the terms bolded via
# $self->bold_terms.
sub bold_snippet
	{
	my $self = shift;
	my ($string, $terms) = @_;

	# scalar() keeps the snippet call in scalar context, as it would be when
	# assigned to a plain variable
	return $self->bold_terms(scalar $self->snippet($string, $terms), $terms);
	}
# snippet($string, $terms)
#
# Builds a short excerpt of $string made of the words surrounding each
# search-term match.
#
#   $string - text to excerpt; it is first passed through $self->strip_html
#             (presumably to remove markup -- defined elsewhere)
#   $terms  - array ref of search terms, treated as literal text and matched
#             case-insensitively on word boundaries
#
# A match for which $self->is_stopword returns true is not treated as a match.
# Each match contributes up to $pad words before it, the matching word, and
# up to $pad words after it. Collection stops once more than 30 entries
# (ellipsis markers included) have been gathered.
#
# Returns the collected words joined by single spaces, always ending in
# "...". With no match (or no usable terms) the result is just "...".
#
# NOTE(review): an ellipsis is emitted whenever a match arrives while not
# aggregating, even when the snippet starts at the first word or the
# look-back words are adjacent to words already emitted.
sub snippet
	{
	my ($self, $string, $terms) = @_;

	# clean up the string we got
	$string = $self->strip_html($string);
	$string = '' unless defined $string;

	# create a regex out of the search terms. Terms are escaped so that
	# metacharacters ("c++", "(") cannot break or alter the pattern, and
	# undefined/empty terms are dropped because an empty alternative would
	# match nearly every word. Compiled once, outside the loop.
	my @usable = grep { defined $_ && length $_ } @{ $terms || [] };
	my $term_re;
	if (@usable)
		{
		my $alternation = join "|", map { quotemeta($_) } @usable;
		$term_re = qr/\b($alternation)\b/i;
		}

	# take the string to be snippetized and split it into words
	my @words = split /\s+/, $string;

	# deduper for matching @words indexes, so we don't add a word twice
	my $index_matches = {};

	# words in the eventual snippet
	my @words_matched;

	# counter for aggregating context after a match
	my $aggr = 0;

	# amount of context to show, in number of words surrounding a match
	my $pad = 4;

	# loop over each of the words in the string
	for my $i (0 .. $#words)
		{
		# does this word contain a (non-stopword) match?
		if (defined $term_re
			&& $words[$i] =~ $term_re
			&& ! $self->is_stopword($1))
			{
			# if we aren't already aggregating, add an ellipsis
			push @words_matched, "..." if ! $aggr;

			# look backward $pad words, clamped to the start of the string;
			# the range ends at $i so it can never run past the last word
			my $first = $i - $pad;
			$first = 0 if $first < 0;
			for my $idx ($first .. $i)
				{
				# skip words that are already part of the snippet
				next if exists $index_matches->{$idx};

				push @words_matched, $words[$idx];
				$index_matches->{$idx} ++;
				}

			# enter aggregate mode -- add the next $pad words
			$aggr = $pad;
			}
		# have we been told to aggregate?
		elsif ($aggr)
			{
			# save this word and note it in the deduper
			push @words_matched, $words[$i];
			$index_matches->{$i} ++;

			# one less word to aggregate
			$aggr--;
			}

		# keep snippets to a modest length
		last if scalar @words_matched > 30;
		}

	# add a trailing ellipsis
	push @words_matched, "...";

	# create the snippet from the saved context words
	return join " ", @words_matched;
	}