blob: f8fd12f10059dafe26bf731354d980b4307fff61 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
var util = require('../util/util')
var STOPWORDS = require('./stopwords')
/**
 * Build a short excerpt of `s` that shows the words surrounding each
 * occurrence of a search term.
 *
 * The text is passed through `util.sanitize` (defined elsewhere; assumed to
 * return a string -- confirm against that module), split into alphanumeric
 * words, and scanned left to right. Every word that matches a term and is
 * not in STOPWORDS contributes itself, up to `pad` preceding words and up to
 * `pad` following words. Each run of context that starts while no context is
 * being collected is introduced by "...", and the result always ends with
 * "...". When nothing matches, the result is just "...".
 *
 * @param {string} s - Raw text to excerpt.
 * @param {Iterable<string>} [terms] - Search terms; matched case-insensitively.
 * @returns {string} The context words joined by single spaces.
 */
function snippet(s, terms) {
  // Number of context words kept on each side of a match.
  const pad = 10;
  // Soft cap on output tokens; checked once per scanned word, so a look-back
  // burst may overshoot it slightly before the scan stops.
  const maxWords = 30;

  // Words are lower-cased before lookup, so lower-case the terms as well;
  // otherwise a term containing an uppercase letter could never match.
  const termSet = new Set();
  for (const term of terms == null ? [] : terms) {
    termSet.add(String(term).toLowerCase());
  }

  // Splitting on runs of non-alphanumerics yields empty strings when the text
  // begins or ends with punctuation; drop them so they don't end up in the
  // snippet as stray double spaces.
  const words = util.sanitize(s).split(/[^a-zA-Z0-9]+/).filter(Boolean);

  // Indexes of words already emitted, so overlapping contexts don't repeat.
  const emitted = new Set();
  // Tokens of the eventual snippet.
  const out = [];
  // How many more trailing-context words to collect after the latest match.
  let remaining = 0;

  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const key = word.toLowerCase();

    if (termSet.has(key) && !STOPWORDS.has(key)) {
      // Starting a fresh run of context: mark the gap with an ellipsis.
      if (remaining === 0) {
        out.push("...");
      }
      // Emit up to `pad` preceding words plus the match itself, clamped to
      // the start of the text and skipping anything already emitted.
      for (let idx = Math.max(0, i - pad); idx <= i; idx++) {
        if (emitted.has(idx)) continue;
        out.push(words[idx]);
        emitted.add(idx);
      }
      // (Re)start collection of the following `pad` words.
      remaining = pad;
    } else if (remaining > 0) {
      // Trailing context after an earlier match.
      out.push(word);
      emitted.add(i);
      remaining--;
    }

    // Keep snippets to a modest length.
    if (out.length > maxWords) break;
  }

  // Trailing ellipsis.
  out.push("...");
  return out.join(" ");
}
// The module's export is the snippet() function itself.
module.exports = snippet
|