summary | refs | log | tree | commit | diff
path: root/bucky/search/snippet.js
diff options
context:
space:
mode:
Diffstat (limited to 'bucky/search/snippet.js')
-rw-r--r-- bucky/search/snippet.js 47
1 files changed, 20 insertions, 27 deletions
diff --git a/bucky/search/snippet.js b/bucky/search/snippet.js
index cd0657f..17988d2 100644
--- a/bucky/search/snippet.js
+++ b/bucky/search/snippet.js
@@ -1,19 +1,11 @@
var util = require('../util/util')
var STOPWORDS = require('./stopwords')
-function bold_snippet(s, terms) {
- return bold_terms(snippet(s, terms), terms)
-}
-function bold_terms (s, terms) {
- s = util.sanitize(s)
- terms.forEach( (term) => {
- s.replace(new RegExp("\b" + term + "\b", "i"), "<b>" + term + "</b>")
- })
-}
function snippet(s, terms) {
s = util.sanitize(s)
- var term_re = new RegExp("\b(" + terms.join("|") + ")\b", "i")
- var words = s.split(/\s+/)
+ var term_set = new Set(terms)
+
+ var words = s.split(/[^a-zA-Z0-9]+/)
var snippet = "";
// deduper for matching @words indexes, so we don't add a word twice
@@ -26,27 +18,30 @@ function snippet(s, terms) {
var aggr = 0;
// amount of context to show, in number of words surrounding a match
- var $pad = 4;
+ var pad = 10;
// loop over each of the words in the string
- words.some((word, i) => {
+ var word
+ for (var i = 0, len = words.length; i < len; i++) {
+ word = words[i]
+
// if the word matches...
- if (term_re.match(word) && ! STOPWORDS.has(word.toLowerCase())) {
+ if (term_set.has(word.toLowerCase()) && ! STOPWORDS.has(word.toLowerCase())) {
// if we aren't already aggregating, add an ellipsis
- if (! $aggr) {
+ if (! aggr) {
words_matched.push("...")
}
-
+
// look backward $pad words
var idx;
- for (var j = -pad; j < 1; j++) {
+ INNER: for (var j = -pad; j < 1; j++) {
// create a new index from the offset
idx = i + j;
// is this a valid index? has it already been encountered?
- if (idx < 0) continue;
- if (idx > words.length) continue;
- if (index_matches[idx]) continue;
+ if (idx < 0) continue INNER;
+ if (idx > words.length) continue INNER;
+ if (index_matches[idx]) continue INNER;
// checks out, save this word
words_matched.push(words[idx])
@@ -69,20 +64,18 @@ function snippet(s, terms) {
// one less word to aggregate
aggr--;
}
-
+
// keep snippets to a modest length
- return words_matched.length > 30;
- })
+ if (words_matched.length > 30) break
+ }
// add a trailing ellipsis
words_matched.push("...")
// create the snippet from the saved context words
snippet = words_matched.join(" ")
-
+
return snippet
}
-module.exports = {
- bold_snippet, bold_terms, snippet,
-}
\ No newline at end of file
+module.exports = snippet
\ No newline at end of file