diff options
Diffstat (limited to 'bucky/search/snippet.js')
| -rw-r--r-- | bucky/search/snippet.js | 57 |
1 files changed, 29 insertions, 28 deletions
diff --git a/bucky/search/snippet.js b/bucky/search/snippet.js index 17988d2..787a53f 100644 --- a/bucky/search/snippet.js +++ b/bucky/search/snippet.js @@ -1,35 +1,36 @@ -var util = require('../util/util') -var STOPWORDS = require('./stopwords') +var util = require("../util/util"); +var STOPWORDS = require("./stopwords"); +var parse_term = require("./parse_term"); function snippet(s, terms) { - s = util.sanitize(s) - var term_set = new Set(terms) - - var words = s.split(/[^a-zA-Z0-9]+/) - var snippet = ""; - + s = util.sanitize(s); + var term_set = new Set(terms); + + var words = s.split(/[^a-zA-Z0-9]+/); + var snippet = ""; + // deduper for matching @words indexes, so we don't add a word twice - var index_matches = {} + var index_matches = {}; // words in the eventual snippet - var words_matched = [] + var words_matched = []; // counter for aggregating context after a match - var aggr = 0; + var aggr = 0; // amount of context to show, in number of words surrounding a match var pad = 10; // loop over each of the words in the string - var word for (var i = 0, len = words.length; i < len; i++) { - word = words[i] + var word = words[i]; + var term = parse_term(word); - // if the word matches... - if (term_set.has(word.toLowerCase()) && ! STOPWORDS.has(word.toLowerCase())) { + // if the word matches... + if (term && term_set.has(term) && !STOPWORDS.has(term.toLowerCase())) { // if we aren't already aggregating, add an ellipsis - if (! aggr) { - words_matched.push("...") + if (!aggr) { + words_matched.push("..."); } // look backward $pad words @@ -44,38 +45,38 @@ function snippet(s, terms) { if (index_matches[idx]) continue INNER; // checks out, save this word - words_matched.push(words[idx]) + words_matched.push(words[idx]); // note the matching index in our deduper index_matches[idx] = 1; - } + } // enter aggregate mode -- add the next (pad) words aggr = pad; - } + } // have we been told to aggregate? else if (aggr) { // save this word - words_matched.push(word) + words_matched.push(word); // add index to the deduper index_matches[i] = 1; // one less word to aggregate aggr--; - } + } // keep snippets to a modest length - if (words_matched.length > 30) break - } + if (words_matched.length > 30) break; + } // add a trailing ellipsis - words_matched.push("...") + words_matched.push("..."); // create the snippet from the saved context words - snippet = words_matched.join(" ") + snippet = words_matched.join(" "); - return snippet + return snippet; } -module.exports = snippet
\ No newline at end of file +module.exports = snippet; |
