var util = require('../util/util') var STOPWORDS = require('./stopwords') function snippet(s, terms) { s = util.sanitize(s) var term_set = new Set(terms) var words = s.split(/[^a-zA-Z0-9]+/) var snippet = ""; // deduper for matching @words indexes, so we don't add a word twice var index_matches = {} // words in the eventual snippet var words_matched = [] // counter for aggregating context after a match var aggr = 0; // amount of context to show, in number of words surrounding a match var pad = 10; // loop over each of the words in the string var word for (var i = 0, len = words.length; i < len; i++) { word = words[i] // if the word matches... if (term_set.has(word.toLowerCase()) && ! STOPWORDS.has(word.toLowerCase())) { // if we aren't already aggregating, add an ellipsis if (! aggr) { words_matched.push("...") } // look backward $pad words var idx; INNER: for (var j = -pad; j < 1; j++) { // create a new index from the offset idx = i + j; // is this a valid index? has it already been encountered? if (idx < 0) continue INNER; if (idx > words.length) continue INNER; if (index_matches[idx]) continue INNER; // checks out, save this word words_matched.push(words[idx]) // note the matching index in our deduper index_matches[idx] = 1; } // enter aggregate mode -- add the next (pad) words aggr = pad; } // have we been told to aggregate? else if (aggr) { // save this word words_matched.push(word) // add index to the deduper index_matches[i] = 1; // one less word to aggregate aggr--; } // keep snippets to a modest length if (words_matched.length > 30) break } // add a trailing ellipsis words_matched.push("...") // create the snippet from the saved context words snippet = words_matched.join(" ") return snippet } module.exports = snippet