var util = require("../util/util");
var STOPWORDS = require("./stopwords");
var parse_term = require("./parse_term");

/**
 * Build a short excerpt of `s` made of the words surrounding each match of a
 * search term.
 *
 * The text is passed through `util.sanitize`, split into alphanumeric words,
 * and scanned left to right. Each word is run through `parse_term`; when the
 * result is in `terms` and its lower-cased form is not in `STOPWORDS`, the
 * word and up to 10 words on either side of it are added to the excerpt.
 * An ellipsis ("...") is emitted whenever a match starts a new run of context,
 * and once more at the very end.
 *
 * @param {string} s - Text to excerpt (whatever `util.sanitize` accepts).
 * @param {Iterable<string>} terms - Search terms, compared against the value
 *   returned by `parse_term` for each word.
 * @returns {string} The selected words joined by single spaces. When nothing
 *   matches, the result is just "...".
 */
function snippet(s, terms) {
  // Number of context words kept on each side of a match.
  const PAD = 10;
  // Stop scanning once the excerpt holds more than this many entries
  // (ellipsis markers count as entries).
  const MAX_WORDS = 30;

  const termSet = new Set(terms);

  // Splitting on runs of non-alphanumerics yields an empty string at the
  // start/end when the text begins/ends with a separator; drop those so they
  // neither consume context slots nor produce double spaces in the output.
  const words = util
    .sanitize(s)
    .split(/[^a-zA-Z0-9]+/)
    .filter(Boolean);

  // Indexes of `words` already emitted, so overlapping windows don't repeat.
  const emitted = new Set();
  // Words (and ellipsis markers) of the eventual snippet, in order.
  const picked = [];
  // How many words following the latest match still need to be collected.
  let trailing = 0;

  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const term = parse_term(word);

    if (term && termSet.has(term) && !STOPWORDS.has(term.toLowerCase())) {
      // Not currently inside a run of context: mark the gap.
      if (trailing === 0) {
        picked.push("...");
      }

      // Look backward up to PAD words (clamped to the start of the text),
      // ending on the matching word itself.
      for (let idx = Math.max(0, i - PAD); idx <= i; idx++) {
        if (emitted.has(idx)) {
          continue;
        }
        picked.push(words[idx]);
        emitted.add(idx);
      }

      // Collect the next PAD words as trailing context.
      trailing = PAD;
    } else if (trailing > 0) {
      picked.push(word);
      emitted.add(i);
      trailing--;
    }

    // Keep snippets to a modest length.
    if (picked.length > MAX_WORDS) {
      break;
    }
  }

  // Trailing ellipsis.
  picked.push("...");

  return picked.join(" ");
}

module.exports = snippet;