var util = require('../util/util') var STOPWORDS = require('./stopwords') function bold_snippet(s, terms) { return bold_terms(snippet(s, terms), terms) } function bold_terms (s, terms) { s = util.sanitize(s) terms.forEach( (term) => { s.replace(new RegExp("\b" + term + "\b", "i"), "" + term + "") }) } function snippet(s, terms) { s = util.sanitize(s) var term_re = new RegExp("\b(" + terms.join("|") + ")\b", "i") var words = s.split(/\s+/) var snippet = ""; // deduper for matching @words indexes, so we don't add a word twice var index_matches = {} // words in the eventual snippet var words_matched = [] // counter for aggregating context after a match var aggr = 0; // amount of context to show, in number of words surrounding a match var $pad = 4; // loop over each of the words in the string words.some((word, i) => { // if the word matches... if (term_re.match(word) && ! STOPWORDS.has(word.toLowerCase())) { // if we aren't already aggregating, add an ellipsis if (! $aggr) { words_matched.push("...") } // look backward $pad words var idx; for (var j = -pad; j < 1; j++) { // create a new index from the offset idx = i + j; // is this a valid index? has it already been encountered? if (idx < 0) continue; if (idx > words.length) continue; if (index_matches[idx]) continue; // checks out, save this word words_matched.push(words[idx]) // note the matching index in our deduper index_matches[idx] = 1; } // enter aggregate mode -- add the next (pad) words aggr = pad; } // have we been told to aggregate? else if (aggr) { // save this word words_matched.push(word) // add index to the deduper index_matches[i] = 1; // one less word to aggregate aggr--; } // keep snippets to a modest length return words_matched.length > 30; }) // add a trailing ellipsis words_matched.push("...") // create the snippet from the saved context words snippet = words_matched.join(" ") return snippet } module.exports = { bold_snippet, bold_terms, snippet, }