diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2017-12-08 02:52:19 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2017-12-08 02:56:41 +0100 |
| commit | 192abb9db60f95968953b515ce18700c6b2da090 (patch) | |
| tree | e0e77c935ad95ca9e641c6a1f19a22556df9b8d2 /bucky/search/snippet.js | |
| parent | bbbd8bbab8737f5067c85376daf79cd8a5a9c4cb (diff) | |
snippets and middleware
Diffstat (limited to 'bucky/search/snippet.js')
| -rw-r--r-- | bucky/search/snippet.js | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/bucky/search/snippet.js b/bucky/search/snippet.js new file mode 100644 index 0000000..cd0657f --- /dev/null +++ b/bucky/search/snippet.js @@ -0,0 +1,88 @@ +var util = require('../util/util') +var STOPWORDS = require('./stopwords') + +function bold_snippet(s, terms) { + return bold_terms(snippet(s, terms), terms) +} +function bold_terms (s, terms) { + s = util.sanitize(s) + terms.forEach( (term) => { + s.replace(new RegExp("\b" + term + "\b", "i"), "<b>" + term + "</b>") + }) +} +function snippet(s, terms) { + s = util.sanitize(s) + var term_re = new RegExp("\b(" + terms.join("|") + ")\b", "i") + var words = s.split(/\s+/) + var snippet = ""; + + // deduper for matching @words indexes, so we don't add a word twice + var index_matches = {} + + // words in the eventual snippet + var words_matched = [] + + // counter for aggregating context after a match + var aggr = 0; + + // amount of context to show, in number of words surrounding a match + var $pad = 4; + + // loop over each of the words in the string + words.some((word, i) => { + // if the word matches... + if (term_re.match(word) && ! STOPWORDS.has(word.toLowerCase())) { + // if we aren't already aggregating, add an ellipsis + if (! $aggr) { + words_matched.push("...") + } + + // look backward $pad words + var idx; + for (var j = -pad; j < 1; j++) { + // create a new index from the offset + idx = i + j; + + // is this a valid index? has it already been encountered? + if (idx < 0) continue; + if (idx > words.length) continue; + if (index_matches[idx]) continue; + + // checks out, save this word + words_matched.push(words[idx]) + + // note the matching index in our deduper + index_matches[idx] = 1; + } + // enter aggregate mode -- add the next (pad) words + aggr = pad; + } + + // have we been told to aggregate? + else if (aggr) { + // save this word + words_matched.push(word) + + // add index to the deduper + index_matches[i] = 1; + + // one less word to aggregate + aggr--; + } + + // keep snippets to a modest length + return words_matched.length > 30; + }) + + // add a trailing ellipsis + words_matched.push("...") + + // create the snippet from the saved context words + snippet = words_matched.join(" ") + + return snippet +} + +module.exports = { + bold_snippet, bold_terms, snippet, +}
\ No newline at end of file |
