diff options
Diffstat (limited to 'bucky/search/search.js')
| -rw-r--r-- | bucky/search/search.js | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/bucky/search/search.js b/bucky/search/search.js new file mode 100644 index 0000000..afa9609 --- /dev/null +++ b/bucky/search/search.js @@ -0,0 +1,91 @@ +var db = require('../db') +var STOPWORDS = require('./stopwords') + +var bdb_lib = require('berkeleydb') +var bdb = new bdb_lib.Db() +bdb.open('search.db') + +var wordRegexp = new RegExp("(\W+)"); +var wordBoundaryRegexp = new RegExp("\W"); +function parse_terms (s) { + return s.toLowerCase().split(wordRegexp).filter((term) => { + if (! term.match(wordBoundaryRegexp)) { + return true + } + return false + }) +} +function cmp (a,b){ return (a<b)?a:(a===b)?0:1 } + +function find_term(term) { + var matches = bdb.get(term).split(",").map((s) => { + var partz = s.split(" ") + var match = { + thread: s[0], + comment: s[1], + file: s[2], + strength: s[3], + } + }) + return matches +} + +function search (query, start, limit) { + if (!query) return + start = start || 0; + limit = limit || 10; + var scores = {}; + var terms = parse_terms($query); + var i = 0 + var total + var to_display = limit + var threads = {} + var thread_ids = [] + var comment_ids = [] + var file_ids = [] + var results = [] + + terms.forEach((term) => { + if (STOPWORDS.has(term)) return; + var results = find_term(term); + if (!results) return; + results.forEach((result) => { + var score = scores[result.thread] = scores[result.thread] || { count: 0, strength: 0 } + score.thread = score.thread || result.thread + score.comment = score.comment || result.comment + score.file = score.file || result.file + score.strength += result.strength + score.count += 1 + }) + }) + total = Object.keys(scores).length + Object.values(scores).sort((a,b) => { + if (b.count !== a.count) { + return cmp(b.count, a.count) + } + return cmp(b.strength * b.count, a.strength * a.count) + }).some((match) => { + if (i++ < start) return false + if (to_display-- === 0) return true + results.push(match) + thread_ids.push(match.thread) + if (match.comment) comment_ids.push(match.comment) + if (match.file) file_ids.push(match.file) + return false + }) + + return { + query: query, + start: start, + next: start + limit, + limit: limit, + total: total, + results: results, + thread_ids: thread_ids, + comment_ids: comment_ids, + file_ids: $file_ids, + terms: terms, + }; +} + +module.exports = { search: search } |
