summary | refs | log | tree | commit | diff
path: root/bucky/search/search.js
diff options
context:
space:
mode:
Diffstat (limited to 'bucky/search/search.js')
-rw-r--r-- bucky/search/search.js 91
1 files changed, 91 insertions, 0 deletions
diff --git a/bucky/search/search.js b/bucky/search/search.js
new file mode 100644
index 0000000..afa9609
--- /dev/null
+++ b/bucky/search/search.js
@@ -0,0 +1,91 @@
+var db = require('../db')
+var STOPWORDS = require('./stopwords')
+
// Search index storage. A single Berkeley DB handle is created and opened on
// 'search.db' as a side effect of loading this module; find_term() reads
// term records from it through `bdb`.
// NOTE(review): 'search.db' is a relative path — presumably resolved against
// the process working directory; confirm against the berkeleydb binding.
+var bdb_lib = require('berkeleydb')
+var bdb = new bdb_lib.Db()
+bdb.open('search.db')
+
// Regex literals are used on purpose: inside a string literal "\W" collapses
// to a plain "W", so `new RegExp("(\W+)")` would match the letter W instead of
// a run of non-word characters.
// The capture group makes split() keep the separators in its output; they are
// filtered back out in parse_terms().
var wordRegexp = /(\W+)/;
var wordBoundaryRegexp = /\W/;

/**
 * Split a query string into lowercase search terms.
 *
 * The input is lowercased and split on runs of non-word characters (anything
 * outside [A-Za-z0-9_]). Separator chunks and empty strings are discarded.
 *
 * @param {string} s - Raw query text.
 * @returns {string[]} Terms in the order they appear; empty for empty input.
 */
function parse_terms (s) {
  if (!s) return [];
  return s.toLowerCase().split(wordRegexp).filter((term) => {
    // split() yields "" at the edges when the input starts or ends with a
    // separator; those are not terms.
    return term !== "" && !wordBoundaryRegexp.test(term);
  });
}
/**
 * Three-way comparator suitable for Array.prototype.sort.
 *
 * @param {*} a - Left operand.
 * @param {*} b - Right operand.
 * @returns {number} -1 when a < b, 0 when a === b, otherwise 1.
 */
function cmp (a, b) {
  // Must be a fixed negative number: returning `a` itself gives the wrong
  // sign for positive values and 0 ("equal") when a is 0.
  if (a < b) return -1;
  if (a === b) return 0;
  return 1;
}
+
/**
 * Look up every index entry stored for a single search term.
 *
 * The stored value is read as a comma-separated list of records, each record
 * being four space-separated fields: thread, comment, file, strength.
 *
 * @param {string} term - Lowercase term to look up in the index.
 * @returns {{thread: string, comment: string, file: string, strength: number}[]}
 *   One entry per stored record; an empty array when the term has no value.
 */
function find_term (term) {
  const raw = bdb.get(term);
  // A term that was never indexed has no value; report "no matches" instead
  // of throwing on .split().
  if (!raw) return [];
  // String() so a Buffer-like value is handled as well as a plain string.
  // NOTE(review): confirm the value type returned by the berkeleydb binding.
  return String(raw)
    .split(",")
    .filter((record) => record !== "")
    .map((record) => {
      const parts = record.split(" ");
      return {
        thread: parts[0],
        comment: parts[1],
        file: parts[2],
        // Strengths are summed by the caller, so they must be numeric;
        // a missing or malformed field counts as 0.
        strength: Number(parts[3]) || 0,
      };
    });
}
+
/**
 * Search the term index and return one page of ranked thread matches.
 *
 * Each query term that is not a stopword is looked up with find_term(); hits
 * are grouped by thread. Threads are ranked by the number of term hits
 * (descending), then by strength * count (descending).
 *
 * @param {string} query - Raw query text; a falsy query returns undefined.
 * @param {number|string} [start=0] - Zero-based offset of the first result.
 * @param {number|string} [limit=10] - Maximum number of results in the page.
 * @returns {{query: string, start: number, next: number, limit: number,
 *   total: number, results: Object[], thread_ids: Array, comment_ids: Array,
 *   file_ids: Array, terms: string[]}|undefined}
 *   `total` is the number of matching threads before paging; `terms` holds
 *   every parsed term, stopwords included.
 */
function search (query, start, limit) {
  if (!query) return;

  // Coerce paging arguments to integers so string inputs ("10") do not turn
  // `start + limit` into a string concatenation.
  const offset = Math.max(0, Number.parseInt(start, 10) || 0);
  const parsedLimit = Number.parseInt(limit, 10);
  const pageSize = parsedLimit > 0 ? parsedLimit : 10;

  const terms = parse_terms(query);
  // Null-prototype object: thread ids come from stored data and must not
  // collide with Object.prototype keys.
  const scores = Object.create(null);

  terms.forEach((term) => {
    if (STOPWORDS.has(term)) return;
    const matches = find_term(term);
    if (!matches) return;
    matches.forEach((match) => {
      if (!match) return;
      const score = scores[match.thread] = scores[match.thread] || { count: 0, strength: 0 };
      // Keep the first non-empty thread/comment/file seen for this thread.
      score.thread = score.thread || match.thread;
      score.comment = score.comment || match.comment;
      score.file = score.file || match.file;
      // Numeric coercion: a string strength would be concatenated by +=.
      score.strength += Number(match.strength) || 0;
      score.count += 1;
    });
  });

  const ranked = Object.values(scores).sort((a, b) => {
    if (b.count !== a.count) return b.count - a.count;
    return (b.strength * b.count) - (a.strength * a.count);
  });

  const results = ranked.slice(offset, offset + pageSize);
  const thread_ids = [];
  const comment_ids = [];
  const file_ids = [];
  results.forEach((match) => {
    thread_ids.push(match.thread);
    if (match.comment) comment_ids.push(match.comment);
    if (match.file) file_ids.push(match.file);
  });

  return {
    query: query,
    start: offset,
    next: offset + pageSize,
    limit: pageSize,
    total: ranked.length,
    results: results,
    thread_ids: thread_ids,
    comment_ids: comment_ids,
    file_ids: file_ids,
    terms: terms,
  };
}
+
+module.exports = { search: search }