summaryrefslogtreecommitdiff
path: root/bucky/search/search.js
blob: fb3bb2d6cf945284a31b06d4719b78331027912c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
var db = require("../db");
var bdb = require("./bdb")("search");
var STOPWORDS = require("./stopwords");

// Matches every run of characters that is not a lowercase ASCII letter or digit.
const wordRegexp = /[^a-z0-9]+/g;

/**
 * Breaks a string into lowercase alphanumeric terms.
 *
 * The input is lowercased, cut wherever a run of characters outside
 * [a-z0-9] occurs, and the empty pieces produced by leading or trailing
 * separators are discarded.
 *
 * @param {string} s - Text to tokenize.
 * @returns {string[]} Non-empty terms in their original order.
 */
function parse_terms(s) {
  const terms = [];
  for (const piece of s.toLowerCase().split(wordRegexp)) {
    if (piece) {
      terms.push(piece);
    }
  }
  return terms;
}
/**
 * Three-way comparison built on `<` and `===`.
 *
 * @param {*} a - Left operand.
 * @param {*} b - Right operand.
 * @returns {number} -1 when `a < b`, 0 when `a === b`, otherwise 1
 *   (which also covers operands that do not compare, such as NaN).
 */
function cmp(a, b) {
  if (a < b) {
    return -1;
  }
  if (a === b) {
    return 0;
  }
  return 1;
}

/**
 * Looks up the index entries stored for a single search term.
 *
 * The stored row is read as a comma-separated list of entries, each entry
 * being space-separated fields in the order `thread comment file strength`.
 *
 * @param {string} term - Term used as the key in the `bdb` store.
 * @returns {Array<{thread: number, comment: number, file: number, strength: number}>}
 *   One object per non-empty entry; an empty array when the term has no row
 *   or the row is empty. Fields that are missing or non-numeric parse to NaN,
 *   except `strength`, which falls back to 1.
 */
function find_term(term) {
  const row = bdb.get(term);
  if (!row) return [];
  const serialized = row.toString();
  if (!serialized.length) return [];
  return (
    serialized
      .split(",")
      // Drop empty segments (e.g. from a trailing comma) up front; mapping
      // them would leave `undefined` holes that callers dereference.
      .filter((entry) => entry.length > 0)
      .map((entry) => {
        const [thread, comment, file, strength] = entry.split(" ");
        return {
          thread: parseInt(thread, 10),
          comment: parseInt(comment, 10),
          file: parseInt(file, 10),
          // NOTE(review): strength is parsed as an integer, so a fractional
          // value would be truncated — confirm the indexer only writes ints.
          strength: parseInt(strength, 10) || 1,
        };
      })
  );
}

/**
 * Runs a ranked search over the term index and returns one page of matches.
 *
 * Every non-stopword term of the query is looked up with `find_term`. Matches
 * are grouped by thread id: each thread accumulates the strength of all its
 * matches and the number of matches, and keeps the first truthy comment/file
 * id it sees. Threads are then ordered by accumulated strength, strongest
 * first, and the slice `[start, start + limit)` is returned.
 *
 * @param {*} query - Search text. Falsy values short-circuit; other values
 *   are stringified before tokenizing.
 * @param {number|string} [start] - Zero-based offset of the first result.
 *   Missing, non-numeric or negative values are treated as 0.
 * @param {number|string} [limit] - Maximum number of results to return.
 *   Missing, non-numeric or non-positive values are treated as 10.
 * @returns {{meta: Object, results: Object[], thread_ids: number[],
 *   comment_ids: number[], file_ids: number[]}|undefined}
 *   `undefined` when `query` is falsy; otherwise the page of score objects
 *   plus the ids they reference and paging metadata.
 */
function search(query, start, limit) {
  if (!query) return;

  // Clamp paging values: a negative limit previously disabled the page
  // cut-off entirely and produced a nonsensical `next` offset.
  const parsedStart = parseInt(start, 10);
  const offset = parsedStart > 0 ? parsedStart : 0;
  const parsedLimit = parseInt(limit, 10);
  const pageSize = parsedLimit > 0 ? parsedLimit : 10;

  // Stringify so a non-string query (number, array) is tokenized, not thrown on.
  const terms = parse_terms(String(query));

  // Keyed by thread id. A plain object is kept on purpose: its integer-key
  // ordering determines how equally strong threads are ordered after sorting.
  const scores = {};
  terms.forEach((term) => {
    if (STOPWORDS.has(term)) return;
    const matches = find_term(term);
    if (!matches) return;
    matches.forEach((match) => {
      // Skip holes left by malformed index entries.
      if (!match) return;
      const score = (scores[match.thread] = scores[match.thread] || {
        count: 0,
        strength: 0,
      });
      score.thread = score.thread || parseInt(match.thread, 10);
      score.comment = score.comment || parseInt(match.comment, 10);
      score.file = score.file || parseInt(match.file, 10);
      score.strength += parseFloat(match.strength);
      score.count += 1;
    });
  });

  // Strongest thread first.
  const ranked = Object.values(scores).sort((x, y) =>
    cmp(y.strength, x.strength)
  );
  const results = ranked.slice(offset, offset + pageSize);

  return {
    meta: {
      query: query,
      terms: terms,
      start: offset,
      next: offset + pageSize,
      limit: pageSize,
      total: ranked.length,
    },
    results: results,
    thread_ids: results.map((match) => match.thread),
    // Only truthy ids are reported, so 0 / NaN placeholders are left out.
    comment_ids: results
      .filter((match) => match.comment)
      .map((match) => match.comment),
    file_ids: results.filter((match) => match.file).map((match) => match.file),
  };
}

// Public interface of this module: only `search` is exported.
module.exports = { search: search };