summaryrefslogtreecommitdiff
path: root/lib/search/index.js
blob: 8d209e6193b2667103ce4c535bd1e7d2785fbf57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// Project database module; search() below calls db.storeQuery() on it.
var db = require('../db')
// Berkeley DB binding. One Db handle is created and opened when this module
// is first loaded and is shared by every lookup in this file.
var bdb_lib = require('berkeleydb')
var bdb = new bdb_lib.Db()
// NOTE(review): 'search.db' is a relative path — presumably resolved against
// the process working directory rather than this file; confirm.
bdb.open('search.db')

// Runs of non-word characters. The capture group makes split() keep the
// separators in its output, so they are filtered out again in parse_terms.
// Regex literals are used on purpose: inside a string literal "\W" collapses
// to a plain "W", which silently produced a pattern matching the letter W.
const wordRegexp = /(\W+)/;
// Any single non-word character; identifies separator chunks.
const wordBoundaryRegexp = /\W/;

/**
 * Breaks a free-text query into lowercase search terms.
 *
 * The string is lowercased and split on runs of non-word characters
 * (anything outside [A-Za-z0-9_]); separator chunks and the empty strings
 * that split() yields at the edges are dropped.
 *
 * @param {string} s - Raw query text.
 * @returns {string[]} Terms in their original order (duplicates preserved).
 */
function parse_terms (s) {
  return s
    .toLowerCase()
    .split(wordRegexp)
    .filter((term) => term !== '' && !wordBoundaryRegexp.test(term));
}
/**
 * Three-way comparison usable as an Array.prototype.sort comparator.
 *
 * @param {*} a - Left operand.
 * @param {*} b - Right operand.
 * @returns {number} -1 when a < b, 0 when a === b, otherwise 1.
 */
function cmp (a, b) {
  if (a < b) return -1;
  if (a === b) return 0;
  return 1;
}

// Common English words that carry no search value; search() skips any query
// term found in this set.
//
// The concatenation is wrapped in parentheses so that split() applies to the
// whole list: member access binds tighter than "+", so without them only the
// final literal is split and the Set ends up holding single characters.
// Every line except the last must end with a space so adjacent words do not
// fuse together.
const STOPWORDS = new Set((
  "a about above across adj after again against all almost alone along also " +
  "although always am among an and another any anybody anyone anything anywhere " +
  "apart are around as aside at away be because been before behind being below " +
  "besides between beyond both but by can cannot could did do does doing done " +
  "down downwards during each either else enough etc even ever every everybody " +
  "everyone except far few for forth from get gets got had hardly has have having " +
  "her here herself him himself his how however i if in indeed instead into inward " +
  "is it its itself just kept many maybe might mine more most mostly much must " +
  "myself near neither next no nobody none nor not nothing nowhere of off often on " +
  "only onto or other others ought our ours out outside over own p per please plus " +
  "pp quite rather really said seem self selves several shall she should since so " +
  "some somebody somewhat still such than that the their theirs them themselves " +
  "then there therefore these they this thorough thoroughly those through thus to " +
  "together too toward towards under until up upon v very was well were what " +
  "whatever when whenever where whether which while who whom whose will with " +
  "within without would yet young your yourself s"
).split(" "));

/**
 * Looks up the index entry stored for a single search term.
 *
 * NOTE(review): search() treats the return value as an array of
 * { thread, comment, file, strength } records. If bdb.get() hands back a raw
 * string or Buffer, a decode step still has to be added here — confirm the
 * on-disk format of search.db.
 *
 * @param {string} term - A single lowercase term.
 * @returns {*} Whatever bdb.get() yields for the term (previously the lookup
 *   result was discarded and the function always returned undefined).
 */
function find_term(term) {
  return bdb.get(term);
}

/**
 * Runs a keyword search and returns one page of ranked thread matches.
 *
 * The query is tokenised with parse_terms(); stopwords are skipped and every
 * remaining term is looked up with find_term(). Hits are grouped per thread,
 * accumulating how many hits the thread received (count) and their summed
 * strength. Threads are ranked by count, then by strength * count, both
 * descending. The query and its total match count are recorded through
 * db.storeQuery().
 *
 * NOTE(review): the Perl version this was ported from loaded full file and
 * comment records (files_by_id / comments_by_id) before returning. No
 * equivalent is visible on the JS side, so `threads`, `comments` and `files`
 * carry the matching IDs for the returned page; hydrate them once the db
 * helpers exist.
 *
 * @param {string} query - Raw query text; a falsy query returns undefined.
 * @param {number} [start=0] - Offset of the first match to return.
 * @param {number} [limit=10] - Maximum number of matches to return.
 * @returns {{start: number, limit: number, total: number, results: Object[],
 *   threads: Array, comments: Array, files: Array, terms: string[]}|undefined}
 *   `start` is the offset of the next page (start + limit); `total` counts
 *   all matching threads, not just the returned page; `terms` lists every
 *   parsed term, stopwords included.
 */
function search (query, start, limit) {
  if (!query) return;

  const offset = Math.max(0, Number(start) || 0);
  const pageSize = Number(limit) > 0 ? Number(limit) : 10;
  const terms = parse_terms(query);

  // Per-thread accumulator, keyed by thread id.
  const scores = new Map();
  terms.forEach((term) => {
    if (STOPWORDS.has(term)) return;
    const hits = find_term(term);
    // Anything that is not an array of hit records (missing term, undecoded
    // value) contributes nothing rather than polluting the scores.
    if (!Array.isArray(hits)) return;
    hits.forEach((hit) => {
      let score = scores.get(hit.thread);
      if (!score) {
        score = { count: 0, strength: 0 };
        scores.set(hit.thread, score);
      }
      // Keep the first thread/comment/file seen for this thread.
      score.thread = score.thread || hit.thread;
      score.comment = score.comment || hit.comment;
      score.file = score.file || hit.file;
      score.strength += hit.strength;
      score.count += 1;
    });
  });

  const total = scores.size;

  // More matching terms first; ties broken by strength weighted by count.
  const ranked = [...scores.values()].sort((a, b) => {
    if (b.count !== a.count) return b.count - a.count;
    return b.strength * b.count - a.strength * a.count;
  });
  const results = ranked.slice(offset, offset + pageSize);

  const thread_ids = results.map((match) => match.thread);
  const comment_ids = results
    .filter((match) => match.comment)
    .map((match) => match.comment);
  const file_ids = results
    .filter((match) => match.file)
    .map((match) => match.file);

  db.storeQuery(query, total);

  return {
    start: offset + pageSize,
    limit: pageSize,
    total: total,
    results: results,
    threads: thread_ids,
    comments: comment_ids,
    files: file_ids,
    terms: terms,
  };
}

// Public API of this module: only the search entry point is exported.
module.exports = { search };