summary | refs | log | tree | commit | diff
path: root/bucky
diff options
context:
space:
mode:
author: julian laplace <julescarbon@gmail.com> 2026-01-20 14:41:38 +0100
committer: julian laplace <julescarbon@gmail.com> 2026-01-20 14:41:38 +0100
commit: 2404d26e3b4129135f709aba78808a7b48dccee7 (patch)
tree: 66d422d3feddfda01f6f05d704e565f0678fb86a /bucky
parent: 71ce635a09ad53f8b15137ad0a2aaaafe7bcc980 (diff)
redo lexicon
Diffstat (limited to 'bucky')
-rw-r--r-- bucky/search/lexicon.js 22
1 files changed, 14 insertions, 8 deletions
diff --git a/bucky/search/lexicon.js b/bucky/search/lexicon.js
index 0783512..fe3d6d4 100644
--- a/bucky/search/lexicon.js
+++ b/bucky/search/lexicon.js
@@ -88,6 +88,12 @@ function parse_files() {
var underscoreRegexp = new RegExp("_", "g");
var spaceRegexp = new RegExp("[^a-zA-Z0-9]+", "g");
+/**
+ * For each term, create mappings:
+ * - lexicon[term][thread] => {thread, comment, file, strength}
+ * - lex_counts[term] => document frequency
+ * - total terms ++
+ */
function parse_terms(opt) {
var thread = opt.thread;
var comment = opt.comment || 0;
@@ -104,19 +110,19 @@ function parse_terms(opt) {
return;
}
var lookup = (lexicon[t] = lexicon[t] || {});
- var res = (lookup[thread] = lookup[thread] || { strength: 0 });
+ var res = (lookup[thread] = lookup[thread] || { strength: 1 });
res.thread = res.thread || thread;
res.comment = res.comment || comment;
res.file = res.file || file;
// prioritize threads
if (!comment && !file) {
- res.strength += 2;
- } else {
- res.strength += 1;
+ res.strength += 4;
+ } else if (file) {
+ res.strength += 1.5;
}
count += 1;
- lex_counts[term] = lex_counts[term] || 0;
- lex_counts[term] += 1;
+ lex_counts[term] = lex_counts[term] || new Set();
+ lex_counts[term].add(res.thread);
});
return count || 0;
}
@@ -136,11 +142,11 @@ function lexicon_store() {
// console.log(term)
search_db.put(term, serialized);
});
- search_db.save();
+ // search_db.save();
}
function serialize_matches(term) {
var matches = lexicon[term];
- var lex_count = lex_counts[term];
+ var lex_count = lex_counts[term]?.size || 0;
if (!lex_count) {
return null;
}