summary | refs | log | tree | commit | diff
path: root/bucky
diff options
context:
space:
mode:
author: julian laplace <julescarbon@gmail.com> 2026-01-20 15:02:26 +0100
committer: julian laplace <julescarbon@gmail.com> 2026-01-20 15:02:26 +0100
commit: f907512a39eb4a6c98277fbdbf9e71f73d9c3b3f (patch)
tree: 3cb57fc2ceaaf6b97b64b758a7b98078ad1004cb /bucky
parent: 1975d0f84bb596ea37b204ab5a9009b26d36634b (diff)
redo lexicon
Diffstat (limited to 'bucky')
-rw-r--r-- bucky/search/lexicon.js 35
1 files changed, 20 insertions, 15 deletions
diff --git a/bucky/search/lexicon.js b/bucky/search/lexicon.js
index 426b3d3..bd24f11 100644
--- a/bucky/search/lexicon.js
+++ b/bucky/search/lexicon.js
@@ -7,8 +7,8 @@ var parse_term = require("./parse_term");
var search_db = bdb("search");
-var lexicon = {};
-var lex_counts = {};
+var lexicon = new Map();
+var lex_counts = new Map();
var total = 0;
module.exports = {
@@ -30,14 +30,14 @@ function watch_index() {
function build_index(cb) {
console.log("building search index");
- lexicon = {};
- lex_counts = {};
+ lexicon = new Map();
+ lex_counts = new Map();
total = 0;
return parse_threads()
.then(parse_comments)
.then(parse_files)
.then(() => {
- var unique = Object.keys(lexicon).length;
+ var unique = lexicon.size;
console.log("--- WORD COUNT: ", total);
console.log("--- UNIQUE WORDS: ", unique);
lexicon_store();
@@ -107,10 +107,12 @@ function parse_terms(opt) {
if (!term || !parsedTerm) {
return;
}
- lexicon[parsedTerm] = lexicon[parsedTerm] || {};
- var lookup = lexicon[parsedTerm];
-
+ if (!lexicon.has(parsedTerm)) {
+ lexicon.set(parsedTerm, {});
+ }
+ var lookup = lexicon.get(parsedTerm);
lookup[thread] = lookup[thread] || { strength: 1 };
+
var res = lookup[thread];
res.thread = res.thread || thread;
res.comment = res.comment || comment;
@@ -124,13 +126,16 @@ function parse_terms(opt) {
}
count += 1;
- lex_counts[term] = lex_counts[term] || new Set();
+ if (!lex_counts.has(parsedTerm)) {
+ lex_counts.set(parsedTerm, new Set());
+ }
+ const lex_count = lex_counts.get(parsedTerm);
try {
- lex_counts[term].add(res.thread);
+ lex_count.add(res.thread);
} catch (error) {
console.error(error);
- console.log(term, terms, lex_counts[term]);
+ console.log(term, terms, lex_count);
}
}
return count || 0;
@@ -141,7 +146,7 @@ function lexicon_store() {
console.log("writing db...");
// console.log(Object.keys(lexicon));
search_db.reset();
- Object.keys(lexicon).forEach((term) => {
+ for (const term of lexicon.keys()) {
if (STOPWORDS.has(term)) return;
var serialized = serialize_matches(term);
if (!serialized) return;
@@ -150,12 +155,12 @@ function lexicon_store() {
// if (put_total > 10) return
// console.log(term)
search_db.put(term, serialized);
- });
+ }
// search_db.save();
}
function serialize_matches(term) {
- var matches = lexicon[term];
- var lex_count = lex_counts[term]?.size || 0;
+ var matches = lexicon.get(term);
+ var lex_count = lex_counts.get(term)?.size || 0;
if (!lex_count) {
return null;
}