summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2017-12-08 02:52:19 +0100
committer Jules Laplace <julescarbon@gmail.com> 2017-12-08 02:56:41 +0100
commit 192abb9db60f95968953b515ce18700c6b2da090 (patch)
tree e0e77c935ad95ca9e641c6a1f19a22556df9b8d2
parent bbbd8bbab8737f5067c85376daf79cd8a5a9c4cb (diff)
snippets and middleware
-rw-r--r-- bucky/app/bucky.js 2
-rw-r--r-- bucky/app/router.js 9
-rw-r--r-- bucky/db/index.js 6
-rw-r--r-- bucky/search/middleware.js 44
-rw-r--r-- bucky/search/search.js (renamed from lib/search/index.js) 47
-rw-r--r-- bucky/search/snippet.js 88
-rw-r--r-- bucky/search/stopwords.js 18
-rw-r--r-- lib/search/snippet.js 103
8 files changed, 180 insertions, 137 deletions
diff --git a/bucky/app/bucky.js b/bucky/app/bucky.js
index 757592a..d1aad4d 100644
--- a/bucky/app/bucky.js
+++ b/bucky/app/bucky.js
@@ -157,7 +157,7 @@ var bucky = module.exports = {
next()
})
},
-
+
/* MAIL */
ensureMailboxes: function (req, res, next){
diff --git a/bucky/app/router.js b/bucky/app/router.js
index c3af565..a87e1ec 100644
--- a/bucky/app/router.js
+++ b/bucky/app/router.js
@@ -4,6 +4,7 @@ var fortune = require('./fortune')
var bucky = require('./bucky')
var db = require('./db')
var util = require('./util')
+var search = require('../search/middleware')
module.exports = function(app){
app.all('*', middleware.ensureLocals)
@@ -91,6 +92,14 @@ module.exports = function(app){
// delete a comment
})
+ app.get("/api/search",
+ middleware.ensureAuthenticated,
+ search.search,
+ search.getComments,
+ search.getFiles,
+ search.logQuery,
+ search.success
+ )
app.get("/api/keyword/:keyword",
middleware.ensureAuthenticated,
diff --git a/bucky/db/index.js b/bucky/db/index.js
index f376308..dcd5f20 100644
--- a/bucky/db/index.js
+++ b/bucky/db/index.js
@@ -92,6 +92,9 @@ db.getFileCounts = function(ids){
db.getFileSizes = function(ids){
return knex.column('thread').sum('size as size').select().from('files').where('thread', 'in', ids).groupBy('thread')
}
+// Select the files whose `id` is in `ids`.
+// NOTE(review): the search middleware calls .then() on this return value;
+// confirm that File.where(...) yields something thenable.
+db.getFilesById = function(ids){
+  var matching_files = File.where("id", "in", ids)
+  return matching_files
+}
db.createFile = function(data){
return new db.File(data).save()
}
@@ -117,6 +120,9 @@ db.getCommentsForThread = function (id, limit, offset, order){
return comments
})
}
+// Select the comments whose `id` is in `ids`.
+// NOTE(review): the search middleware calls .then() on this return value;
+// confirm that Comment.where(...) yields something thenable.
+db.getCommentsById = function(ids){
+  var matching_comments = Comment.where("id", "in", ids)
+  return matching_comments
+}
db.getCommentCounts = function(ids){
return knex.column('thread').count('* as count').select().from('comments').where('thread', 'in', ids).groupBy('thread')
}
diff --git a/bucky/search/middleware.js b/bucky/search/middleware.js
new file mode 100644
index 0000000..64ddd28
--- /dev/null
+++ b/bucky/search/middleware.js
@@ -0,0 +1,44 @@
+var search = require('./search')
+var snippet = require('./snippet')
+var db = require('../db')
+
+// Parse a pagination parameter. Query-string values arrive as strings, and
+// search() adds start + limit, so they must be numbers.
+// Returns undefined when the value is missing or not numeric.
+function to_int (value) {
+  var n = parseInt(value, 10)
+  return isNaN(n) ? undefined : n
+}
+
+/**
+ * Express middleware steps for the search API, meant to be chained in this
+ * order: search -> getComments -> getFiles -> logQuery -> success.
+ * The steps share state through `res.search`.
+ */
+module.exports = {
+
+  // Run the search and store its result object on res.search.
+  search: function (req, res, next) {
+    // The route is registered with app.get, so parameters normally arrive in
+    // the query string; fall back to the body for callers that send one.
+    var params = (req.query && req.query.query !== undefined) ? req.query : (req.body || {})
+    var results = search.search(params.query, to_int(params.start), to_int(params.limit))
+    res.search = results
+    next()
+  },
+
+  // Load the comments listed in res.search.comment_ids into res.search.comments.
+  getComments: function (req, res, next){
+    var comment_ids = res.search.comment_ids;
+    if (! comment_ids || ! comment_ids.length) {
+      return next()
+    }
+    db.getCommentsById(comment_ids).then(function(comments){
+      res.search.comments = comments
+      next()
+    }).catch(next) // hand DB failures to the error handler instead of hanging the request
+  },
+
+  // Load the files listed in res.search.file_ids into res.search.files.
+  getFiles: function (req, res, next){
+    var file_ids = res.search.file_ids
+    if (! file_ids || ! file_ids.length) {
+      return next()
+    }
+    db.getFilesById(file_ids).then(function(files){
+      res.search.files = files
+      next()
+    }).catch(next) // hand DB failures to the error handler instead of hanging the request
+  },
+
+  // Placeholder for query logging; currently a pass-through.
+  logQuery: function(req, res, next) {
+    // TODO: record res.search.query and res.search.total
+    next()
+  },
+
+  // Send the accumulated search result as the response.
+  success: function(req, res, next){
+    res.send(res.search)
+  }
+
+}
diff --git a/lib/search/index.js b/bucky/search/search.js
index 27f436f..afa9609 100644
--- a/lib/search/index.js
+++ b/bucky/search/search.js
@@ -1,4 +1,6 @@
var db = require('../db')
+var STOPWORDS = require('./stopwords')
+
var bdb_lib = require('berkeleydb')
var bdb = new bdb_lib.Db()
bdb.open('search.db')
@@ -15,25 +17,6 @@ function parse_terms (s) {
}
/**
 * Three-way comparator usable with Array.prototype.sort.
 * @param {*} a - left operand
 * @param {*} b - right operand
 * @returns {number} -1 when a < b, 0 when a === b, otherwise 1
 */
function cmp (a, b) {
  // must return a negative number (not `a` itself) when a sorts first
  return (a < b) ? -1 : (a === b) ? 0 : 1
}
-var STOPWORDS = new Set(
- "a about above across adj after again against all almost alone along also " +
- "although always am among an and another any anybody anyone anything anywhere " +
- "apart are around as aside at away be because been before behind being below " +
- "besides between beyond both but by can cannot could did do does doing done " +
- "down downwards during each either else enough etc even ever every everybody " +
- "everyone except far few for forth from get gets got had hardly has have having " +
- "her here herself him himself his how however i if in indeed instead into inward " +
- "is it its itself just kept many maybe might mine more most mostly much must " +
- "myself near neither next no nobody none nor not nothing nowhere of off often on " +
- "only onto or other others ought our ours out outside over own p per please plus " +
- "pp quite rather really said seem self selves several shall she should since so " +
- "some somebody somewhat still such than that the their theirs them themselves " +
- "then there therefore these they this thorough thoroughly those through thus to " +
- "together too toward towards under until up upon v very was well were what " +
- "whatever when whenever where whether which while who whom whose will with" +
- "within without would yet young your yourself s".split(" ")
-);
-
function find_term(term) {
var matches = bdb.get(term).split(",").map((s) => {
var partz = s.split(" ")
@@ -57,6 +40,7 @@ function search (query, start, limit) {
var total
var to_display = limit
var threads = {}
+ var thread_ids = []
var comment_ids = []
var file_ids = []
var results = []
@@ -89,21 +73,18 @@ function search (query, start, limit) {
if (match.file) file_ids.push(match.file)
return false
})
-
- db.storeQuery(query, total)
-
- my $files = $self->files_by_id($files_to_get);
- my $comments = $self->comments_by_id($comments_to_get);
- $self->log_query($query, $total);
+
return {
- start => $start + $limit,
- limit => $limit,
- total => $total,
- results => $results,
- threads => $threads,
- comments => $comments,
- files => $files,
- terms => $terms,
+ query: query,
+ start: start,
+ next: start + limit,
+ limit: limit,
+ total: total,
+ results: results,
+ thread_ids: thread_ids,
+ comment_ids: comment_ids,
+    file_ids: file_ids, // the local array declared above; no "$" sigil in JS
+ terms: terms,
};
}
diff --git a/bucky/search/snippet.js b/bucky/search/snippet.js
new file mode 100644
index 0000000..cd0657f
--- /dev/null
+++ b/bucky/search/snippet.js
@@ -0,0 +1,88 @@
+var util = require('../util/util')
+var STOPWORDS = require('./stopwords')
+
+// Cut a snippet of context around the search terms in `s`, then bold the
+// terms inside that snippet.
+function bold_snippet(s, terms) {
+  var excerpt = snippet(s, terms)
+  return bold_terms(excerpt, terms)
+}
+/**
+ * Wrap every whole-word, case-insensitive occurrence of each term in <b> tags.
+ * @param {string} s - text to highlight; passed through util.sanitize first
+ * @param {string[]} terms - search terms, matched literally
+ * @returns {string} the sanitized text with matches wrapped in <b>...</b>
+ */
+function bold_terms (s, terms) {
+  s = util.sanitize(s)
+  terms.forEach( (term) => {
+    if (! term) return
+    // escape regex metacharacters so the term is matched literally
+    var escaped = String(term).replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
+    // "\\b" is a word boundary; a bare "\b" inside a string literal is a backspace
+    var term_re = new RegExp("\\b(" + escaped + ")\\b", "gi")
+    // replace() returns a new string; $1 keeps the casing found in the text
+    s = s.replace(term_re, "<b>$1</b>")
+  })
+  return s
+}
+/**
+ * Build a short excerpt of `s` showing each search-term match with a few
+ * words of context on either side, separated by "...".
+ * @param {string} s - text to excerpt; passed through util.sanitize first
+ * @param {string[]} terms - search terms, matched literally as whole words
+ * @returns {string} the excerpt; just "..." when nothing matched
+ */
+function snippet(s, terms) {
+  s = util.sanitize(s)
+
+  // escape regex metacharacters so terms are matched literally
+  var escaped_terms = terms
+    .filter((term) => !! term)
+    .map((term) => String(term).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
+
+  // "\\b" is a word boundary; a bare "\b" inside a string literal is a backspace.
+  // With no usable terms there is nothing to match.
+  var term_re = escaped_terms.length
+    ? new RegExp("\\b(" + escaped_terms.join("|") + ")\\b", "i")
+    : null
+
+  var words = s.split(/\s+/)
+
+  // deduper for matching word indexes, so we don't add a word twice
+  var index_matches = {}
+
+  // words in the eventual snippet
+  var words_matched = []
+
+  // counter for aggregating context after a match
+  var aggr = 0
+
+  // amount of context to show, in number of words surrounding a match
+  var pad = 4
+
+  // loop over each of the words in the string
+  words.some((word, i) => {
+    var match = term_re ? term_re.exec(word) : null
+
+    // test the matched term itself (not the whole word, which may carry
+    // punctuation) against the stopword list
+    if (match && ! STOPWORDS.has(match[1].toLowerCase())) {
+      // if we aren't already aggregating, add an ellipsis
+      if (! aggr) {
+        words_matched.push("...")
+      }
+
+      // look backward `pad` words, ending on the matching word itself
+      for (var j = -pad; j < 1; j++) {
+        var idx = i + j
+
+        // is this a valid index? has it already been encountered?
+        // (idx never exceeds i, so only the lower bound needs checking)
+        if (idx < 0) continue
+        if (index_matches[idx]) continue
+
+        // checks out, save this word
+        words_matched.push(words[idx])
+
+        // note the matching index in our deduper
+        index_matches[idx] = 1
+      }
+      // enter aggregate mode -- add the next `pad` words
+      aggr = pad
+    }
+
+    // have we been told to aggregate?
+    else if (aggr) {
+      // save this word
+      words_matched.push(word)
+
+      // add index to the deduper
+      index_matches[i] = 1
+
+      // one less word to aggregate
+      aggr--
+    }
+
+    // keep snippets to a modest length; returning true stops the loop
+    return words_matched.length > 30
+  })
+
+  // add a trailing ellipsis
+  words_matched.push("...")
+
+  // create the snippet from the saved context words
+  return words_matched.join(" ")
+}
+
+// Public API of the snippet module.
+module.exports = {
+  bold_snippet: bold_snippet,
+  bold_terms: bold_terms,
+  snippet: snippet,
+}
\ No newline at end of file
diff --git a/bucky/search/stopwords.js b/bucky/search/stopwords.js
new file mode 100644
index 0000000..ceffe14
--- /dev/null
+++ b/bucky/search/stopwords.js
@@ -0,0 +1,18 @@
+/**
+ * Set of lowercase English stopwords, required by search.js and snippet.js.
+ *
+ * The concatenated list is parenthesized so that split(" ") applies to the
+ * whole string; without the parentheses it binds only to the last literal
+ * and the Set ends up holding single characters.
+ */
+module.exports = new Set((
+  "a about above across adj after again against all almost alone along also " +
+  "although always am among an and another any anybody anyone anything anywhere " +
+  "apart are around as aside at away be because been before behind being below " +
+  "besides between beyond both but by can cannot could did do does doing done " +
+  "down downwards during each either else enough etc even ever every everybody " +
+  "everyone except far few for forth from get gets got had hardly has have having " +
+  "her here herself him himself his how however i if in indeed instead into inward " +
+  "is it its itself just kept many maybe might mine more most mostly much must " +
+  "myself near neither next no nobody none nor not nothing nowhere of off often on " +
+  "only onto or other others ought our ours out outside over own p per please plus " +
+  "pp quite rather really said seem self selves several shall she should since so " +
+  "some somebody somewhat still such than that the their theirs them themselves " +
+  "then there therefore these they this thorough thoroughly those through thus to " +
+  "together too toward towards under until up upon v very was well were what " +
+  "whatever when whenever where whether which while who whom whose will with " +
+  "within without would yet young your yourself s"
+).split(" "));
diff --git a/lib/search/snippet.js b/lib/search/snippet.js
deleted file mode 100644
index de71911..0000000
--- a/lib/search/snippet.js
+++ /dev/null
@@ -1,103 +0,0 @@
-var util = require('../util/util')
-
-function bold_terms (s, terms) {
-
-}
-sub bold_terms
- {
- my ($self, $string, $terms) = @_;
- $string = $self->strip_html($string);
- foreach my $term (@$terms)
- {
- $string =~ s/\b($term)\b/<b>$1<\/b>/gi;
- }
- return $string;
- }
-sub bold_snippet
- {
- my ($self, $string, $terms) = @_;
- my $snippet = $self->snippet($string, $terms);
- return $self->bold_terms($snippet, $terms);
- }
-sub snippet
- {
- my ($self, $string, $terms) = @_;
-
- # clean up the string we got
- $string = $self->strip_html($string);
-
- # create a regex out of the search terms
- my $term_re = join "|", @$terms;
-
- # take the string to be snippetized and split it into words
- my @words = split /\s+/, $string;
-
- # deduper for matching @words indexes, so we don't add a word twice
- my $index_matches = {};
-
- # words in the eventual snippet
- my @words_matched;
-
- # the snippet itself
- my $snippet = '';
-
- # counter for aggregating context after a match
- my $aggr = 0;
-
- # amount of context to show, in number of words surrounding a match
- my $pad = 4;
-
- # loop over each of the words in the string
- for (my $i = 0; $i < scalar @words; $i++)
- {
- # does this word contain a match?
- if ($words[$i] =~ /\b($term_re)\b/i && ! $self->is_stopword($1))
- {
- # if we aren't already aggregating, add an ellipsis
- if (! $aggr)
- {
- push @words_matched, "...";
- }
- # look backward $pad words
- for (my $j = -$pad; $j < 1; $j++)
- {
- # create a new index from the offset
- my $idx = $i + $j;
-
- # is this a valid index? has it already been encountered?
- next if $idx < 0;
- next if $idx > scalar @words;
- next if exists $index_matches->{$i+$j};
-
- # checks out, save this word
- push @words_matched, $words[$i+$j];
-
- # note the matching index in our deduper
- $index_matches->{$i+$j} ++;
- }
- # enter aggregate mode -- add the next $pad words
- $aggr = $pad;
- }
- # have we been told to aggregate?
- elsif ($aggr)
- {
- # save this word
- push @words_matched, $words[$i];
-
- # add index to the deduper
- $index_matches->{$i} ++;
-
- # one less word to aggregate
- $aggr--;
- }
- # keep snippets to a modest length
- last if scalar @words_matched > 30;
- }
- # add a trailing ellipsis
- push @words_matched, "...";
-
- # create the snippet from the saved context words
- $snippet = join " ", @words_matched;
-
- return $snippet;
- } \ No newline at end of file