diff options
| author | Scott Ostler <scottbot9000@gmail.com> | 2010-09-09 21:43:38 -0400 |
|---|---|---|
| committer | Scott Ostler <scottbot9000@gmail.com> | 2010-09-09 21:43:38 -0400 |
| commit | dba1c87e89e391b3e1ccd35f2a3aa600380feb8f (patch) | |
| tree | eeb1195c7cdc0285de0f18f7142c1ba3fcf06762 | |
| parent | 136fb22a14391e781c7cd1fed624af8fbe638f1f (diff) | |
Moved msg classification to utils.clj
| -rwxr-xr-x | src/utils.clj | 18 |
1 files changed, 17 insertions, 1 deletions
```diff
diff --git a/src/utils.clj b/src/utils.clj
index 455f8a0..9e4fc38 100755
--- a/src/utils.clj
+++ b/src/utils.clj
@@ -33,6 +33,23 @@
 (def *dumps-per-page* 20)
 (def *vip-dumps-per-page* 200)
 
+;; Message parsing
+
+;; http://snippets.dzone.com/posts/show/6995
+(def url-regex #"(?i)^((http\:\/\/|https\:\/\/|ftp\:\/\/)|(www\.))+(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?$")
+(def pic-regex #"(?i)\.(jpg|jpeg|png|gif|bmp|svg)(\?|&|$)")
+
+(defn is-image? [word]
+  (and (re-find url-regex word)
+       (re-find pic-regex word)))
+
+(defn classify-msg [msg]
+  (let [words (.split msg " ")
+        imgs (map is-image? words)]
+    (cond (every? boolean imgs) :image
+          (some boolean imgs) :mixed
+          :else :text)))
+
 ;; Misc
 
 (defn except! [& more]
@@ -121,7 +138,6 @@
 
 ;; Formatters
 
-
 (defn comma-format [i]
   (.format (java.text.DecimalFormat. "#,###") i))
 
```
