;; Provenance (from the original patch header):
;;   From dba1c87e89e391b3e1ccd35f2a3aa600380feb8f Mon Sep 17 00:00:00 2001
;;   From: Scott Ostler
;;   Date: Thu, 9 Sep 2010 21:43:38 -0400
;;   Subject: Moved msg classification to utils.clj
;;   src/utils.clj | 18 +++++++++++++++++-
;;   1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src')
;;   diff --git a/src/utils.clj b/src/utils.clj, index 455f8a0..9e4fc38 100755
;;   cgit v1.2.3-70-g09d2
;;
;; The forms below are the parts of src/utils.clj that the patch touches or
;; shows as context, laid out as ordinary Clojure source.

;; ---- hunk 1 (around line 33 of src/utils.clj) ----

(def *dumps-per-page* 20)
(def *vip-dumps-per-page* 200)

;; Message parsing

;; http://snippets.dzone.com/posts/show/6995
;; Anchored with ^ and $, so it must match one whole token.
(def url-regex #"(?i)^((http\:\/\/|https\:\/\/|ftp\:\/\/)|(www\.))+(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?$")
;; An image file extension followed by `?`, `&`, or the end of the string.
(def pic-regex #"(?i)\.(jpg|jpeg|png|gif|bmp|svg)(\?|&|$)")

(defn is-image?
  "Returns a truthy value (the pic-regex match) when `word` matches both
  url-regex and pic-regex, and nil or false otherwise. `word` must be a
  non-nil string."
  [word]
  (and (re-find url-regex word)
       (re-find pic-regex word)))

(defn classify-msg
  "Classifies `msg` by its whitespace-separated tokens:
    :image - there is at least one token and every token satisfies is-image?
    :mixed - some, but not all, tokens satisfy is-image?
    :text  - no token satisfies is-image?, or msg is nil, empty, or
             contains only whitespace."
  [msg]
  ;; re-seq on \S+ never yields empty tokens, so leading or repeated spaces,
  ;; tabs and newlines cannot turn an all-image message into :mixed.
  (let [words (re-seq #"\S+" (or msg ""))
        imgs  (map is-image? words)]
    (cond (empty? words)        :text   ; every? is vacuously true on an empty seq
          (every? boolean imgs) :image
          (some boolean imgs)   :mixed
          :else                 :text)))

;; Misc

;; (defn except! [& more] ...)  -- the patch context ends here; body not shown.

;; ---- hunk 2 (around line 138 of src/utils.clj; the patch removes one blank line) ----

;; Formatters

(defn comma-format
  "Formats the number `i` using the java.text.DecimalFormat pattern \"#,###\"."
  [i]
  (.format (java.text.DecimalFormat. "#,###") i))