summaryrefslogtreecommitdiff
path: root/src/feed.clj
diff options
context:
space:
mode:
authorsostler <sbostler@gmail.com>2010-05-17 11:58:28 -0400
committersostler <sbostler@gmail.com>2010-05-17 11:58:28 -0400
commitffc48db948189fbce594d1a86be3fa197315aaed (patch)
tree40745df8919c03744cee3aadc7d3ee3ba0fd338f /src/feed.clj
parent43a4c7ce3ea0c4278232a0b93c4876372ca7e037 (diff)
test page for feed import
Diffstat (limited to 'src/feed.clj')
-rwxr-xr-xsrc/feed.clj121
1 files changed, 120 insertions, 1 deletions
diff --git a/src/feed.clj b/src/feed.clj
index 30ff973..e075e69 100755
--- a/src/feed.clj
+++ b/src/feed.clj
@@ -1 +1,120 @@
-(ns feed)
+(ns feed
+ (:require [clojure.contrib.str-utils2 :as s])
+ (:use clojure.contrib.condition
+ clojure.contrib.duck-streams
+ compojure
+ utils))
+
+(def *feeds-path* "docs/feeds.csv") ; relative path of the CSV file that read-feeds loads
+
+(defn parse-line
+  "Maps one CSV line to {:room-name :desc :feed-link :site-link :contact}; quotes stripped, values trimmed."
+  [line]
+  (let [cols (s/split line #",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)")] ; split only on commas outside quotes
+    (zipmap [:room-name :desc :feed-link :site-link :contact] (map #(.trim (.replaceAll % "\"" "")) cols))))
+
+(defn read-feeds [] ; parse-line over every line of *feeds-path* except the first (presumably a header row)
+  (map parse-line (drop 1 (read-lines *feeds-path*))))
+
+(def *image-posted-qry* "
+SELECT *
+FROM UNNEST(?) as v
+WHERE NOT EXISTS (SELECT 1
+ FROM feed_images f
+ WHERE f.external_url = v
+ AND f.room_id = ?)
+") ; params: (1) array of URLs, (2) room id; yields the URLs with no feed_images row for that room
+
+(defn filter-posted-images
+  "Runs *image-posted-qry* with `urls` (as a text array) and `room-id`; returns [] when `urls` is empty."
+  [urls room-id]
+  (if (seq urls)
+    (do-select [*image-posted-qry* (sql-array "text" urls) room-id])
+    []))
+
+; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif
+(def *image-regex* ; host labels may contain digits; path segments stop at whitespace, quotes, < and >
+  #"(?i)https?://(?:[a-z0-9\-]+\.)+[a-z]{2,6}(?:/[^/#?\s\"'<>]+)+\.(?:jpeg|jpg|gif|png)")
+
+(defn extract-images [text] ; returns the substrings of `text` that match *image-regex*
+ (re-seq *image-regex* text))
+
+(defn is-thumbnail? [img] ; true when "thumb", "thumbs" or "small" appears between -, . or _ (any case)
+  (if (re-find #"(?i)[-._](thumb|small|thumbs)[-._]" img) true false))
+
+(def image-filters [["THUMBNAIL" is-thumbnail?]]) ; [reason-label predicate] pairs tried in order by filter-image
+
+(defn filter-image
+  "Returns [img label] for the first image-filters entry whose predicate accepts img, else [img nil]."
+  [img]
+  (let [tag (fn [[label pred]] (when (pred img) [img label]))]
+    (if-let [hit (some tag image-filters)] hit [img nil])))
+
+(defn filter-images
+  "Runs filter-image over imgs; returns [kept rejected], where rejected holds [img label] pairs."
+  [imgs]
+  (let [tagged (map filter-image imgs)]
+    [(map first (filter #(nil? (second %)) tagged))
+     (filter second tagged)]))
+
+(defn classify-images-from-feed [feed] ; => [kept-imgs rejected-pairs timing] for the content read from `feed`
+  (let [[elapsed body] (with-timing (slurp* feed))
+        [kept dropped] (filter-images (extract-images body))]
+    [kept dropped elapsed]))
+
+(defn download-all-feeds!
+  "Prints each feed's :feed-link with the image URLs found in it; a failing feed prints its stack trace."
+  []
+  (doseq [{link :feed-link} (read-feeds)]
+    (try
+      (when link
+        (println link (extract-images (slurp* link))))
+      (catch Exception e
+        (print-stack-trace e)))))
+
+(defn download-feed [room-id feed-user-nick feed] ; unimplemented stub; all three arguments are ignored
+ "TODO") ; placed after the arg vector, so this string is the return value, not a docstring
+
+;; Testing
+
+(defn feed-test-page [session] ; renders the "Feed Test" form page
+ (if-vip ; defined outside this file; presumably gates on `session` being a VIP -- TODO confirm
+ (html [:body
+ [:h1 "Feed Test"]
+ [:form {:action "/feed-test" :method "post"} ; submits to /feed-test via POST
+ [:input {:type "text" :name "url"}] ; read back by feed-test as (params :url)
+ [:input {:type "submit" :value "Send"}]]])))
+
+(defn show-bad-images
+  "Maps each [img reason] pair to a :div holding the reason and the image linked to its own URL."
+  [imgs]
+  (map (fn [[url why]]
+         [:div why [:a {:href url} [:img {:src url}]]])
+       imgs))
+
+(defn show-good-images
+  "Maps each image URL to a :div containing the image wrapped in a link to itself."
+  [imgs]
+  (map (fn [url] [:div [:a {:href url} [:img {:src url}]]])
+       imgs))
+
+
+(defn feed-test
+  "Fetches the feed named by the :url param and renders its kept images, filtered images, fetch
+  timing and escaped raw text. Anything other than an http(s) URL redirects to /feed-test."
+  [session params]
+  (if-vip
+    (let [feed (params :url)]
+      ;; slurp* also opens local paths and file: URLs, so only http(s) input is ever fetched.
+      (if (and feed (re-find #"(?i)^https?://" feed))
+        (let [[ms text] (with-timing (slurp* feed))
+              [good-imgs bad-imgs] (filter-images (extract-images text))]
+          (html [:body
+                 [:h1 (str "Images for " (escape-html feed))] ; feed is user input: escape before echoing
+                 [:div (format "Downloaded in %s ms" ms)]
+                 [:h3 "Images"] (show-good-images good-imgs)
+                 [:h3 "Filtered Images"] (show-bad-images bad-imgs)
+                 [:h3 "Raw Feed Contents"] [:pre (escape-html text)]]))
+        (redirect-to "/feed-test")))))
+
+