(ns feed
  (:require [clojure.contrib.str-utils2 :as s])
  (:use clojure.contrib.condition
        clojure.contrib.duck-streams
        compojure
        utils))

;; Location of the CSV file listing the feeds, one feed per line.
(def *feeds-path* "docs/feeds.csv")

(defn parse-line
  "Turns one line of the feeds CSV into a map keyed by :room-name, :desc,
  :feed-link, :site-link and :contact (in column order). Double quotes are
  stripped and each value is trimmed. A line with fewer columns yields a map
  with only the leading keys.
  NOTE(review): the split is a plain split on commas, so a quoted field that
  itself contains a comma will be broken into two columns."
  [line]
  (let [r (s/split line #",")]
    (zipmap [:room-name :desc :feed-link :site-link :contact]
            (map #(.trim (.replaceAll % "\"" "")) r))))

(defn read-feeds
  "Reads *feeds-path* and returns a seq of feed maps (see parse-line).
  The first line of the file is skipped (presumably a header row)."
  []
  (rest (map parse-line (read-lines *feeds-path*))))

;; Selects, from an array of URLs (first parameter), those that have no row in
;; feed_images for the given room id (second parameter).
(def *image-posted-qry* " SELECT * FROM UNNEST(?) as v WHERE NOT EXISTS (SELECT 1 FROM feed_images f WHERE f.external_url = v AND f.room_id = ?) ")

(defn filter-posted-images
  "Runs *image-posted-qry* for urls and room-id and returns the query result;
  returns [] without touching the database when urls is empty.
  do-select and sql-array are not defined in this file, so the exact row shape
  of the result depends on those helpers."
  [urls room-id]
  (if (empty? urls)
    []
    (do-select [*image-posted-qry* (sql-array "text" urls) room-id])))

; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif
;; Path segments exclude whitespace, quotes and angle brackets in addition to
;; / # ?. Without that, the greedy segment match can run out of the URL, across
;; the surrounding markup, and end at some later ".png"/".jpg"/".gif" in the text.
(def *image-regex* #"(?i)https?://(?:[a-z0-9\-]+\.)+[a-z]{2,6}(?:/[^/#?\s\"'<>]+)+\.(?:jpeg|jpg|gif|png)")

(defn extract-images
  "Returns a seq of every substring of text that matches *image-regex*
  (http/https URLs ending in jpeg, jpg, gif or png), in order of appearance."
  [text]
  (re-seq *image-regex* text))

(defn is-thumbnail?
  "True when img contains thumb, thumbs or small (case-insensitive) delimited
  on both sides by one of - . _"
  [img]
  (boolean (re-find #"(?i)[-._](thumb|small|thumbs)[-._]" img)))

;; Ordered list of [reason predicate] pairs. An image is rejected with the
;; reason of the first predicate that returns true for it.
(def image-filters
  [["THUMBNAIL" is-thumbnail?]])

(defn filter-image
  "Returns [img reason] for the first entry of image-filters whose predicate
  accepts img, or [img nil] when no filter applies."
  [img]
  (or (some (fn [[r f]] (if (f img) [img r])) image-filters)
      [img nil]))

(defn filter-images
  "Splits imgs into a two-element vector: first the images that passed every
  filter, then [img reason] pairs for the images that were rejected."
  [imgs]
  (let [filtered (map filter-image imgs)]
    [(for [[img r] filtered :when (nil? r)] img)
     (for [[img r] filtered :when r] [img r])]))

(defn classify-images-from-feed
  "Downloads feed, extracts image URLs from it and returns [good bad ms]:
  the accepted images, the rejected [img reason] pairs, and the first element
  of with-timing's result (used here as the download time)."
  [feed]
  (let [[ms text] (with-timing (slurp* feed))
        [g b] (filter-images (extract-images text))]
    [g b ms]))

(defn download-all-feeds!
  "Downloads every feed listed in the feeds file and prints its URL followed by
  the image URLs found in it. A failure on one feed prints a stack trace and
  does not stop the remaining feeds."
  []
  (doseq [f (read-feeds)]
    (try
      ;; An empty CSV cell parses to "", which is truthy; not-empty turns it
      ;; into nil so rows without a feed link are skipped instead of failing.
      (when-let [url (not-empty (:feed-link f))]
        (let [c (slurp* url)
              images (extract-images c)]
          (println url images)))
      (catch Exception e
        (print-stack-trace e)))))

(defn download-feed
  "Placeholder: not implemented yet. Ignores its arguments and returns the
  string \"TODO\"."
  [room-id feed-user-nick feed]
  "TODO")

;; Testing

(defn- http-url?
  "True when url is a string that starts (after optional whitespace) with
  http:// or https://."
  [url]
  (boolean (and (string? url)
                (re-find #"(?i)^\s*https?://" url))))

(defn feed-test-page
  "Renders the form used to submit a feed URL to /feed-test.
  Wrapped in if-vip (defined outside this file)."
  [session]
  (if-vip
    (html [:body
           [:h1 "Feed Test"]
           [:form {:action "/feed-test" :method "post"}
            [:input {:type "text" :name "url"}]
            [:input {:type "submit" :value "Send"}]]])))

(defn show-bad-images
  "Renders one div per [img reason] pair: the reason followed by the image,
  linked to itself."
  [imgs]
  (for [[img reason] imgs]
    [:div reason [:a {:href img} [:img {:src img}]]]))

(defn show-good-images
  "Renders one div per image URL, each image linked to itself."
  [imgs]
  (for [img imgs]
    [:div [:a {:href img} [:img {:src img}]]]))

(defn feed-test
  "Handles the feed test form: downloads the feed named by the :url parameter,
  extracts and filters its images, and renders timings, the accepted and
  rejected images, and the raw feed text. Redirects back to /feed-test when
  :url is missing or is not an http(s) URL.
  Wrapped in if-vip (defined outside this file)."
  [session params]
  (if-vip
    (let [feed (params :url)]
      ;; slurp* falls back to opening a local file when the string is not a
      ;; valid URL, and also accepts file:// URLs, so only http(s) input from
      ;; the form is ever fetched.
      (if (http-url? feed)
        (let [[slurp-ms text] (with-timing (slurp* feed))
              [process-ms imgs] (with-timing (extract-images text))
              [good-imgs bad-imgs] (filter-images imgs)]
          (html [:body
                 ;; feed is user input; escape it like the raw text below.
                 [:h1 (str "Images for " (escape-html feed))]
                 [:div (format "Downloaded in %s ms" slurp-ms)]
                 [:div (format "Processed in %s ms" process-ms)]
                 [:hr]
                 [:h2 "Good Images"]
                 (show-good-images good-imgs)
                 [:hr]
                 [:h2 "Filtered Out Images"]
                 (show-bad-images bad-imgs)
                 [:hr]
                 [:h2 "Raw Feed Contents"]
                 [:pre (escape-html text)]]))
        (redirect-to "/feed-test")))))