diff options
Diffstat (limited to 'src/feed.clj')
| -rwxr-xr-x | src/feed.clj | 93 |
1 files changed, 65 insertions, 28 deletions
diff --git a/src/feed.clj b/src/feed.clj index d3cf3f6..fdeb0be 100755 --- a/src/feed.clj +++ b/src/feed.clj @@ -1,15 +1,20 @@ (ns feed + (:import java.util.Date) (:require [clojure.contrib.str-utils2 :as s]) (:use clojure.contrib.condition clojure.contrib.duck-streams + clojure.contrib.seq-utils + clojure.contrib.sql compojure + rooms + scheduled-agent utils)) (def *feeds-path* "docs/feeds.csv") (defn parse-line [line] (let [r (s/split line #",")] - (zipmap [:room-name :desc :feed-link :site-link :contact] + (zipmap [:room-key :desc :feed-link :site-link :contact] (map #(.trim (.replaceAll % "\"" "")) r)))) @@ -21,16 +26,31 @@ SELECT * FROM UNNEST(?) as v WHERE NOT EXISTS (SELECT 1 FROM feed_images f - WHERE f.external_url = v + WHERE f.image_url = v AND f.room_id = ?) ") (defn filter-posted-images [urls room-id] (if (empty? urls) [] - (do-select [*image-posted-qry* - (sql-array "text" urls) - room-id]))) + (map :v + (do-select [*image-posted-qry* + (sql-array "text" urls) + room-id])))) + +(defn insert-feed-image-to-db! [room-id feed img user-id] + (with-connection *db* + (transaction + (let [acc (comp :message_id first) + m-id (acc + (do-select ["INSERT INTO messages (user_id, room_id, content, is_image) + VALUES (?, ?, ?, true) RETURNING message_id" + user-id room-id img]))] + (do-prepared "INSERT INTO feed_images + (feed_url, image_url, room_id, message_id) + VALUES (?, ?, ?, ?)" + [feed img room-id m-id]) + m-id)))) ; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif (def *image-regex* @@ -50,30 +70,51 @@ WHERE NOT EXISTS (SELECT 1 image-filters) [img nil])) -(defn filter-images [imgs] - (let [filtered (map filter-image imgs)] - [(for [[img r] filtered :when (nil? r)] - img) - (for [[img r] filtered :when r] - [img r])])) +(defn classify-images [imgs] + (let [good? (comp not boolean second) + res (group-by good? (map filter-image imgs))] + [(map first (res true)) + (res false)])) (defn classify-images-from-feed [feed] - (let [[ms text] (with-timing (slurp* feed)) - [g b] (filter-images (extract-images text))] + (let [[ms text] (with-timing (download-http-url feed)) + [g b] (classify-images (extract-images text))] [g b ms])) -(defn download-all-feeds! [] - (doseq [f (read-feeds)] +(defn process-feed [f] + (let [room-key (:room-key f) + room-id (get-or-create-room! room-key) + [bot-nick bot-id] (get-or-create-room-bot! room-key) + feed (:feed-link f) + [good bad time] (classify-images-from-feed feed) + filtered-good (filter-posted-images good room-id)] + (doseq [img filtered-good] + (println (format "Inserting %s into room-id %s" img room-key)) + (let [msg-id (insert-feed-image-to-db! room-id feed img bot-id) + msg {:msg_id msg-id + :nick bot-nick + :created_on (new Date) + :content img}] + (dosync + (add-message msg (lookup-room room-key))))))) + +(defn process-all-feeds! [] + (doseq [f (shuffle (read-feeds))] (try - (when-let [url (:feed-link f)] - (let [c (slurp* url) - images (extract-images c)] - (println url images))) + (if (and (:room-key f) (:feed-link f)) + (process-feed f) + (println "Incomplete feed " f)) (catch Exception e (print-stack-trace e))))) -(defn download-feed [room-id feed-user-nick feed] - "TODO") +;; Feed download schedule + +(def *feed-refresh-period-sec* (* 30 60)) + +;(def *feed-downloader* +; (scheduled-agent process-all-feeds! +; *feed-refresh-period-sec* +; nil)) ;; Testing @@ -97,14 +138,13 @@ WHERE NOT EXISTS (SELECT 1 [:div [:a {:href img} [:img {:src img}]]])) - (defn feed-test [session params] (if-vip (if-let [feed (params :url)] - (let [[slurp-ms text] (with-timing (slurp* feed)) + (let [[slurp-ms text] (with-timing (download-http-url feed)) [process-ms imgs] (with-timing (extract-images text)) - [good-imgs bad-imgs] (filter-images imgs)] + [good-imgs bad-imgs] (classify-images imgs)] (html [:body [:h1 (str "Images for " feed)] [:div (format "Downloaded in %s ms" slurp-ms)] @@ -117,8 +157,5 @@ WHERE NOT EXISTS (SELECT 1 (show-bad-images bad-imgs) [:hr] [:h2 "Raw Feed Contents"] - [:pre (escape-html text)] - ])) + [:pre (escape-html text)]])) (redirect-to "/feed-test")))) - - |
