summaryrefslogtreecommitdiff
path: root/src/feed.clj
diff options
context:
space:
mode:
Diffstat (limited to 'src/feed.clj')
-rwxr-xr-xsrc/feed.clj93
1 files changed, 65 insertions, 28 deletions
diff --git a/src/feed.clj b/src/feed.clj
index d3cf3f6..fdeb0be 100755
--- a/src/feed.clj
+++ b/src/feed.clj
@@ -1,15 +1,20 @@
(ns feed
+ (:import java.util.Date)
(:require [clojure.contrib.str-utils2 :as s])
(:use clojure.contrib.condition
clojure.contrib.duck-streams
+ clojure.contrib.seq-utils
+ clojure.contrib.sql
compojure
+ rooms
+ scheduled-agent
utils))
(def *feeds-path* "docs/feeds.csv")
(defn parse-line [line]
(let [r (s/split line #",")]
- (zipmap [:room-name :desc :feed-link :site-link :contact]
+ (zipmap [:room-key :desc :feed-link :site-link :contact]
(map #(.trim (.replaceAll % "\"" ""))
r))))
@@ -21,16 +26,31 @@ SELECT *
FROM UNNEST(?) as v
WHERE NOT EXISTS (SELECT 1
FROM feed_images f
- WHERE f.external_url = v
+ WHERE f.image_url = v
AND f.room_id = ?)
")
(defn filter-posted-images [urls room-id]
(if (empty? urls)
[]
- (do-select [*image-posted-qry*
- (sql-array "text" urls)
- room-id])))
+ (map :v
+ (do-select [*image-posted-qry*
+ (sql-array "text" urls)
+ room-id]))))
+
+(defn insert-feed-image-to-db! [room-id feed img user-id]
+ (with-connection *db*
+ (transaction
+ (let [acc (comp :message_id first)
+ m-id (acc
+ (do-select ["INSERT INTO messages (user_id, room_id, content, is_image)
+ VALUES (?, ?, ?, true) RETURNING message_id"
+ user-id room-id img]))]
+ (do-prepared "INSERT INTO feed_images
+ (feed_url, image_url, room_id, message_id)
+ VALUES (?, ?, ?, ?)"
+ [feed img room-id m-id])
+ m-id))))
; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif
(def *image-regex*
@@ -50,30 +70,51 @@ WHERE NOT EXISTS (SELECT 1
image-filters)
[img nil]))
-(defn filter-images [imgs]
- (let [filtered (map filter-image imgs)]
- [(for [[img r] filtered :when (nil? r)]
- img)
- (for [[img r] filtered :when r]
- [img r])]))
+(defn classify-images [imgs]
+ (let [good? (comp not boolean second)
+ res (group-by good? (map filter-image imgs))]
+ [(map first (res true))
+ (res false)]))
(defn classify-images-from-feed [feed]
- (let [[ms text] (with-timing (slurp* feed))
- [g b] (filter-images (extract-images text))]
+ (let [[ms text] (with-timing (download-http-url feed))
+ [g b] (classify-images (extract-images text))]
[g b ms]))
-(defn download-all-feeds! []
- (doseq [f (read-feeds)]
+(defn process-feed [f]
+ (let [room-key (:room-key f)
+ room-id (get-or-create-room! room-key)
+ [bot-nick bot-id] (get-or-create-room-bot! room-key)
+ feed (:feed-link f)
+ [good bad time] (classify-images-from-feed feed)
+ filtered-good (filter-posted-images good room-id)]
+ (doseq [img filtered-good]
+ (println (format "Inserting %s into room-id %s" img room-key))
+ (let [msg-id (insert-feed-image-to-db! room-id feed img bot-id)
+ msg {:msg_id msg-id
+ :nick bot-nick
+ :created_on (new Date)
+ :content img}]
+ (dosync
+ (add-message msg (lookup-room room-key)))))))
+
+(defn process-all-feeds! []
+ (doseq [f (shuffle (read-feeds))]
(try
- (when-let [url (:feed-link f)]
- (let [c (slurp* url)
- images (extract-images c)]
- (println url images)))
+ (if (and (:room-key f) (:feed-link f))
+ (process-feed f)
+ (println "Incomplete feed " f))
(catch Exception e
(print-stack-trace e)))))
-(defn download-feed [room-id feed-user-nick feed]
- "TODO")
+;; Feed download schedule
+
+(def *feed-refresh-period-sec* (* 30 60))
+
+;(def *feed-downloader*
+; (scheduled-agent process-all-feeds!
+; *feed-refresh-period-sec*
+; nil))
;; Testing
@@ -97,14 +138,13 @@ WHERE NOT EXISTS (SELECT 1
[:div
[:a {:href img}
[:img {:src img}]]]))
-
(defn feed-test [session params]
(if-vip
(if-let [feed (params :url)]
- (let [[slurp-ms text] (with-timing (slurp* feed))
+ (let [[slurp-ms text] (with-timing (download-http-url feed))
[process-ms imgs] (with-timing (extract-images text))
- [good-imgs bad-imgs] (filter-images imgs)]
+ [good-imgs bad-imgs] (classify-images imgs)]
(html [:body
[:h1 (str "Images for " feed)]
[:div (format "Downloaded in %s ms" slurp-ms)]
@@ -117,8 +157,5 @@ WHERE NOT EXISTS (SELECT 1
(show-bad-images bad-imgs)
[:hr]
[:h2 "Raw Feed Contents"]
- [:pre (escape-html text)]
- ]))
+ [:pre (escape-html text)]]))
(redirect-to "/feed-test"))))
-
-