diff options
| author | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-05-17 12:01:04 -0400 |
|---|---|---|
| committer | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-05-17 12:01:04 -0400 |
| commit | a56999f4bfc6d12c34f9fb7201b6f86e537a5207 (patch) | |
| tree | f7d4620442b928261127012c29260cd40f8a7b40 /src/feed.clj | |
| parent | 2439fbecf91c813c2f560ee2e45f25dfa7c01633 (diff) | |
| parent | f3d4f45db32c105e5c700668eb420f4d3709cbf3 (diff) | |
Merge branch 'master' of /pichat/repo
Diffstat (limited to 'src/feed.clj')
| -rwxr-xr-x | src/feed.clj | 121 |
1 files changed, 120 insertions, 1 deletions
diff --git a/src/feed.clj b/src/feed.clj index 30ff973..e075e69 100755 --- a/src/feed.clj +++ b/src/feed.clj @@ -1 +1,120 @@ -(ns feed) +(ns feed + (:require [clojure.contrib.str-utils2 :as s]) + (:use clojure.contrib.condition + clojure.contrib.duck-streams + compojure + utils)) + +(def *feeds-path* "docs/feeds.csv") + +(defn parse-line [line] + (let [r (s/split line #",")] + (zipmap [:room-name :desc :feed-link :site-link :contact] + (map #(.trim (.replaceAll % "\"" "")) + r)))) + +(defn read-feeds [] + (rest (map parse-line (read-lines *feeds-path*)))) + +(def *image-posted-qry* " +SELECT * +FROM UNNEST(?) as v +WHERE NOT EXISTS (SELECT 1 + FROM feed_images f + WHERE f.external_url = v + AND f.room_id = ?) +") + +(defn filter-posted-images [urls room-id] + (if (empty? urls) + [] + (do-select [*image-posted-qry* + (sql-array "text" urls) + room-id]))) + +; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif +(def *image-regex* + #"(?i)https?://(?:[a-z\-]+\.)+[a-z]{2,6}(?:/[^/#?]+)+\.(?:jpeg|jpg|gif|png)") + +(defn extract-images [text] + (re-seq *image-regex* text)) + +(defn is-thumbnail? [img] + (boolean (re-find #"(?i)[-._](thumb|small|thumbs)[-._]" img))) + +(def image-filters [["THUMBNAIL" is-thumbnail?]]) + +(defn filter-image [img] + (or (some + (fn [[r f]] (if (f img) [img r])) + image-filters) + [img nil])) + +(defn filter-images [imgs] + (let [filtered (map filter-image imgs)] + [(for [[img r] filtered :when (nil? r)] + img) + (for [[img r] filtered :when r] + [img r])])) + +(defn classify-images-from-feed [feed] + (let [[ms text] (with-timing (slurp* feed)) + [g b] (filter-images (extract-images text))] + [g b ms])) + +(defn download-all-feeds! [] + (doseq [f (read-feeds)] + (try + (when-let [url (:feed-link f)] + (let [c (slurp* url) + images (extract-images c)] + (println url images))) + (catch Exception e + (print-stack-trace e))))) + +(defn download-feed [room-id feed-user-nick feed] + "TODO") + +;; Testing + +(defn feed-test-page [session] + (if-vip + (html [:body + [:h1 "Feed Test"] + [:form {:action "/feed-test" :method "post"} + [:input {:type "text" :name "url"}] + [:input {:type "submit" :value "Send"}]]]))) + +(defn show-bad-images [imgs] + (for [[img reason] imgs] + [:div + reason + [:a {:href img} + [:img {:src img}]]])) + +(defn show-good-images [imgs] + (for [img imgs] + [:div + [:a {:href img} + [:img {:src img}]]])) + + +(defn feed-test [session params] + (if-vip + (if-let [feed (params :url)] + (let [[ms text] (with-timing (slurp* feed)) + imgs (extract-images text) + [good-imgs bad-imgs] (filter-images imgs)] + (html [:body + [:h1 (str "Images for " feed)] + [:div (format "Downloaded in %s ms" ms)] + [:h3 "Images"] + (show-good-images good-imgs) + [:h3 "Filtered Images"] + (show-bad-images bad-imgs) + [:h3 "Raw Feed Contents"] + [:pre (escape-html text)] + ])) + (redirect-to "/feed-test")))) + + |
