summaryrefslogtreecommitdiff
path: root/src/feed.clj
blob: ae7f54d9c45763efe642d4d47cb72a7736d39f55 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(ns feed
  (:require [clojure.contrib.str-utils2 :as s])
  (:use clojure.contrib.condition
        clojure.contrib.duck-streams
        compojure
        utils))

;; Path of the CSV file that lists the feeds; read line by line by read-feeds.
(def *feeds-path* "docs/feeds.csv")

(defn parse-line
  "Parses one line of the feeds CSV into a map.

  The columns are assigned, in order, to the keys :room-name, :desc,
  :feed-link, :site-link and :contact. Double quotes are removed from every
  field and surrounding whitespace is trimmed. If the line has fewer than
  five columns the missing keys are simply absent from the result; extra
  columns are ignored.

  Commas inside a double-quoted field do not start a new column. A line with
  unbalanced quotes is not valid CSV and is split on a best-effort basis."
  [line]
  ;; Split only on commas that are followed by an even number of double
  ;; quotes up to the end of the line, i.e. commas that sit outside a quoted
  ;; field. A plain #"," split would break a quoted description containing a
  ;; comma into two columns and shift every later field.
  (let [columns (.split #",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)" line)]
    (zipmap [:room-name :desc :feed-link :site-link :contact]
            (map #(.trim (.replaceAll % "\"" ""))
                 columns))))

(defn read-feeds
  "Reads the file at *feeds-path* and returns a sequence with one parse-line
  map per line, leaving out the entry for the first line (presumably the CSV
  header row)."
  []
  (let [parsed-lines (map parse-line (read-lines *feeds-path*))]
    (rest parsed-lines)))

;; SQL used by filter-posted-images. It takes two positional parameters: an
;; array of values (expanded with UNNEST) and a room id. It returns every
;; array element for which no feed_images row exists with that value as
;; external_url and the given room_id.
(def *image-posted-qry* "
SELECT *
FROM   UNNEST(?) as v
WHERE  NOT EXISTS (SELECT 1 
                   FROM   feed_images f 
                   WHERE  f.external_url = v
                          AND f.room_id = ?)
")

(defn filter-posted-images
  "Runs *image-posted-qry* for the given urls and room-id and returns the
  result of do-select, i.e. the rows for urls that have no feed_images entry
  in that room. Returns an empty vector without querying when urls is empty."
  [urls room-id]
  (if (seq urls)
    (let [url-array (sql-array "text" urls)]
      (do-select [*image-posted-qry* url-array room-id]))
    []))

; Based on http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif
;; Matches http(s) URLs ending in .jpeg/.jpg/.gif/.png, case-insensitively.
;; Unlike the validation regex it is derived from, this one is used to
;; *search* inside feed text (see extract-images), so:
;;  - host labels may contain digits (img1.example.com, farm4.static...);
;;  - path segments must not contain whitespace, quotes or angle brackets,
;;    otherwise a match could run past the end of the URL into the
;;    surrounding markup (e.g. href="http://x.com/page">foo.gif).
(def *image-regex*
     #"(?i)https?://(?:[a-z0-9\-]+\.)+[a-z]{2,6}(?:/[^/#?\s\"'<>]+)+\.(?:jpeg|jpg|gif|png)")

(defn extract-images
  "Returns a lazy sequence of every substring of text that matches
  *image-regex*, in order of appearance, or nil when there is no match."
  [text]
  (let [pattern *image-regex*]
    (re-seq pattern text)))

(defn is-thumbnail?
  "Returns true when img contains one of the words thumb, small or thumbs
  (in any letter case) delimited on both sides by '-', '.' or '_';
  false otherwise."
  [img]
  (if (re-find #"(?i)[-._](thumb|small|thumbs)[-._]" img)
    true
    false))

;; Ordered list of [reason predicate] pairs consulted by filter-image: an
;; image is tagged with the reason of the first predicate that returns a
;; truthy value for it.
(def image-filters [["THUMBNAIL" is-thumbnail?]])

(defn filter-image
  "Checks img against the entries of image-filters in order. Returns
  [img reason] for the first entry whose predicate accepts img, or
  [img nil] when no predicate does."
  [img]
  (loop [remaining (seq image-filters)]
    (if remaining
      (let [[reason rejects?] (first remaining)]
        (if (rejects? img)
          [img reason]
          (recur (next remaining))))
      [img nil])))

(defn filter-images
  "Runs filter-image over imgs and returns a two-element vector:
  first the images with no rejection reason, then [img reason] pairs for
  the images that were rejected."
  [imgs]
  (let [tagged   (map filter-image imgs)
        accepted (map first (filter (fn [[_ reason]] (nil? reason)) tagged))
        rejected (filter (fn [[_ reason]] reason) tagged)]
    [accepted rejected]))

(defn classify-images-from-feed
  "Downloads feed with slurp*, extracts its image URLs and splits them with
  filter-images. Returns [good-images bad-images ms], where ms is the first
  element produced by with-timing around the download (presumably the
  elapsed milliseconds)."
  [feed]
  (let [[elapsed-ms body] (with-timing (slurp* feed))
        found             (extract-images body)
        [kept rejected]   (filter-images found)]
    [kept rejected elapsed-ms]))

(defn download-all-feeds!
  "For every entry returned by read-feeds that has a :feed-link, downloads
  that URL and prints the URL followed by the image URLs found in it.
  An exception raised while handling one feed is printed with
  print-stack-trace and does not stop the remaining feeds."
  []
  (doseq [feed-row (read-feeds)]
    (try
     (let [url (:feed-link feed-row)]
       (when url
         (println url (extract-images (slurp* url)))))
     (catch Exception e
       (print-stack-trace e)))))

(defn download-feed
  "Not implemented yet: ignores its arguments and returns the string
  \"TODO\"."
  [room-id feed-user-nick feed]
  "TODO")

;; Testing

(defn feed-test-page
  "Renders the feed test page: a heading and a form that posts a single
  \"url\" text field to /feed-test. The rendering is wrapped in if-vip
  (presumably restricting the page to VIP sessions)."
  [session]
  (if-vip
   (let [url-field   [:input {:type "text" :name "url"}]
         send-button [:input {:type "submit" :value "Send"}]
         url-form    [:form {:action "/feed-test" :method "post"}
                      url-field
                      send-button]]
     (html [:body
            [:h1 "Feed Test"]
            url-form]))))

(defn show-bad-images
  "Given a sequence of [img reason] pairs, returns one :div per pair that
  contains the reason followed by the image, linked to its own URL."
  [imgs]
  (map (fn [[url why]]
         [:div
          why
          [:a {:href url}
           [:img {:src url}]]])
       imgs))

(defn show-good-images
  "Given a sequence of image URLs, returns one :div per URL containing the
  image, linked to its own URL."
  [imgs]
  (map (fn [url]
         [:div
          [:a {:href url}
           [:img {:src url}]]])
       imgs))
  

(defn feed-test
  "Handles the feed test form. Downloads the feed named by the :url
  parameter, extracts and classifies its image URLs, and renders a report
  with the download and processing timings, the accepted images, the
  filtered-out images with their reasons, and the escaped raw feed text.

  Redirects back to /feed-test when :url is missing or is not an http(s)
  URL. The whole handler is wrapped in if-vip (presumably restricting it
  to VIP sessions)."
  [session params]
  (if-vip
   (let [feed (params :url)]
     ;; Only fetch http(s) URLs. The value comes straight from the request,
     ;; and slurp* would otherwise also open file: URLs and plain local
     ;; paths, whose contents would then be shown under "Raw Feed Contents".
     (if (and feed (re-find #"(?i)^https?://" feed))
       (let [[slurp-ms text]        (with-timing (slurp* feed))
             [process-ms imgs]      (with-timing (extract-images text))
             [good-imgs bad-imgs]   (filter-images imgs)]
         (html [:body
                ;; feed is user input; escape it like the raw feed text
                ;; below so it cannot inject markup into the page.
                [:h1 (str "Images for " (escape-html feed))]
                [:div (format "Downloaded in %s ms" slurp-ms)]
                [:div (format "Processed in %s ms" process-ms)]
                [:hr]
                [:h2 "Good Images"]
                (show-good-images good-imgs)
                [:hr]
                [:h2 "Filtered Out Images"]
                (show-bad-images bad-imgs)
                [:hr]
                [:h2 "Raw Feed Contents"]
                [:pre (escape-html text)]
                ]))
       (redirect-to "/feed-test")))))