blob: d3cf3f6a8bd814b23acfb014c9a54ee5e270117a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
(ns feed
(:require [clojure.contrib.str-utils2 :as s])
(:use clojure.contrib.condition
clojure.contrib.duck-streams
compojure
utils))
(def *feeds-path* "docs/feeds.csv")
(defn parse-line [line]
(let [r (s/split line #",")]
(zipmap [:room-name :desc :feed-link :site-link :contact]
(map #(.trim (.replaceAll % "\"" ""))
r))))
(defn read-feeds []
(rest (map parse-line (read-lines *feeds-path*))))
(def *image-posted-qry* "
SELECT *
FROM UNNEST(?) as v
WHERE NOT EXISTS (SELECT 1
FROM feed_images f
WHERE f.external_url = v
AND f.room_id = ?)
")
(defn filter-posted-images [urls room-id]
(if (empty? urls)
[]
(do-select [*image-posted-qry*
(sql-array "text" urls)
room-id])))
; http://stackoverflow.com/questions/169625/regex-to-check-if-valid-url-that-ends-in-jpg-png-or-gif
(def *image-regex*
#"(?i)https?://(?:[a-z0-9\-]+\.)+[a-z]{2,6}(?:/[^/#?]+)+\.(?:jpeg|jpg|gif|png)")
(defn extract-images [text]
(re-seq *image-regex* text))
(defn is-thumbnail? [img]
(boolean (re-find #"(?i)[-._](thumb|small|thumbs)[-._]" img)))
(def image-filters [["THUMBNAIL" is-thumbnail?]])
(defn filter-image [img]
(or (some
(fn [[r f]] (if (f img) [img r]))
image-filters)
[img nil]))
(defn filter-images [imgs]
(let [filtered (map filter-image imgs)]
[(for [[img r] filtered :when (nil? r)]
img)
(for [[img r] filtered :when r]
[img r])]))
(defn classify-images-from-feed [feed]
(let [[ms text] (with-timing (slurp* feed))
[g b] (filter-images (extract-images text))]
[g b ms]))
(defn download-all-feeds! []
(doseq [f (read-feeds)]
(try
(when-let [url (:feed-link f)]
(let [c (slurp* url)
images (extract-images c)]
(println url images)))
(catch Exception e
(print-stack-trace e)))))
(defn download-feed [room-id feed-user-nick feed]
"TODO")
;; Testing
(defn feed-test-page [session]
(if-vip
(html [:body
[:h1 "Feed Test"]
[:form {:action "/feed-test" :method "post"}
[:input {:type "text" :name "url"}]
[:input {:type "submit" :value "Send"}]]])))
(defn show-bad-images [imgs]
(for [[img reason] imgs]
[:div
reason
[:a {:href img}
[:img {:src img}]]]))
(defn show-good-images [imgs]
(for [img imgs]
[:div
[:a {:href img}
[:img {:src img}]]]))
(defn feed-test [session params]
(if-vip
(if-let [feed (params :url)]
(let [[slurp-ms text] (with-timing (slurp* feed))
[process-ms imgs] (with-timing (extract-images text))
[good-imgs bad-imgs] (filter-images imgs)]
(html [:body
[:h1 (str "Images for " feed)]
[:div (format "Downloaded in %s ms" slurp-ms)]
[:div (format "Processed in %s ms" process-ms)]
[:hr]
[:h2 "Good Images"]
(show-good-images good-imgs)
[:hr]
[:h2 "Filtered Out Images"]
(show-bad-images bad-imgs)
[:hr]
[:h2 "Raw Feed Contents"]
[:pre (escape-html text)]
]))
(redirect-to "/feed-test"))))
|