diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/config.clj | 1 | ||||
| -rw-r--r-- | src/imgreplacer.clj | 69 | ||||
| -rw-r--r-- | src/site.clj | 3 | ||||
| -rwxr-xr-x | src/utils.clj | 3 |
4 files changed, 72 insertions, 4 deletions
diff --git a/src/config.clj b/src/config.clj index 15834cb..3230c72 100644 --- a/src/config.clj +++ b/src/config.clj @@ -4,7 +4,6 @@ ;; Configuration (def *server-user* (System/getProperty "user.name")) -;; (if (= (System/getProperty "user.name") "dumpfmprod") (def *server-url* (if (= *server-user* "dumpfmprod") diff --git a/src/imgreplacer.clj b/src/imgreplacer.clj new file mode 100644 index 0000000..6f763e7 --- /dev/null +++ b/src/imgreplacer.clj @@ -0,0 +1,69 @@ +(ns imgreplacer + (:import java.net.URL + java.io.File + java.io.IOException + java.io.ByteArrayInputStream + javax.imageio.ImageIO + org.htmlcleaner.HtmlCleaner) + (:use clojure.contrib.duck-streams + clojure.contrib.str-utils + clojure.contrib.command-line + config + feed + utils)) + +(def save-root "images/replaced") + +(defn file-path [date fname] + (str-join "/" [save-root date fname])) + +(defn image-url [date fname] + (str-join "/" [*server-url* save-root date fname])) + +(defn ins-substring? [ss s] + (>= (.indexOf (lower-case s) (lower-case ss)) 0)) + +(defn fetch-bad-messages [url] + (println "fetching bad messages for" url) + (do-select [(str "SELECT * FROM messages WHERE content ilike '%" url "%'")])) + +(defn replace-grp-str [replacements string] + (reduce (fn [s [k v]] (.replaceAll s k v)) + string replacements)) + +(defn mirror-image [url] + (println "fetching" url) + (let [bytes (to-byte-array (.openStream (URL. url))) + date (today) + fname (last (.split url "/")) + file (File. (file-path date fname)) + img (ImageIO/read (ByteArrayInputStream. bytes))] + (println img) + (make-parents file) + (copy (ByteArrayInputStream. bytes) file) + (image-url date fname))) + +(defn take-safe-images [m] + (set (concat (take-images m) + (try + (pull-images-from-html m) + (catch Exception _ []))))) + +(def image-url-map (ref {})) + +(defn replace-bad-images [url dryrun?] + (doseq [m (fetch-bad-messages url)] + (let [imgs (filter #(ins-substring? url %) + (take-safe-images (:content m)))] + (doseq [img imgs] + (if-not (contains? @image-url-map img) + (if-let [path (mirror-image img)] + (dosync (alter image-url-map assoc img path)) + (println "Unable to mirror" img)))) + (let [replace-map (zipmap imgs (map @image-url-map imgs)) + new-content (replace-grp-str replace-map (:content m))] + (println "\nupdating content of" (:message_id m) "from:\n" (:content m) "\nto:\n" new-content) + (if-not dryrun? + (do-update :messages + ["message_id = ?" (:message_id m)] + {:content new-content})))))) diff --git a/src/site.clj b/src/site.clj index ce5796b..bb4d2b8 100644 --- a/src/site.clj +++ b/src/site.clj @@ -393,9 +393,6 @@ ORDER BY cnt DESC ;; Profile
-(defn take-images [content]
- (filter is-image? (.split content " ")))
-
(defn pull-random-dump-images [dumps num]
(take num
(shuffle
diff --git a/src/utils.clj b/src/utils.clj index 9e4fc38..4c6905c 100755 --- a/src/utils.clj +++ b/src/utils.clj @@ -43,6 +43,9 @@ (and (re-find url-regex word) (re-find pic-regex word))) +(defn take-images [content] + (filter is-image? (.split content " "))) + (defn classify-msg [msg] (let [words (.split msg " ") imgs (map is-image? words)] |
