diff options
| author | Scott Ostler <scottbot9000@gmail.com> | 2010-09-11 18:04:13 -0400 |
|---|---|---|
| committer | Scott Ostler <scottbot9000@gmail.com> | 2010-09-11 18:04:13 -0400 |
| commit | f31b00db197eca2fd46ada0625a403f7e43e6c54 (patch) | |
| tree | 75d630f6b15f2f942af6840bb446caeefc3d4f4d /src/imgreplacer.clj | |
| parent | 86d502cc6aa4a67baa0ebbc1dd1bb04a45fff952 (diff) | |
Add imgreplacer file
Diffstat (limited to 'src/imgreplacer.clj')
| -rw-r--r-- | src/imgreplacer.clj | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/src/imgreplacer.clj b/src/imgreplacer.clj
new file mode 100644
index 0000000..8c31a6b
--- /dev/null
+++ b/src/imgreplacer.clj
@@ -0,0 +1,73 @@
+(ns imgreplacer
+ (:import java.net.URL
+ java.io.File
+ java.io.IOException
+ java.io.ByteArrayInputStream
+ javax.imageio.ImageIO
+ org.htmlcleaner.HtmlCleaner)
+ (:use clojure.contrib.duck-streams
+ clojure.contrib.str-utils
+ config
+ feed
+ utils))
+
+(def save-root "images/replaced") ; path prefix that file-path and image-url put in front of date/fname
+(def string "wonderranchpublishing.com") ; URL substring to search for; passed to replace-bad-images when the file loads
+(def dry-run true) ; when true, replace-bad-images prints the rewritten content but skips do-update
+
+
+(defn file-path
+  "Builds the slash-separated path save-root/date/fname."
+  [date fname]
+  (apply str (interpose "/" [save-root date fname])))
+
+(defn image-url
+  "Builds the slash-separated URL *server-url*/save-root/date/fname."
+  [date fname]
+  (apply str (interpose "/" [*server-url* save-root date fname])))
+
+(defn ins-substring?
+  "True when the lower-cased ss occurs somewhere inside the lower-cased s."
+  [ss s]
+  (let [haystack (lower-case s)
+        needle   (lower-case ss)]
+    (not (neg? (.indexOf haystack needle)))))
+
+(defn fetch-bad-messages
+  "Selects every row of the messages table whose content matches %url% under
+  ILIKE and returns whatever do-select returns."
+  [url]
+  (println "fetching bad messages for" url)
+  ;; Bind the pattern as a statement parameter instead of splicing url into
+  ;; the SQL text, so a quote inside url cannot break or alter the query.
+  ;; NOTE(review): assumes do-select accepts a [sql & params] vector, as its
+  ;; vector argument suggests -- confirm against its definition.
+  (do-select ["SELECT * FROM messages WHERE content ilike ?"
+              (str "%" url "%")]))
+
+(defn replace-grp-str
+  "Applies each [target replacement] pair of replacements to string in turn
+  and returns the resulting string. Targets are matched as literal text."
+  [replacements string]
+  ;; String.replace substitutes literal text. The earlier .replaceAll treated
+  ;; every key as a regular expression (so . and ? inside a URL acted as
+  ;; metacharacters) and every value as a replacement template (so $ and \
+  ;; were interpreted instead of copied).
+  (reduce (fn [s [k v]] (.replace s k v))
+          string replacements))
+
+(defn mirror-image
+  "Downloads url, saves the bytes to (file-path (today) fname), where fname is
+  the text after the last \"/\" in url, and returns (image-url (today) fname)
+  for the saved copy."
+  [url]
+  (println "fetching" url)
+  ;; with-open closes the connection stream once its bytes have been read;
+  ;; it was previously left open.
+  (let [bytes (with-open [in (.openStream (URL. url))]
+                (to-byte-array in))
+        date  (today)
+        fname (last (.split url "/"))
+        file  (File. (file-path date fname))
+        ;; Decoded only so the result can be printed below; ImageIO/read
+        ;; yields nil when no registered reader understands the data.
+        img   (ImageIO/read (ByteArrayInputStream. bytes))]
+    (println img)
+    (make-parents file)
+    (copy (ByteArrayInputStream. bytes) file)
+    (image-url date fname)))
+
+(defn take-safe-images
+  "Returns the set of images take-images finds in m together with those from
+  pull-images-from-html; an exception thrown by that second call is swallowed
+  and contributes no images."
+  [m]
+  (let [direct    (take-images m)
+        from-html (try
+                    (pull-images-from-html m)
+                    (catch Exception _ []))]
+    (into (set direct) from-html)))
+
+(def image-url-map (ref {})) ; ref holding a map of original image URL -> mirrored URL, filled in by replace-bad-images
+
+(defn replace-bad-images
+  "For every message returned by (fetch-bad-messages url), mirrors each image
+  in its content whose URL contains url and rewrites the content to point at
+  the mirrored copies. Mirrored URLs are cached in image-url-map. The new
+  content is always printed; it is written back with do-update only when
+  dry-run is false."
+  [url]
+  (doseq [m (fetch-bad-messages url)]
+    (let [imgs (filter #(ins-substring? url %)
+                       (take-safe-images (:content m)))]
+      (doseq [img imgs]
+        (when-not (contains? @image-url-map img)
+          ;; mirror-image signals failure by throwing; catch it here so one
+          ;; bad image is reported and skipped instead of aborting the run.
+          (if-let [path (try
+                          (mirror-image img)
+                          (catch Exception e
+                            (println "Mirroring failed:" (.getMessage e))
+                            nil))]
+            (dosync (alter image-url-map assoc img path))
+            (println "Unable to mirror" img))))
+      ;; Only images that were actually mirrored are replaced, so a failed
+      ;; image never yields a nil replacement value.
+      (let [mirrored    (filter #(contains? @image-url-map %) imgs)
+            replace-map (zipmap mirrored (map @image-url-map mirrored))
+            new-content (replace-grp-str replace-map (:content m))]
+        (println "\nupdating content of" (:message_id m) "from:\n" (:content m) "\nto:\n" new-content)
+        (when-not dry-run
+          (do-update :messages
+                     ["message_id = ?" (:message_id m)]
+                     {:content new-content}))))))
+
+(replace-bad-images string) ; top-level call: runs the replacement for `string` as soon as this file is loaded
\ No newline at end of file |
