summary refs log tree commit diff
diff options
context:
space:
mode:
author Scott Ostler <scottbot9000@gmail.com> 2010-09-11 18:04:13 -0400
committer Scott Ostler <scottbot9000@gmail.com> 2010-09-11 18:04:13 -0400
commit f31b00db197eca2fd46ada0625a403f7e43e6c54 (patch)
tree 75d630f6b15f2f942af6840bb446caeefc3d4f4d
parent 86d502cc6aa4a67baa0ebbc1dd1bb04a45fff952 (diff)
Add imgreplacer file
-rw-r--r-- src/config.clj 1
-rw-r--r-- src/imgreplacer.clj 73
-rw-r--r-- src/site.clj 3
-rwxr-xr-x src/utils.clj 3
4 files changed, 76 insertions, 4 deletions
diff --git a/src/config.clj b/src/config.clj
index 15834cb..3230c72 100644
--- a/src/config.clj
+++ b/src/config.clj
@@ -4,7 +4,6 @@
;; Configuration
(def *server-user* (System/getProperty "user.name"))
-;; (if (= (System/getProperty "user.name") "dumpfmprod")
(def *server-url*
(if (= *server-user* "dumpfmprod")
diff --git a/src/imgreplacer.clj b/src/imgreplacer.clj
new file mode 100644
index 0000000..8c31a6b
--- /dev/null
+++ b/src/imgreplacer.clj
@@ -0,0 +1,73 @@
+(ns imgreplacer
+ (:import java.net.URL
+ java.io.File
+ java.io.IOException
+ java.io.ByteArrayInputStream
+ javax.imageio.ImageIO
+ org.htmlcleaner.HtmlCleaner)
+ (:use clojure.contrib.duck-streams
+ clojure.contrib.str-utils
+ config
+ feed
+ utils))
+
+;; Directory under which mirrored images are saved; file-path and image-url
+;; both use it as a path segment.
+(def save-root "images/replaced")
+;; Substring handed to replace-bad-images when this file is loaded.
+(def string "wonderranchpublishing.com")
+;; When true, replace-bad-images only prints the proposed changes and does not
+;; call do-update.
+(def dry-run true)
+
+
+(defn file-path
+  "Returns the relative path save-root/date/fname used to store a mirrored image."
+  [date fname]
+  (let [segments [save-root date fname]]
+    (apply str (interpose "/" segments))))
+
+(defn image-url
+  "Returns the URL *server-url*/save-root/date/fname for a mirrored image."
+  [date fname]
+  (let [segments [*server-url* save-root date fname]]
+    (apply str (interpose "/" segments))))
+
+(defn ins-substring?
+  "Returns true when ss occurs somewhere in s, comparing both after lower-casing."
+  [ss s]
+  (let [haystack (lower-case s)
+        needle   (lower-case ss)]
+    (not (neg? (.indexOf haystack needle)))))
+
+(defn fetch-bad-messages
+  "Returns the rows of the messages table whose content contains url
+  (matched with ILIKE).
+
+  The pattern is passed as a bound parameter instead of being spliced into the
+  SQL text, so a quote character in url can no longer break or alter the
+  statement. % and _ inside url still act as LIKE wildcards, as before."
+  [url]
+  (println "fetching bad messages for" url)
+  ;; NOTE(review): assumes do-select forwards this vector as [sql & params]
+  ;; (the clojure.contrib.sql convention) -- confirm against do-select.
+  (do-select ["SELECT * FROM messages WHERE content ilike ?"
+              (str "%" url "%")]))
+
+(defn replace-grp-str
+  "Applies every [target replacement] pair of replacements to string and
+  returns the resulting string.
+
+  Targets are matched literally (String.replace), not as regular expressions,
+  so characters such as . ? and $ that are common in URLs carry no special
+  meaning in either the target or the replacement. Pairs whose replacement is
+  nil are skipped, leaving those occurrences untouched."
+  [replacements string]
+  (reduce (fn [s [k v]]
+            (if (nil? v)
+              s
+              ;; Two string arguments select replace(CharSequence, CharSequence).
+              (.replace s k v)))
+          string replacements))
+
+(defn mirror-image
+  "Downloads url, writes the bytes to (file-path (today) fname) where fname is
+  the text after the last \"/\" in url, and returns (image-url (today) fname).
+
+  The downloaded bytes are also decoded with ImageIO/read and the result is
+  printed (ImageIO/read yields nil when no reader can decode them); the file
+  is written either way. Errors while fetching or writing propagate to the
+  caller."
+  [url]
+  (println "fetching" url)
+  (let [data  (with-open [in (.openStream (URL. url))]
+                ;; Read everything while the stream is open; with-open closes
+                ;; it afterwards, which the previous version never did.
+                (to-byte-array in))
+        date  (today)
+        fname (last (.split url "/"))
+        file  (File. (file-path date fname))
+        img   (ImageIO/read (ByteArrayInputStream. data))]
+    (println img)
+    (make-parents file)
+    (copy (ByteArrayInputStream. data) file)
+    (image-url date fname)))
+
+(defn take-safe-images
+  "Returns the set of image URLs found in m by take-images together with those
+  found by pull-images-from-html; an Exception thrown by the latter call is
+  swallowed and treated as no results."
+  [m]
+  (let [plain-imgs (take-images m)
+        html-imgs  (try
+                     (pull-images-from-html m)
+                     (catch Exception _ []))]
+    (set (concat plain-imgs html-imgs))))
+
+;; Ref holding a map from an original image URL to the URL returned by
+;; mirror-image for it; replace-bad-images consults it so an image that is
+;; already recorded is not fetched again.
+(def image-url-map (ref {}))
+
+(defn replace-bad-images
+  "For each message returned by fetch-bad-messages for url: mirrors every image
+  URL in the message content that contains url, records the mapping in
+  image-url-map, and prints the content before and after substituting the
+  mirrored URLs. The messages row is only updated when dry-run is false."
+  [url]
+  (doseq [m (fetch-bad-messages url)]
+    (let [content (:content m)
+          imgs    (filter #(ins-substring? url %)
+                          (take-safe-images content))]
+      ;; Mirror each matching image that has no recorded copy yet.
+      (doseq [img imgs]
+        (when-not (contains? @image-url-map img)
+          (if-let [path (mirror-image img)]
+            (dosync (alter image-url-map assoc img path))
+            (println "Unable to mirror" img))))
+      (let [mirrored    @image-url-map
+            replace-map (zipmap imgs (map mirrored imgs))
+            new-content (replace-grp-str replace-map content)]
+        (println "\nupdating content of" (:message_id m) "from:\n" content "\nto:\n" new-content)
+        (when-not dry-run
+          (do-update :messages
+                     ["message_id = ?" (:message_id m)]
+                     {:content new-content}))))))
+
+(replace-bad-images string)
\ No newline at end of file
diff --git a/src/site.clj b/src/site.clj
index ce5796b..bb4d2b8 100644
--- a/src/site.clj
+++ b/src/site.clj
@@ -393,9 +393,6 @@ ORDER BY cnt DESC
;; Profile
-(defn take-images [content]
- (filter is-image? (.split content " ")))
-
(defn pull-random-dump-images [dumps num]
(take num
(shuffle
diff --git a/src/utils.clj b/src/utils.clj
index 9e4fc38..4c6905c 100755
--- a/src/utils.clj
+++ b/src/utils.clj
@@ -43,6 +43,9 @@
(and (re-find url-regex word)
(re-find pic-regex word)))
+(defn take-images
+  "Splits content on single spaces and returns the words for which is-image?
+  is truthy."
+  [content]
+  (let [words (.split content " ")]
+    (filter is-image? words)))
+
(defn classify-msg [msg]
(let [words (.split msg " ")
imgs (map is-image? words)]