summaryrefslogtreecommitdiff
path: root/src/imgreplacer.clj
blob: bc9b00f7822c68a88e1b613eb1b2887bc4d79b36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
(ns imgreplacer
  (:import java.net.URL
           java.io.File
           java.io.IOException
           java.io.ByteArrayInputStream
           javax.imageio.ImageIO
           org.htmlcleaner.HtmlCleaner)
  (:use clojure.contrib.duck-streams
        clojure.contrib.str-utils
        clojure.contrib.command-line
        config
        feed
        utils))

;; Relative path prefix for mirrored images. It is used both when building the
;; on-disk location (file-path) and when building the public URL (image-url).
(def save-root "images/replaced")

(defn file-path
  "Builds the relative path \"<save-root>/<date>/<fname>\" for a mirrored image."
  [date fname]
  (let [segments [save-root date fname]]
    (apply str (interpose "/" segments))))

(defn image-url
  "Builds \"<*server-url*>/<save-root>/<date>/<fname>\": the value of *server-url*
  followed by the same relative path that file-path produces."
  [date fname]
  (str *server-url* "/" (file-path date fname)))

(defn ins-substring?
  "Case-insensitive containment test: true when `ss` occurs somewhere in `s`
  after both have been lower-cased."
  [ss s]
  (let [haystack (lower-case s)
        needle   (lower-case ss)]
    (not (neg? (.indexOf haystack needle)))))

(defn fetch-bad-messages
  "Selects every row of `messages` whose content contains `url`
  (case-insensitively, via ILIKE) and returns whatever do-select returns.

  The search pattern is passed as a bound parameter rather than spliced into
  the SQL text, so quotes in `url` can neither break nor inject into the
  statement. Any % or _ inside `url` still acts as a LIKE wildcard, as before.

  NOTE(review): assumes do-select accepts a [sql & params] vector, the same
  convention do-update is called with elsewhere in this file -- confirm."
  [url]
  (println "fetching bad messages for" url)
  (do-select ["SELECT * FROM messages WHERE content ilike ?"
              (str "%" url "%")]))

(defn replace-grp-str
  "Applies every [old new] pair of `replacements` to `string`, replacing each
  occurrence of `old` with `new`, and returns the resulting string.

  Both sides are treated as literal text. String.replaceAll would interpret
  `old` as a regular expression and `new` as a replacement template, which
  mis-handles URLs: `.`, `?` and `+` are regex metacharacters, and `$` or `\\`
  in the replacement is expanded or rejected."
  [replacements string]
  (reduce (fn [s [k v]]
            ;; String.replace(CharSequence, CharSequence) is the literal variant.
            (.replace s k v))
          string
          replacements))

(defn mirror-image
  "Downloads `url`, writes the bytes to (file-path (today) fname) and returns
  (image-url (today) fname), where fname is the text after the last \"/\" in
  `url`. Missing parent directories of the target file are created.

  Failures while opening the URL, reading it, or writing the file propagate
  to the caller as exceptions."
  [url]
  (println "fetching" url)
  (let [;; with-open closes the connection's stream once the bytes are read;
        ;; it was previously left open.
        bytes (with-open [in (.openStream (URL. url))]
                (to-byte-array in))
        date  (today)
        fname (last (.split url "/"))
        file  (File. (file-path date fname))]
    ;; Decode attempt before anything is written. The decoded image is not
    ;; used: a decoding error raises here, while data in a format ImageIO does
    ;; not recognise yields nil and is still saved.
    ;; NOTE(review): confirm whether non-image payloads should be rejected.
    (ImageIO/read (ByteArrayInputStream. bytes))
    (make-parents file)
    (copy (ByteArrayInputStream. bytes) file)
    (image-url date fname)))

(defn take-safe-images
  "Returns the set of images found in `m` by take-images together with those
  found by pull-images-from-html. If the pull-images-from-html call throws an
  Exception, it contributes nothing."
  [m]
  (let [direct (take-images m)
        parsed (try
                 (pull-images-from-html m)
                 (catch Exception _ []))]
    (into (set direct) parsed)))

;; Ref holding a map from an original image URL to the URL of its mirrored
;; copy. replace-bad-images consults it before mirroring and adds an entry
;; after each successful mirror, so an image already present is not fetched again.
(def image-url-map (ref {}))

(defn replace-bad-images
  "For every message whose content mentions `url`, mirrors each image in that
  content whose address contains `url` (case-insensitively) and rewrites the
  content to point at the mirrored copies.

  url     - substring identifying the image addresses to replace.
  dryrun? - when truthy, the old and new content are printed but the database
            row is not updated. Images are still downloaded and saved.

  An image that cannot be mirrored because of an IOException is reported and
  left untouched in the content; the remaining images and messages are still
  processed."
  [url dryrun?]
  (doseq [m (fetch-bad-messages url)]
    (let [imgs (filter #(ins-substring? url %)
                       (take-safe-images (:content m)))]
      (doseq [img imgs]
        (when-not (contains? @image-url-map img)
          (if-let [path (try
                          (mirror-image img)
                          (catch IOException e
                            ;; One broken image must not abort the whole run.
                            (println "  " (str e))
                            nil))]
            (dosync (alter image-url-map assoc img path))
            (println "Unable to mirror" img))))
      ;; select-keys keeps only images that actually have a mirrored URL, so
      ;; the replacement map never carries nil values.
      (let [replace-map  (select-keys @image-url-map imgs)
            new-content  (replace-grp-str replace-map (:content m))]
        (println "\nupdating content of" (:message_id m) "from:\n" (:content m) "\nto:\n" new-content)
        (when-not dryrun?
          (do-update :messages
                     ["message_id = ?" (:message_id m)]
                     {:content new-content}))))))