summaryrefslogtreecommitdiff
path: root/src/imgreplacer.clj
blob: 03ae0e65794d5dbea4df66a4720bc5928ced397a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
(ns imgreplacer
  (:import java.net.URL
           java.io.File
           java.io.IOException
           java.io.ByteArrayInputStream
           javax.imageio.ImageIO
           org.htmlcleaner.HtmlCleaner)
  (:require [clojure.set :as r])
  (:use clojure.contrib.duck-streams
        clojure.contrib.str-utils
        clojure.contrib.command-line
        config
        feed
        utils))

(def save-root "images/replaced")

(defn file-path [date fname]
  (str-join "/" [save-root date fname]))

(defn image-url [date fname]
  (str-join "/" [*server-url* save-root date fname]))

(defn ins-substring? [ss s]
  (>= (.indexOf (lower-case s) (lower-case ss)) 0))

(defn fetch-bad-messages [url]
  (println "fetching bad messages for" url)
  (do-select [(str "SELECT * FROM messages WHERE content ilike '%" url "%'")]))

(defn replace-grp-str [replacements string]
  (reduce (fn [s [k v]] (.replaceAll s k v))
          string replacements))

(defn image-name [url]
  (let [fname (last (.split url "/"))]
    (format "%s-%s"
            (System/currentTimeMillis)
            ;; Hack: nginx doesn't like to serve images w/ spaces in them
            (.replaceAll fname "%20" ""))))

(defn mirror-image [url]
  (println "fetching" url)
  (let [bytes (to-byte-array (.openStream (URL. url)))
        date  (today)
        fname (image-name url)
        file  (File. (file-path date fname))
        img   (ImageIO/read (ByteArrayInputStream. bytes))]
    (make-parents file)
    (copy (ByteArrayInputStream. bytes) file)
    (image-url date fname)))

(defn take-safe-images [m]
  (set (concat (take-images m)
               (try
                 (pull-images-from-html m)
                 (catch Exception _ [])))))

(def image-url-map (ref {}))

(defn mirror-message! [msg dryrun url-filter]
  (let [imgs (filter url-filter
                     (take-safe-images (:content msg)))]
    (doseq [img imgs]
      (if-not (contains? @image-url-map img)
        (dosync (alter image-url-map assoc img (mirror-image img)))))
    (let [replace-map  (zipmap imgs (map @image-url-map imgs))
          new-content  (replace-grp-str replace-map (:content msg))]
      (if (= (:content msg) new-content)
        (println (format "Message %s: no change" (:message_id msg)))
        (do
          (println "\nupdating content of" (:message_id msg) "from:\n" (:content msg) "\nto:\n" new-content)
          (if-not dryrun
            (do-update :messages
                       ["message_id = ?" (:message_id msg)]
                       {:content new-content})))))))

(defn mirror-bad-host! [url dryrun]
  (doseq [m (fetch-bad-messages url)]
    (mirror-message! m dryrun
                     #(ins-substring? url %))))

(defn mirror-message-id! [msg-id dryrun]
  (if-let [m (first (do-select ["SELECT * FROM messages WHERE message_id = ?" msg-id]))]
    (mirror-message! m dryrun
                     #(not (re-find #"^http://dump.fm" %)))))