blob: 4963e028df2c464d4690fe0ef72130723ccab92d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
(ns imgreplacer
(:import java.net.URL
java.io.File
java.io.IOException
java.io.ByteArrayInputStream
javax.imageio.ImageIO
org.htmlcleaner.HtmlCleaner)
(:require [clojure.set :as r])
(:use clojure.contrib.duck-streams
clojure.contrib.str-utils
clojure.contrib.command-line
config
feed
utils))
(def save-root "images/replaced")
(defn file-path [date fname]
(str-join "/" [save-root date fname]))
(defn image-url [date fname]
(str-join "/" [*server-url* save-root date fname]))
(defn ins-substring? [ss s]
(>= (.indexOf (lower-case s) (lower-case ss)) 0))
(defn fetch-bad-messages [url]
(println "fetching bad messages for" url)
(do-select [(str "SELECT * FROM messages WHERE content ilike '%" url "%'")]))
(defn replace-grp-str [replacements string]
(reduce (fn [s [k v]] (.replaceAll s k v))
string replacements))
(defn mirror-image [url]
(println "fetching" url)
(let [bytes (to-byte-array (.openStream (URL. url)))
date (today)
fname (last (.split url "/"))
file (File. (file-path date fname))
img (ImageIO/read (ByteArrayInputStream. bytes))]
(make-parents file)
(copy (ByteArrayInputStream. bytes) file)
(image-url date fname)))
(defn take-safe-images [m]
(set (concat (take-images m)
(try
(pull-images-from-html m)
(catch Exception _ [])))))
(def image-url-map (ref {}))
(defn mirror-message! [msg dryrun url-filter]
(let [imgs (filter url-filter
(take-safe-images (:content msg)))]
(doseq [img imgs]
(if-not (contains? @image-url-map img)
(dosync (alter image-url-map assoc img (mirror-image img)))))
(let [replace-map (zipmap imgs (map @image-url-map imgs))
new-content (replace-grp-str replace-map (:content msg))]
(if (= (:content msg) new-content)
(println (format "Message %s: no change" (:message_id msg)))
(do
(println "\nupdating content of" (:message_id msg) "from:\n" (:content msg) "\nto:\n" new-content)
(if-not dryrun
(do-update :messages
["message_id = ?" (:message_id msg)]
{:content new-content})))))))
(defn mirror-bad-host! [url dryrun]
(doseq [m (fetch-bad-messages url)]
(mirror-message! m dryrun
#(ins-substring? url %))))
(defn mirror-message-id! [msg-id dryrun]
(if-let [m (first (do-select ["SELECT * FROM messages WHERE message_id = ?" msg-id]))]
(mirror-message! m dryrun
#(not (re-find #"^http://dump.fm" %)))))
|