summaryrefslogtreecommitdiff
path: root/scripts/fill.image_altars.py
diff options
context:
space:
mode:
authordumpfmprod <dumpfmprod@ubuntu.(none)>2010-09-30 21:21:45 -0400
committerdumpfmprod <dumpfmprod@ubuntu.(none)>2010-09-30 21:21:45 -0400
commit4ee9af5e96dfdf045d13afa3510937f82b06df5c (patch)
treef016cb3dc4ca1f5870c8095f470dcd3ca3c9d2b2 /scripts/fill.image_altars.py
parent9581006a7a6702a115b0afde760f8b47f3e757af (diff)
timb: image altars
Diffstat (limited to 'scripts/fill.image_altars.py')
-rw-r--r--scripts/fill.image_altars.py105
1 files changed, 105 insertions, 0 deletions
diff --git a/scripts/fill.image_altars.py b/scripts/fill.image_altars.py
new file mode 100644
index 0000000..4b8dd91
--- /dev/null
+++ b/scripts/fill.image_altars.py
@@ -0,0 +1,105 @@
+# this needs python 3
+
import os
import re
import sys
from urllib.parse import urlparse

import postgresql
+
# Connection URI for the dumpfm database. The DUMPFM_DB_URI environment
# variable overrides it so credentials do not have to live in the source;
# the default is the URI this script has always used.
DB_URI = os.environ.get("DUMPFM_DB_URI", "pq://postgres:root@localhost/dumpfm")

# Module-level connection shared by every function below.
db = postgresql.open(DB_URI)
db.execute("SET CLIENT_ENCODING to 'UNICODE'")
+
def get_highest_message_id_in_db():
    """Return the largest message_id stored in image_altars, or 0 if none.

    The query asks for a single row (highest id first); an empty result
    surfaces as an IndexError when indexing the first row, which is mapped
    to 0.
    """
    query = db.prepare(
        "SELECT message_id FROM image_altars ORDER BY message_id DESC LIMIT 1"
    )
    try:
        return int(query()[0][0])
    except IndexError:
        # no rows in image_altars yet
        return 0
+
def add_altar(message_id, user_id, content):
    """Insert one (message_id, user_id) row into image_altars.

    The message is echoed to stdout first. ``content`` is only used for
    that log line; it is not stored.

    Returns:
        True if the row was inserted, False if the insert hit a unique
        constraint (the row already exists) and was skipped.
    """
    try:
        print(message_id, content)
    except UnicodeEncodeError:
        # stdout's encoding cannot represent some character in content;
        # log an ASCII-escaped form instead of losing the message.
        print(message_id, ascii(content))
    ps = db.prepare("INSERT INTO image_altars(message_id, user_id) VALUES($1, $2)")
    try:
        ps(message_id, user_id)
    except postgresql.exceptions.UniqueError:
        print("skipped adding a dupe")
        return False
    return True
+
# NOTE: the room numbers to index are hardcoded in the query below;
# only DUMPFM (1) and GIF (8) are indexed currently.
def get_messages(lower, upper):
    """Fetch messages with lower <= message_id <= upper from rooms 1 and 8.

    Returns the rows produced by the prepared statement, each carrying
    (message_id, user_id, content), ordered by ascending message_id.
    """
    query = db.prepare("SELECT message_id, user_id, content FROM messages WHERE message_id >= $1 AND message_id <= $2 AND room_id IN (1,8) ORDER BY message_id ASC")
    return query(lower, upper)
+
def is_url_an_image(url):
    """Return True if the path component of ``url`` has an image extension.

    Only the parsed path is inspected, so query strings and fragments are
    ignored. The comparison is case-insensitive and requires a real
    extension (a dot followed by the type), so ``/notjpg`` does not match
    while ``/photo.jpeg`` does.
    """
    image_extensions = (".jpg", ".jpeg", ".bmp", ".gif", ".png")
    path = urlparse(url).path.lower()
    # str.endswith accepts a tuple and matches any of its members
    return path.endswith(image_extensions)
+
def is_altar(content):
    """Return True if a message body is an "image altar".

    An altar is an odd-length sequence (at least three) of whitespace
    separated http:// image URLs that reads the same forwards and
    backwards, and whose middle image does not appear anywhere else
    (e.g. ``a b a`` or ``a b c b a``).

    Empty or missing content and html messages (those starting with
    ``<safe>``) are never altars.
    """
    if not content or content.startswith("<safe>"):  # skip empty / html messages
        return False

    # split() with no argument collapses runs of whitespace and drops
    # leading/trailing whitespace, so stray spaces or newlines do not
    # produce bogus tokens.
    tokens = content.split()
    if is_bad_sized_array(tokens):  # no even sized (or too short) arrays
        return False

    for token in tokens:  # everything must be an image url
        if not token.startswith("http://") or not is_url_an_image(token):
            return False

    half = len(tokens) // 2  # length is odd here, so this is the middle index
    middle_image = tokens[half]
    for i in range(half):
        if tokens[i] != tokens[-1 - i]:  # must be symmetric
            return False
        # Only the left half needs checking: symmetry guarantees the
        # right half holds the same images.
        if tokens[i] == middle_image:  # middle image must be unique
            return False
    return True
+
def process_messages(messages):
    """Store every altar found in ``messages`` and return how many were added.

    ``messages`` is an iterable of rows indexed as
    (message_id, user_id, content). A progress line is printed every
    1000 messages.

    A message is counted as added unless add_altar explicitly returns
    False (its signal for a skipped duplicate); any other return value,
    including None, counts as an insert.
    """
    num_added = 0
    for processed, message in enumerate(messages):
        if processed % 1000 == 0:
            print(processed, " processed so far")
        if not is_altar(message[2]):
            continue
        if add_altar(message[0], message[1], message[2]) is not False:
            num_added += 1
    return num_added
+
def get_urls_from_messages(messages):
    """Collect everything get_images_from_messages yields for each message.

    The first field of every message is handed to
    get_images_from_messages and the results are flattened into one list.
    """
    # NOTE(review): get_images_from_messages is not defined in the visible
    # part of this file, and nothing here calls this function -- confirm it
    # is still needed before relying on it.
    return [
        url
        for message in messages
        for url in get_images_from_messages(message[0])
    ]
+
# image altars look like aba or abcba but not a or abba
def is_bad_sized_array(a):
    """Return True if ``a`` cannot be an altar by length alone.

    A usable length is odd and at least 3; even lengths (including 0)
    and lengths below 3 are rejected.
    """
    size = len(a)
    return size % 2 == 0 or size < 3
+
# Script entry point: index altars among messages newer than the last
# indexed one, up to and including the message id given on the command line.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print('usage: fill.image_altars.py message_id_end')
        sys.exit(1)

    try:
        upper = int(sys.argv[1])
    except ValueError:
        # a non-numeric message id gets the usage text instead of a traceback
        print('usage: fill.image_altars.py message_id_end')
        sys.exit(1)

    highest = get_highest_message_id_in_db()
    # The highest stored id is already indexed, so resume just after it;
    # re-scanning it would only hit the duplicate path. When nothing is
    # stored yet (highest == 0) start from 0 as before.
    lower = highest + 1 if highest else 0
    messages = get_messages(lower, upper)
    num_added = process_messages(messages)
    print("added ", num_added, " altars to db")