author     dumpfmprod <dumpfmprod@ubuntu.(none)>  2010-10-24 20:16:20 -0400
committer  dumpfmprod <dumpfmprod@ubuntu.(none)>  2010-10-24 20:16:20 -0400
commit     fafbde26383e42858538046d4b2b3691e21bd898 (patch)
tree       b8f9d164305e92d1bbc8b7f5a278c195a3903a16
parent     d79fb90ae279d0f46606c305688e97e73374eacc (diff)
Update python scripts
-rw-r--r--  scripts/fill.image_altars.py  26
-rw-r--r--  scripts/fill.image_urls.py     8
-rw-r--r--  scripts/hiscores.py           12
-rw-r--r--  scripts/mias3.py              76
-rw-r--r--  scripts/s3upload.py            4
5 files changed, 105 insertions(+), 21 deletions(-)
diff --git a/scripts/fill.image_altars.py b/scripts/fill.image_altars.py
index 4b8dd91..8321ffa 100644
--- a/scripts/fill.image_altars.py
+++ b/scripts/fill.image_altars.py
@@ -28,10 +28,9 @@ def add_altar(message_id, user_id, content):
         print("skipped adding a dupe")
 
 # NOTE. hardcoded room numbers to index here... only indexing DUMPFM (1) and GIF (8) currently.
-def get_messages(lower, upper):
+def get_messages():
     ps = db.prepare("SELECT message_id, user_id, content FROM messages WHERE message_id >= $1 AND message_id <= $2 AND room_id IN (1,8) ORDER BY message_id ASC")
-    rows = ps(lower, upper)
-    return rows
+    return ps.chunks
 
 def is_url_an_image(url):
     image_types = {"jpg", "bmp", "gif", "png"}
@@ -65,16 +64,17 @@ def is_altar(content):
         i += 1
     return True
 
-def process_messages(messages):
+def process_messages(chunks, lower, upper):
     num_added = 0
     processed = 0
-    for message in messages:
-        if processed % 1000 == 0:
-            print(processed, " processed so far")
-        processed += 1
-        if is_altar(message[2]):
-            add_altar(message[0], message[1], message[2])
-            num_added += 1
+    for rowset in chunks(lower, upper):
+        for message in rowset:
+            if processed % 1000 == 0:
+                print(processed, " processed so far")
+            processed += 1
+            if is_altar(message[2]):
+                add_altar(message[0], message[1], message[2])
+                num_added += 1
     return num_added
 
 def get_urls_from_messages(messages):
@@ -100,6 +100,6 @@ if __name__ == "__main__":
     upper = int(sys.argv[1])
     highest = get_highest_message_id_in_db()
-    messages = get_messages(highest, upper)
-    num_added = process_messages(messages)
+    chunks = get_messages()
+    num_added = process_messages(chunks, highest, upper)
     print("added ", num_added, " altars to db")
diff --git a/scripts/fill.image_urls.py b/scripts/fill.image_urls.py
index 79280b9..4f7c2ea 100644
--- a/scripts/fill.image_urls.py
+++ b/scripts/fill.image_urls.py
@@ -12,6 +12,13 @@ def get_num_images_in_db():
     ps = db.prepare("SELECT COUNT(*) FROM image_urls")
     return int(ps()[0][0])
 
+def vacuum_and_analyze():
+    print("cleaning db")
+    ps = db.prepare("VACUUM image_urls")
+    ps()
+    ps2 = db.prepare("ANALYZE image_urls")
+    ps2()
+
 def add_url(url):
     try:
         print(url)
@@ -83,3 +90,4 @@ if __name__ == "__main__":
     num_new_images = get_num_images_in_db() - num_existing_images
     percent_new_images = num_new_images / len(urls) * 100
     print(num_new_images, " were new (", percent_new_images ,"%)")
+    vacuum_and_analyze()
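The new vacuum_and_analyze() step runs once the bulk of add_url() inserts has finished: VACUUM reclaims space held by dead row versions and ANALYZE refreshes the planner statistics for image_urls. One PostgreSQL caveat: VACUUM refuses to run inside a transaction block, so the call only succeeds when the connection has no open transaction. A rough equivalent of the same maintenance pass, sketched with psycopg2 in autocommit mode (psycopg2 and the DSN are assumptions, not part of this repo):

import psycopg2

conn = psycopg2.connect("dbname=dumpfm")  # hypothetical DSN
conn.autocommit = True  # VACUUM is rejected inside a transaction block
with conn.cursor() as cur:
    cur.execute("VACUUM image_urls")   # reclaim space from dead rows
    cur.execute("ANALYZE image_urls")  # refresh planner statistics
conn.close()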
diff --git a/scripts/hiscores.py b/scripts/hiscores.py
index e8a5ed0..b81269e 100644
--- a/scripts/hiscores.py
+++ b/scripts/hiscores.py
@@ -11,13 +11,12 @@ db.execute("SET CLIENT_ENCODING to 'UNICODE'")
 r = redis.Redis("localhost")
 
 key_prefix = "hiscore:"
-hiscore_len = 40
 
 config = {
-    "day": {"days": 1, "amt": hiscore_len},
-    "week": {"days": 7, "amt": hiscore_len},
-    "month": {"days": 30, "amt": hiscore_len},
-    "all": {"days": 0, "amt": hiscore_len}
+    "day": {"days": 1, "amt": 40},
+    "week": {"days": 7, "amt": 40},
+    "month": {"days": 30, "amt": 40},
+    "all": {"days": 0, "amt": 40}
 }
 
 def fetch_favs(days):
@@ -70,9 +69,10 @@ if __name__ == "__main__":
         print('this script adds message ids to redis for the highest scoring posts over a period.')
         sys.exit(1)
 
+    # write to key and then overwrite keyfinal when complete
     keyfinal = key_prefix + period
     key = keyfinal + ":temp"
-    # write to key and then overwrite keyfinal when complete
+    r.delete(key) # in case temp key still exists bc script was killed
 
     chunks = fetch_favs(days)
     add_favs_to_redis(key, chunks)
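The hiscores.py change keeps the existing build-then-swap scheme: each period's scores are built under hiscore:<period>:temp and the final key is only overwritten once the set is complete, so readers never see a half-built leaderboard; the added r.delete(key) simply clears a temp key left behind if a previous run was killed mid-build. A minimal sketch of that swap against the current redis-py API (key names and scores below are illustrative):

import redis

r = redis.Redis("localhost")
final_key = "hiscore:day"
temp_key = final_key + ":temp"

r.delete(temp_key)  # drop leftovers if an earlier run died mid-build
r.zadd(temp_key, {"12345": 9, "67890": 7})  # message_id -> score
r.rename(temp_key, final_key)  # swap in the finished set in one step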
diff --git a/scripts/mias3.py b/scripts/mias3.py
new file mode 100644
index 0000000..9a276d6
--- /dev/null
+++ b/scripts/mias3.py
@@ -0,0 +1,76 @@
+import datetime
+import mimetypes
+import os
+import sys
+import time
+import S3
+
+CONN = None
+AWS_ACCESS_KEY_ID = 'AKIAJAQK4CDDP6I6SNVA'
+AWS_SECRET_ACCESS_KEY = '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'
+BUCKET_NAME = 'dumpfm'
+COUNTER = 0
+
+def retry_func(f, count):
+    try:
+        f()
+    except:
+        if count <= 1: raise
+        else:
+            print 'Error! retrying %s more time(s)' % (count - 1)
+            retry_func(f, count - 1)
+
+def upload_file(path):
+    global COUNTER
+    path = os.path.normpath(path)
+    if path == '.' or not os.path.isfile(path):
+        return
+    filedata = open(path, 'rb').read()
+    size = os.path.getsize(path)
+    content_type = mimetypes.guess_type(path)[0]
+    if not content_type:
+        content_type = 'text/plain'
+
+    path = path.replace('\\', '/') # Windows hack
+    start = time.time()
+    def do_upload():
+        CONN.put(BUCKET_NAME, path, S3.S3Object(filedata),
+                 {'x-amz-acl': 'public-read', 'Content-Type': 'video/x-flv'})
+    retry_func(do_upload, 3)
+
+    ms_took = (time.time() - start) * 1000
+    print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024)
+    COUNTER += 1
+
+def upload_directory(path):
+    for f in sorted(os.listdir(path)):
+        subpath = os.path.join(path, f)
+        if os.path.isdir(subpath):
+            upload_directory(subpath)
+        else:
+            upload_file(subpath)
+
+def do_upload(path):
+    global CONN
+    CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+
+    start = time.time()
+
+    if os.path.isdir(path):
+        upload_directory(path)
+    else:
+        upload_file(path)
+
+    s_took = (time.time() - start)
+    print "uploaded %s files in %0.0fs" % (COUNTER, s_took)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        print 'usage: s3upload.py path'
+        sys.exit(1)
+
+    args = sys.argv[1:]
+    for path in args:
+        do_upload(path)
+        print
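mias3.py is essentially a copy of s3upload.py that keeps the old credentials and wraps every put in retry_func, which retries a failed upload up to the given number of attempts before letting the exception escape; note that the computed content_type is never used, since every object is uploaded with a hardcoded Content-Type of video/x-flv. The same bounded-retry idea written for Python 3, with an optional pause between attempts (the delay parameter is an illustration, not something the script has):

import time

def retry_func(f, count, delay=0):
    # Call f(); on any exception, retry until count attempts are used up.
    for attempt in range(count):
        try:
            return f()
        except Exception:
            if attempt == count - 1:
                raise  # out of attempts; let the error propagate
            print('Error! retrying %s more time(s)' % (count - attempt - 1))
            if delay:
                time.sleep(delay)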
diff --git a/scripts/s3upload.py b/scripts/s3upload.py
index a31874b..724561c 100644
--- a/scripts/s3upload.py
+++ b/scripts/s3upload.py
@@ -6,8 +6,8 @@ import time
 import S3
 
 CONN = None
-AWS_ACCESS_KEY_ID = 'AKIAJAQK4CDDP6I6SNVA'
-AWS_SECRET_ACCESS_KEY = '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'
+AWS_ACCESS_KEY_ID = 'AKIAIOP42NFKLLJXEGJQ'
+AWS_SECRET_ACCESS_KEY = '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'
 BUCKET_NAME = 'dumpfm'
 COUNTER = 0