diff options
| author | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-10-24 20:16:20 -0400 |
|---|---|---|
| committer | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-10-24 20:16:20 -0400 |
| commit | fafbde26383e42858538046d4b2b3691e21bd898 (patch) | |
| tree | b8f9d164305e92d1bbc8b7f5a278c195a3903a16 /scripts | |
| parent | d79fb90ae279d0f46606c305688e97e73374eacc (diff) | |
Update python scripts
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/fill.image_altars.py | 26 | ||||
| -rw-r--r-- | scripts/fill.image_urls.py | 8 | ||||
| -rw-r--r-- | scripts/hiscores.py | 12 | ||||
| -rw-r--r-- | scripts/mias3.py | 76 | ||||
| -rw-r--r-- | scripts/s3upload.py | 4 |
5 files changed, 105 insertions, 21 deletions
diff --git a/scripts/fill.image_altars.py b/scripts/fill.image_altars.py index 4b8dd91..8321ffa 100644 --- a/scripts/fill.image_altars.py +++ b/scripts/fill.image_altars.py @@ -28,10 +28,9 @@ def add_altar(message_id, user_id, content): print("skipped adding a dupe") # NOTE. hardcoded room numbers to index here... only indexing DUMPFM (1) and GIF (8) currently. -def get_messages(lower, upper): +def get_messages(): ps = db.prepare("SELECT message_id, user_id, content FROM messages WHERE message_id >= $1 AND message_id <= $2 AND room_id IN (1,8) ORDER BY message_id ASC") - rows = ps(lower, upper) - return rows + return ps.chunks def is_url_an_image(url): image_types = {"jpg", "bmp", "gif", "png"} @@ -65,16 +64,17 @@ def is_altar(content): i += 1 return True -def process_messages(messages): +def process_messages(chunks, lower, upper): num_added = 0 processed = 0 - for message in messages: - if processed % 1000 == 0: - print(processed, " processed so far") - processed += 1 - if is_altar(message[2]): - add_altar(message[0], message[1], message[2]) - num_added += 1 + for rowset in chunks(lower, upper): + for message in rowset: + if processed % 1000 == 0: + print(processed, " processed so far") + processed += 1 + if is_altar(message[2]): + add_altar(message[0], message[1], message[2]) + num_added += 1 return num_added def get_urls_from_messages(messages): @@ -100,6 +100,6 @@ if __name__ == "__main__": upper = int(sys.argv[1]) highest = get_highest_message_id_in_db() - messages = get_messages(highest, upper) - num_added = process_messages(messages) + chunks = get_messages() + num_added = process_messages(chunks, highest, upper) print("added ", num_added, " altars to db") diff --git a/scripts/fill.image_urls.py b/scripts/fill.image_urls.py index 79280b9..4f7c2ea 100644 --- a/scripts/fill.image_urls.py +++ b/scripts/fill.image_urls.py @@ -12,6 +12,13 @@ def get_num_images_in_db(): ps = db.prepare("SELECT COUNT(*) FROM image_urls") return int(ps()[0][0]) +def vacuum_and_analyze(): + print("cleaning db") + ps = db.prepare("VACUUM image_urls") + ps() + ps2 = db.prepare("ANALYZE image_urls") + ps2() + def add_url(url): try: print(url) @@ -83,3 +90,4 @@ if __name__ == "__main__": num_new_images = get_num_images_in_db() - num_existing_images percent_new_images = num_new_images / len(urls) * 100 print(num_new_images, " were new (", percent_new_images ,"%)") + vacuum_and_analyze() diff --git a/scripts/hiscores.py b/scripts/hiscores.py index e8a5ed0..b81269e 100644 --- a/scripts/hiscores.py +++ b/scripts/hiscores.py @@ -11,13 +11,12 @@ db.execute("SET CLIENT_ENCODING to 'UNICODE'") r = redis.Redis("localhost") key_prefix = "hiscore:" -hiscore_len = 40 config = { - "day": {"days": 1, "amt": hiscore_len}, - "week": {"days": 7, "amt": hiscore_len}, - "month": {"days": 30, "amt": hiscore_len}, - "all": {"days": 0, "amt": hiscore_len} + "day": {"days": 1, "amt": 40}, + "week": {"days": 7, "amt": 40}, + "month": {"days": 30, "amt": 40}, + "all": {"days": 0, "amt": 40} } def fetch_favs(days): @@ -70,9 +69,10 @@ if __name__ == "__main__": print('this script adds message ids to redis for the highest scoring posts over a period.') sys.exit(1) + # write to key and then overwrite keyfinal when complete keyfinal = key_prefix + period key = keyfinal + ":temp" - # write to key and then overwrite keyfinal when complete + r.delete(key) # in case temp key still exists bc script was killed chunks = fetch_favs(days) add_favs_to_redis(key, chunks) diff --git a/scripts/mias3.py b/scripts/mias3.py new file mode 100644 index 0000000..9a276d6 --- /dev/null +++ b/scripts/mias3.py @@ -0,0 +1,76 @@ +import datetime +import mimetypes +import os +import sys +import time +import S3 + +CONN = None +AWS_ACCESS_KEY_ID = 'AKIAJAQK4CDDP6I6SNVA' +AWS_SECRET_ACCESS_KEY = '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' +BUCKET_NAME = 'dumpfm' +COUNTER = 0 + +def retry_func(f, count): + try: + f() + except: + if count <= 1: raise + else: + print 'Error! retrying %s more time(s)' % (count - 1) + retry_func(f, count - 1) + +def upload_file(path): + global COUNTER + path = os.path.normpath(path) + if path == '.' or not os.path.isfile(path): + return + filedata = open(path, 'rb').read() + size = os.path.getsize(path) + content_type = mimetypes.guess_type(path)[0] + if not content_type: + content_type = 'text/plain' + + path = path.replace('\\', '/') # Windows hack + start = time.time() + def do_upload(): + CONN.put(BUCKET_NAME, path, S3.S3Object(filedata), + {'x-amz-acl': 'public-read', 'Content-Type': 'video/x-flv'}) + retry_func(do_upload, 3) + + ms_took = (time.time() - start) * 1000 + print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024) + COUNTER += 1 + +def upload_directory(path): + for f in sorted(os.listdir(path)): + subpath = os.path.join(path, f) + if os.path.isdir(subpath): + upload_directory(subpath) + else: + upload_file(subpath) + +def do_upload(path): + global CONN + CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + + start = time.time() + + if os.path.isdir(path): + upload_directory(path) + else: + upload_file(path) + + s_took = (time.time() - start) + print "uploaded %s files in %0.0fs" % (COUNTER, s_took) + + +if __name__ == "__main__": + if len(sys.argv) == 1: + print 'usage: s3upload.py path' + sys.exit(1) + + args = sys.argv[1:] + for path in args: + do_upload(path) + print diff --git a/scripts/s3upload.py b/scripts/s3upload.py index a31874b..724561c 100644 --- a/scripts/s3upload.py +++ b/scripts/s3upload.py @@ -6,8 +6,8 @@ import time import S3 CONN = None -AWS_ACCESS_KEY_ID = 'AKIAJAQK4CDDP6I6SNVA' -AWS_SECRET_ACCESS_KEY = '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' +AWS_ACCESS_KEY_ID = 'AKIAIOP42NFKLLJXEGJQ' +AWS_SECRET_ACCESS_KEY = '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' BUCKET_NAME = 'dumpfm' COUNTER = 0 |
