# this needs python 3 due to py-postgresql... import re import sys import postgresql import redis db = postgresql.open("pq://postgres:root@localhost/dumpfm") db.execute("SET CLIENT_ENCODING to 'UNICODE'") r = redis.Redis("localhost") key_prefix = "hiscore:" config = { "day": {"days": 1, "amt": 40}, "week": {"days": 7, "amt": 40}, "month": {"days": 30, "amt": 40}, "all": {"days": 0, "amt": 40} } def fetch_favs(days): statement = """SELECT message_id FROM tags WHERE LOWER(tag) = 'favorite' """ if days > 0: statement = statement + "AND created_on > (now() - INTERVAL '" + str(days) + " day')" ps = db.prepare(statement) return ps.chunks # using "chunks()" is apparently most efficient way to stream tons of results def add_favs_to_redis(key, chunk): counter = 0 for rowset in chunks(): for row in rowset: message_id = row[0] r.zincrby(key, message_id, 1) counter += 1 if counter % 1000 == 0: print("processing row", counter) def rm_low_scores(key, amt): amt = (amt + 1) * -1 r.zremrangebyrank(key, 0, amt) def switch_keys(key, keyfinal): r.rename(key, keyfinal) if __name__ == "__main__": error = False if not len(sys.argv) == 2: error = True else: period = sys.argv[1] if period not in config: error = True else: days = config[period]['days'] amt = config[period]['amt'] if error: print('usage: python3.1 hiscores.py period') print('where period is one of:') for period in config: print(period) print('this script adds message ids to redis for the highest scoring posts over a period.') sys.exit(1) # write to key and then overwrite keyfinal when complete keyfinal = key_prefix + period key = keyfinal + ":temp" r.delete(key) # in case temp key still exists bc script was killed chunks = fetch_favs(days) add_favs_to_redis(key, chunks) rm_low_scores(key, amt) switch_keys(key, keyfinal)