diff options
| author | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-10-03 21:28:23 -0400 |
|---|---|---|
| committer | dumpfmprod <dumpfmprod@ubuntu.(none)> | 2010-10-03 21:28:23 -0400 |
| commit | 02a0597e39ed337ba4dc0cccddebb4c98c63b5ee (patch) | |
| tree | 9606f75c4d24702310945dc0bd0affa885a11f8c /scripts/hiscores.py | |
| parent | e5f01c9ad63b4547900a488c452523a3f88344c5 (diff) | |
timb: better hiscore code
Diffstat (limited to 'scripts/hiscores.py')
| -rw-r--r-- | scripts/hiscores.py | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/scripts/hiscores.py b/scripts/hiscores.py new file mode 100644 index 0000000..e8a5ed0 --- /dev/null +++ b/scripts/hiscores.py @@ -0,0 +1,80 @@ +# this needs python 3 due to py-postgresql... + +import re +import sys +import postgresql +import redis + +db = postgresql.open("pq://postgres:root@localhost/dumpfm") +db.execute("SET CLIENT_ENCODING to 'UNICODE'") + +r = redis.Redis("localhost") + +key_prefix = "hiscore:" +hiscore_len = 40 + +config = { + "day": {"days": 1, "amt": hiscore_len}, + "week": {"days": 7, "amt": hiscore_len}, + "month": {"days": 30, "amt": hiscore_len}, + "all": {"days": 0, "amt": hiscore_len} +} + +def fetch_favs(days): + statement = """SELECT message_id + FROM tags + WHERE + LOWER(tag) = 'favorite' """ + if days > 0: + statement = statement + "AND created_on > (now() - INTERVAL '" + str(days) + " day')" + + ps = db.prepare(statement) + return ps.chunks +# using "chunks()" is apparently most efficient way to stream tons of results + +def add_favs_to_redis(key, chunk): + counter = 0 + for rowset in chunks(): + for row in rowset: + message_id = row[0] + r.zincrby(key, message_id, 1) + counter += 1 + if counter % 1000 == 0: + print("processing row", counter) + +def rm_low_scores(key, amt): + amt = (amt + 1) * -1 + r.zremrangebyrank(key, 0, amt) + +def switch_keys(key, keyfinal): + r.rename(key, keyfinal) + +if __name__ == "__main__": + error = False + + if not len(sys.argv) == 2: + error = True + else: + period = sys.argv[1] + if period not in config: + error = True + else: + days = config[period]['days'] + amt = config[period]['amt'] + + if error: + print('usage: python3.1 hiscores.py period') + print('where period is one of:') + for period in config: + print(period) + print('this script adds message ids to redis for the highest scoring posts over a period.') + sys.exit(1) + + keyfinal = key_prefix + period + key = keyfinal + ":temp" + # write to key and then overwrite keyfinal when complete + + chunks = fetch_favs(days) + add_favs_to_redis(key, chunks) + rm_low_scores(key, amt) + switch_keys(key, keyfinal) |
