diff options
| author | Julie Lala <jules@okfoc.us> | 2014-06-24 18:52:07 -0400 |
|---|---|---|
| committer | Julie Lala <jules@okfoc.us> | 2014-06-24 18:52:07 -0400 |
| commit | 948926970571793e774ebf34c16e14ef8e694062 (patch) | |
| tree | 90250502c299fc130efbd4c54a62e3c158cb7acd /scripts/fix_fav_counts.py | |
| parent | 0e9eb8b4fc0ef43f91b69749f276cadf2d3bb3ad (diff) | |
| parent | c14e6d4356a2c4d9981a6808ef19edb66fc96e51 (diff) | |
Merge branch 'master' of dumpfm:/pichat/repo
Diffstat (limited to 'scripts/fix_fav_counts.py')
| -rw-r--r-- | scripts/fix_fav_counts.py | 99 |
1 files changed, 99 insertions, 0 deletions
# diff --git a/scripts/fix_fav_counts.py b/scripts/fix_fav_counts.py (new file, index 0000000..00e8d0f, 99 lines added)
"""Load favorite counts from pre-sorted CSV dumps into Redis sorted sets.

This needs Python 3 due to py-postgresql...

Before running this script, please run the following SQL, which tallies the
faves per post:

    COPY (SELECT messages.user_id, tags.message_id,
                 COUNT(tags.message_id) AS mycount,
                 TO_CHAR(messages.created_on, 'YYYYMMDD')
          FROM tags, messages
          WHERE tags.message_id = messages.message_id AND tags.tag = 'favorite'
          GROUP BY tags.message_id, messages.user_id, messages.created_on)
    TO '/tmp/fav_counts2.csv' WITH CSV;

Then run ./sort_faves.sh, which will pre-sort the data for this script.

Input files: by_date.csv by_user.csv counts_sorted.csv hall.csv
Field order in every CSV: user_id, message_id, score, date
"""

import csv
import itertools
import re
import sys

import postgresql
import redis

# Rows of a group are stored while their zero-based position is <= this value.
# NOTE(review): that admits 31 rows per user/date, not 30 -- behaviour kept
# as-is; confirm which was intended.
TOP_COUNTER_LIMIT = 30

# NOTE(review): connection settings (including credentials) are hard-coded and
# both connections are opened at import time.
db = postgresql.open("pq://postgres:root@localhost/dumpfm")
db.execute("SET CLIENT_ENCODING to 'UNICODE'")

r = redis.Redis("192.168.156.111")

# NOTE(review): every zadd() call below passes (key, member, score). That order
# presumably targets the legacy redis-py 2.x ``Redis`` client; newer redis-py
# releases expect a {member: score} mapping -- confirm the installed version.


def fetch_users():
    """Prepare the users query and return its ``chunks`` callable.

    The callable is returned uncalled; the caller invokes it to iterate over
    row sets whose rows are ``(user_id, nick)``.
    """
    statement = """SELECT user_id, nick FROM users"""

    ps = db.prepare(statement)
    return ps.chunks


def load_nicks():
    """Return a dict mapping ``str(user_id)`` to nick for every user row.

    Keys are stringified so they can be looked up directly with the user_id
    column read from the CSV files.
    """
    nicks = {}
    chunks = fetch_users()
    for rowset in chunks():
        for row in rowset:
            nicks[str(row[0])] = row[1]
    return nicks


def load_faves_by_user():
    """Fill ``popular:<nick>`` and ``favscores`` from ``faves/by_user.csv``.

    Consecutive rows with the same user_id form one group, so the file must be
    sorted by user. For each group the first rows (see ``TOP_COUNTER_LIMIT``)
    are added to the sorted set ``popular:<nick>`` with the row's score, and
    the sum of *all* of the user's scores is added to ``favscores`` under the
    nick. Non-zero totals are written for every group, including the last one
    in the file.

    Raises:
        KeyError: if a user_id in the CSV has no entry in the users table.
    """
    nicks = load_nicks()
    with open('faves/by_user.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        groups = itertools.groupby(reader, key=lambda row: row[0])
        for user_counter, (user_id, rows) in enumerate(groups, start=1):
            nick = nicks[user_id]
            key = "popular:" + nick
            if (user_counter % 1000) == 0:
                # Progress marker every 1000 users.
                print(str(user_counter) + " ...")

            score = 0
            for counter, row in enumerate(rows):
                # The total covers every row, even those past the cutoff.
                score += int(row[2])
                if counter > TOP_COUNTER_LIMIT:
                    continue
                r.zadd(key, row[1], int(row[2]))

            if score != 0:
                r.zadd("favscores", nick, score)


def load_faves_by_date():
    """Fill the ``hall:daily:<YYYYMMDD>`` sorted sets from ``faves/by_date.csv``.

    Consecutive rows with the same date form one group, so the file must be
    sorted by date. Only the first rows of each group (see
    ``TOP_COUNTER_LIMIT``) are stored, each as message_id with its score.
    """
    with open('faves/by_date.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        for date, rows in itertools.groupby(reader, key=lambda row: row[3]):
            key = "hall:daily:" + date
            if (int(date) % 100) == 1:
                # Progress marker: printed when the day-of-month part is 01.
                print(key)
            for row in itertools.islice(rows, TOP_COUNTER_LIMIT + 1):
                r.zadd(key, row[1], int(row[2]))


def load_hall():
    """Add every row of ``faves/hall.csv`` to the ``hall`` sorted set."""
    with open('faves/hall.csv', 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            r.zadd('hall', row[1], int(row[2]))


if __name__ == "__main__":
    # The loaders are toggled by hand; only the daily hall load is enabled.
    # load_hall()
    # load_faves_by_user()
    load_faves_by_date()
