"""
this needs python 3 due to py-postgresql...
before running this command, please run the following SQL, which tallies the faves per post:
COPY (SELECT messages.user_id, tags.message_id, COUNT(tags.message_id)
AS mycount, TO_CHAR(messages.created_on, 'YYYYMMDD')
FROM tags, messages
WHERE tags.message_id = messages.message_id AND tags.tag = 'favorite'
GROUP BY tags.message_id, messages.user_id, messages.created_on)
TO '/tmp/fav_counts69.csv' WITH CSV;
this file will be owned by the postgres user,
so change the 69 to some other number (janky i know)
then run ./sort_faves.sh /tmp/fav_counts69.csv
...which will pre-sort the data for this script.
then run python3 fix_fav_counts.py
...this script.
if you run this twice, don't worry, data will not be duplicated.
"""
import re
import sys
import postgresql
import redis
import csv
# database and redis connections (credentials and hosts are environment-specific)
db = postgresql.open("pq://postgres:root@localhost/dumpfm")
db.execute("SET CLIENT_ENCODING to 'UNICODE'")
r = redis.Redis("192.168.156.111")
# note: the zadd calls below use the old redis-py (pre-3.0) zadd(key, member, score)
# calling convention
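# A minimal sketch of the pre-sort step described in the docstring, assuming
# sort_faves.sh just splits the COPY output into the three files this script
# reads: faves/by_user.csv grouped by user_id, faves/by_date.csv grouped by
# date, and faves/hall.csv ordered by fave count. The file names, sort keys,
# and this function itself are assumptions for illustration; the real shell
# script may differ, and nothing here calls this function.
def presort_example(src):
    with open(src, 'r') as f:
        rows = list(csv.reader(f))  # each row: user_id, message_id, score, date
    with open('faves/by_user.csv', 'w', newline='') as f:
        csv.writer(f).writerows(sorted(rows, key=lambda row: row[0]))
    with open('faves/by_date.csv', 'w', newline='') as f:
        csv.writer(f).writerows(sorted(rows, key=lambda row: row[3]))
    with open('faves/hall.csv', 'w', newline='') as f:
        csv.writer(f).writerows(sorted(rows, key=lambda row: int(row[2]), reverse=True))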
def fetch_users():
    statement = """SELECT user_id, nick FROM users"""
    ps = db.prepare(statement)
    # ps.chunks is a bound method; the caller invokes it to iterate over row chunks
    return ps.chunks
# input files: by_date.csv, by_user.csv, counts_sorted.csv, hall.csv
# field order in each: user_id, message_id, score, date
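# For illustration only (made-up values), a row is expected to look like:
#   42,31337,7,20100415
# i.e. user 42's message 31337 received 7 favorites and was posted on 2010-04-15.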
def load_faves_by_user():
    print("fixing favscores...")
    nicks = load_nicks()
    counter = 0
    user_counter = 0
    score = 0
    user_id = 0
    key = ""
    with open('faves/by_user.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            if row[0] != user_id:
                # new user: flush the previous user's total fave count
                if score != 0:
                    r.zadd("favscores", nicks[user_id], score)
                counter = 0
                user_counter += 1
                score = 0
                user_id = row[0]
                key = "popular:" + nicks[user_id]
                if (user_counter % 1000) == 0:
                    print(str(user_counter) + " ...")
            score += int(row[2])
            # cap how many posts per user are stored in popular:<nick>
            if counter > 30:
                continue
            r.zadd(key, row[1], int(row[2]))
            counter += 1
        # flush the last user's total, which the loop above never reaches
        if score != 0:
            r.zadd("favscores", nicks[user_id], score)
def load_faves_by_date():
    print("fixing daily halls...")
    date_counter = 0
    counter = 0
    date = ""
    key = ""
    with open('faves/by_date.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            if row[3] != date:
                counter = 0
                date_counter += 1
                date = row[3]
                key = "hall:daily:" + row[3]
                # print progress on the first of each month (date is YYYYMMDD)
                if (int(date) % 100) == 1:
                    print(key)
            # cap how many posts are stored per daily hall
            if counter > 30:
                continue
            r.zadd(key, row[1], int(row[2]))
            counter += 1
def load_hall():
    print("fixing hall...")
    with open('faves/hall.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            r.zadd('hall', row[1], int(row[2]))
def load_nicks():
    # map user_id (stringified, to match the CSV values) to nick
    nicks = {}
    chunks = fetch_users()
    for rowset in chunks():
        for row in rowset:
            nicks[str(row[0])] = row[1]
    return nicks
if __name__ == "__main__":
load_hall()
load_faves_by_user()
load_faves_by_date()
print("done!")