Diffstat (limited to 'scripts')
-rw-r--r--  scripts/fix_fav_counts.py   99
-rwxr-xr-x  scripts/sort_faves.sh       20
2 files changed, 119 insertions, 0 deletions
diff --git a/scripts/fix_fav_counts.py b/scripts/fix_fav_counts.py
new file mode 100644
index 0000000..00e8d0f
--- /dev/null
+++ b/scripts/fix_fav_counts.py
@@ -0,0 +1,99 @@
+"""
+ This needs Python 3 because of py-postgresql.
+
+ Before running this script, run the following SQL, which tallies the fave count per post:
+
+ COPY (SELECT messages.user_id, tags.message_id, COUNT(tags.message_id) AS mycount, TO_CHAR(messages.created_on, 'YYYYMMDD')
+ FROM tags, messages
+ WHERE tags.message_id = messages.message_id AND tags.tag = 'favorite'
+ GROUP BY tags.message_id, messages.user_id, messages.created_on)
+ TO '/tmp/fav_counts2.csv' WITH CSV;
+
+ Then run ./sort_faves.sh, which pre-sorts that data for this script.
+"""
+
+import postgresql
+import redis
+import csv
+
+db = postgresql.open("pq://postgres:root@localhost/dumpfm")
+db.execute("SET CLIENT_ENCODING to 'UNICODE'")
+
+r = redis.Redis("192.168.156.111")
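+# note: the zadd() calls below pass member then score, the argument order of the
+# older redis-py 2.x API this script appears to target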
+
+def fetch_users():
+    """Prepare a SELECT over all users; return its chunks() method, which yields row batches."""
+    statement = """SELECT user_id, nick FROM users"""
+
+    ps = db.prepare(statement)
+    return ps.chunks
+
+# files produced by sort_faves.sh under faves/: by_date.csv, by_user.csv, counts_sorted.csv, hall.csv
+# field order in each: user_id, message_id, score, date (YYYYMMDD)
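+#
+# Redis keys written by the loaders below (summary derived from the code):
+#   favscores             - sorted set of nick -> total fave score
+#   popular:<nick>        - per-user sorted set of message_id -> fave count (top ~30 rows kept)
+#   hall:daily:<YYYYMMDD> - per-day sorted set of message_id -> fave count (top ~30 rows kept)
+#   hall                  - all-time sorted set of message_id -> fave count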
+
+def load_faves_by_user():
+    """Build popular:<nick> sorted sets and per-user total fave scores from faves/by_user.csv."""
+    nicks = load_nicks()
+    counter = 0
+    user_counter = 0
+    score = 0
+    user_id = 0
+    key = ""
+    with open('faves/by_user.csv', 'r') as csvfile:
+        reader = csv.reader(csvfile)
+        for row in reader:
+            if row[0] != user_id:
+                # new user: flush the previous user's running total first
+                if score != 0:
+                    r.zadd("favscores", nicks[user_id], score)
+                counter = 0
+                user_counter += 1
+                score = 0
+                user_id = row[0]
+                key = "popular:" + nicks[user_id]
+                if (user_counter % 1000) == 0:
+                    print(str(user_counter) + " ...")
+            score += int(row[2])
+            if counter > 30:
+                continue
+            r.zadd(key, row[1], int(row[2]))
+            counter += 1
+    # the loop only flushes on a user change, so write out the last user's total
+    if score != 0:
+        r.zadd("favscores", nicks[user_id], score)
+
+def load_faves_by_date():
+    """Build hall:daily:<YYYYMMDD> sorted sets from faves/by_date.csv."""
+    counter = 0
+    date = ""
+    key = ""
+    with open('faves/by_date.csv', 'r') as csvfile:
+        reader = csv.reader(csvfile)
+        for row in reader:
+            if row[3] != date:
+                counter = 0
+                date = row[3]
+                key = "hall:daily:" + row[3]
+                # progress marker: print once per month (dates are YYYYMMDD)
+                if (int(date) % 100) == 1:
+                    print(key)
+            if counter > 30:
+                continue
+            r.zadd(key, row[1], int(row[2]))
+            counter += 1
+
+def load_hall():
+    """Load the all-time hall of fame from faves/hall.csv into the 'hall' sorted set."""
+    with open('faves/hall.csv', 'r') as csvfile:
+        reader = csv.reader(csvfile)
+        for row in reader:
+            r.zadd('hall', row[1], int(row[2]))
+
+def load_nicks():
+    """Return a dict mapping user_id (as a string) to nick for every user."""
+    nicks = {}
+    chunks = fetch_users()
+    for rowset in chunks():
+        for row in rowset:
+            nicks[str(row[0])] = row[1]
+    return nicks
+
+if __name__ == "__main__":
+    # load_hall()
+    # load_faves_by_user()
+    load_faves_by_date()
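+    # rough sanity check once a loader has run (hypothetical date key; adjust to your data):
+    #   redis-cli -h 192.168.156.111 zrevrange hall:daily:<YYYYMMDD> 0 9 WITHSCORES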
+
diff --git a/scripts/sort_faves.sh b/scripts/sort_faves.sh
new file mode 100755
index 0000000..eb28a39
--- /dev/null
+++ b/scripts/sort_faves.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+then
+    echo "Usage: $(basename "$0") fav_scores.csv"
+    exit 1
+fi
+
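+# Expected input: the CSV produced by the COPY query in fix_fav_counts.py,
+# one row per post with fields: user_id,message_id,score,YYYYMMDD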
+mkdir -p faves
+
+echo "Get the top 50 faves for the Hall of Fame"
+sort -t, -k3,3 -rn $1 | head -50 > faves/hall.csv
+
+echo "Group faves by user for popular pages, total fave scores"
+sort -t, -k1,1 -r -k3,3 -n faves/counts_sorted.csv > faves/by_user.csv
+
+echo "Group faves by date for daily tallies"
+sort -t, -k4,4 -r -k3,3 -n faves/counts_sorted.csv > faves/by_date.csv
+
+