summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/fix_fav_counts.py 24
-rwxr-xr-x scripts/sort_faves.sh 4
2 files changed, 21 insertions, 7 deletions
diff --git a/scripts/fix_fav_counts.py b/scripts/fix_fav_counts.py
index 00e8d0f..0472488 100644
--- a/scripts/fix_fav_counts.py
+++ b/scripts/fix_fav_counts.py
@@ -3,13 +3,23 @@
before running this command, please run the following SQL, which tallies the faves per post:
- COPY (SELECT messages.user_id, tags.message_id, COUNT(tags.message_id) AS mycount, TO_CHAR(messages.created_on, 'YYYYMMDD')
+ COPY (SELECT messages.user_id, tags.message_id, COUNT(tags.message_id)
+ AS mycount, TO_CHAR(messages.created_on, 'YYYYMMDD')
FROM tags, messages
WHERE tags.message_id = messages.message_id AND tags.tag = 'favorite'
GROUP BY tags.message_id, messages.user_id, messages.created_on)
- TO '/tmp/fav_counts2.csv' WITH CSV;
+ TO '/tmp/fav_counts69.csv' WITH CSV;
- then run ./sort_faves.sh which will pre-sort the data for this script.
+ this file will be owned by the postgres user,
+ so change the 69 to some other number (janky i know)
+
+ then run ./sort_faves.sh /tmp/fav_counts69.csv
+ ...which will pre-sort the data for this script.
+
+ then run python3 fix_fav_counts.py
+ ...this script.
+
+ if you run this twice, don't worry, data will not be duplicated.
"""
import re
@@ -33,6 +43,7 @@ def fetch_users():
# field order: user_id, message_id, score, date
def load_faves_by_user():
+ print("fixing favscores...")
nicks = load_nicks()
counter = 0
user_counter = 0
@@ -59,6 +70,7 @@ def load_faves_by_user():
counter += 1
def load_faves_by_date():
+ print("fixing daily halls...")
date_counter = 0
counter = 0
date = ""
@@ -79,6 +91,7 @@ def load_faves_by_date():
counter += 1
def load_hall():
+ print("fixing hall...")
with open('faves/hall.csv', 'r') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
@@ -93,7 +106,8 @@ def load_nicks():
return nicks
if __name__ == "__main__":
- # load_hall()
- # load_faves_by_user()
+ load_hall()
+ load_faves_by_user()
load_faves_by_date()
+ print("done!")
diff --git a/scripts/sort_faves.sh b/scripts/sort_faves.sh
index eb28a39..3daad0b 100755
--- a/scripts/sort_faves.sh
+++ b/scripts/sort_faves.sh
@@ -12,9 +12,9 @@ echo "Get the top 50 faves for the Hall of Fame"
sort -t, -k3,3 -rn $1 | head -50 > faves/hall.csv
echo "Group faves by user for popular pages, total fave scores"
-sort -t, -k1,1 -r -k3,3 -n faves/counts_sorted.csv > faves/by_user.csv
+sort -t, -k1,1 -r -k3,3 -n $1 > faves/by_user.csv
echo "Group faves by date for daily tallies"
-sort -t, -k4,4 -r -k3,3 -n faves/counts_sorted.csv > faves/by_date.csv
+sort -t, -k4,4 -r -k3,3 -n $1 > faves/by_date.csv