summaryrefslogtreecommitdiff
path: root/sync_iasdfus_deleted.py
diff options
context:
space:
mode:
Diffstat (limited to 'sync_iasdfus_deleted.py')
-rw-r--r--sync_iasdfus_deleted.py67
1 files changed, 60 insertions, 7 deletions
diff --git a/sync_iasdfus_deleted.py b/sync_iasdfus_deleted.py
index f062a19..b654c0d 100644
--- a/sync_iasdfus_deleted.py
+++ b/sync_iasdfus_deleted.py
@@ -4,27 +4,80 @@ import re
from photoblaster.db.models import Iasdfus
from photoblaster.db.models import ImCmd
+#database = Database()
def super_unquote(s):
    """Repeatedly percent-decode ``s`` until it stops changing.

    The S3-log addresses stored in Iasdfus arrive URL-encoded several
    times over, so a single urllib.unquote() pass is not enough.
    Decoding an already-decoded string is a no-op, so iterating to a
    fixed point handles any nesting depth (the previous version
    hard-coded exactly 20 passes and kept decoding even after the
    string had stabilized).
    """
    while True:
        unquoted = urllib.unquote(s)
        if unquoted == s:
            # Stable: no percent-escapes left to expand.
            return s
        s = unquoted
-deleted_urls = Iasdfus().search(deleted=True).all()
+#searches for elements in the Iasdfus table that have deleted=1
+#stores all the objects as a list in memory, there are 92,000
+deleted_urls = Iasdfus.search(deleted=True).all()
print len(deleted_urls)
+#well do you think I should try to study the flask-sqlalchemy sources and look for clues?
+#well i'm reading doc on it, they recommend create one session per all requests, and we here are creating new session per
+#each request, not sure if that matters I guess it does.
+#so in other words, in the flask-sqlalchemy pattern the session is created when the server is started
+#and in our case, we are wrapping a session into each query
+#so instead we need to invoke session as part of this script?
+#well almost. I think need to make main class like Database, and use it like this
+# and inside .rs() get it from the Database() object
+# I think we need to stop trying to create new sessions every time, obviously there's
+# something under the hood making it impossible to use that way in any scalable situation, you know? yeah
+#so instead of calling sqlalchemy.session directly in the script
+#instantiate pb.db.Database()
+#and at the end of the script
+#database.close(), something like this? close() can go in the destroy method for Database; it will go out of scope at the end of the script
+
+#do python classes have a default destroy method, like a hook that gets called when they are cleared? __del__ i think
+#ok I can work on that
+
+
+#address is a field in Iasdfus (this data is coming from the s3logs)
#im/ff/wigglelogo_1347403794_frankhats_1347403811_frankhats.gif
-bob = 0
+n = 0
for url in deleted_urls:
+ print "from iasdfus: (%s)" % (url.address)
+ #iterates through
try:
parts = url.address.split("/")
+    #rips out "dir" and "newfile" column values from Iasdfus to be used
+ #to search a different table, ImCmd
dirpart = parts[1]
newfile = super_unquote(parts[2])
newfile_parts = re.split(r'\+?http', newfile)
newfile = newfile_parts[0]
- matching_url = ImCmd.search(**{"dir": dirpart, "newfile": newfile}).first()
- print matching_url.deleted
- matching_url.update(deleted=1)
- except AttributeError:
- continue
+ print "parts from iasdfus (newfile : %s, dir: %s)" % (newfile, dirpart)
except IndexError:
continue
+ try:
+ #searches ImCmd for that row, where dirpart and newfile are the same as the
+ #values in the Iasdfus row
+ print "imcmd: %s %s\n" % (dirpart, newfile)
+ #k so looks like it's stuck here regardless on item, so this probably session limit, or something like that
+ query = database.rs('ImCmd').search(**{"dir": dirpart, "newfile": newfile})
+ print "imcmd done query n: %d\n" % (n)
+ n +=1
+ #can't find anything about it me too
+    #so it doesn't have .free(), I think? so it's some sort of session parameter? looks so — haven't seen it anywhere. it only has free when it doesn't find anything; it's still a Query object
+ #could be that when there's this rollback error, it has a timeout of 30 seconds that it waits before proceeding
+ #this whole thing is so weird
+ #it seems like my theory might be right...seems like it to you right? not sure yet
+ #processes the query
+ matching_url = query.first()
+ print "got first\n"
+ #if not matching_url:
+ # print "nothing found\n"
+ # continue
+ #if matching_url.deleted == 1:
+ # print "is deleted\n"
+ # continue
+ #update matching_url
+ #matching_url.update(deleted=1)
+ #print "update done\n"
+ except AttributeError:
+ raise
+ #honestly can't imagine why I'm getting this error maybe we just need to implement another rollback exception?
+    #not sure yet. why is it slow? how long does ImCmd take? yeah it's huge, over 600,000 fields
+    #probably needs an index on dir + newfile, as that combination is unique — an index would be nice