import re
import urllib

from photoblaster.db.models import Iasdfus
from photoblaster.db.models import ImCmd


def super_unquote(s):
    # URLs in the s3 logs can be percent-encoded several times over;
    # unquote repeatedly (20 passes is more than enough) to fully decode.
    for _ in xrange(20):
        s = urllib.unquote(s)
    return s


# Fetch every row in the Iasdfus table flagged deleted=1. This loads all
# of the objects into memory as a list -- there are about 92,000 of them.
deleted_urls = Iasdfus.search(deleted=True).all()
# print len(deleted_urls)

# Note on sessions: the docs recommend a single session shared across all
# requests, but we create a new session per request here; not sure yet
# whether that matters. In the flask-sqlalchemy pattern the session is
# created once when the server starts. The equivalent for this script
# would be to instantiate pb.db.Database() up front and call
# database.close() at the end. close() could also live in the destructor:
# Python classes have a __del__ hook that runs when the object is
# garbage-collected, so the session would be closed when the database
# object goes out of scope at the end of the script.

# `address` is a field in Iasdfus (this data comes from the s3 logs), e.g.:
# im/ff/wigglelogo_1347403794_frankhats_1347403811_frankhats.gif
n = 0
for url in deleted_urls:
    # print "from iasdfus: (%s)" % (url.address)
    try:
        # Pull the "dir" and "newfile" column values out of the Iasdfus
        # address; they are used to search a different table, ImCmd.
        parts = url.address.split("/")
        dirpart = parts[1]
        newfile = super_unquote(parts[2])
        # Some filenames have a URL glued on the end; keep only the part
        # before the first (optionally +-prefixed) "http".
        newfile_parts = re.split(r'\+?http', newfile)
        newfile = newfile_parts[0]
        # print "parts from iasdfus (newfile: %s, dir: %s)" % (newfile, dirpart)
    except IndexError:
        continue

    try:
        # Search ImCmd for the row whose dir and newfile match the values
        # from the Iasdfus row. The script gets stuck here regardless of
        # which item it is on, so it is probably a session limit or
        # something similar.
        query = ImCmd.search(**{"dir": dirpart, "newfile": newfile})
        # print "imcmd done query n: %d\n" % (n)
        n += 1
        # Query has no .free() method that we can find, so this looks like
        # some kind of session parameter. Even when nothing matches,
        # search() still returns a Query object. Working theory: when the
        # rollback error occurs there is a ~30-second timeout before it
        # proceeds, which would explain the stalls.

        # Process the query.
        matching_url = query.first()
        # print "got first\n"
        if not matching_url:
            print "imcmd: %s %s\n" % (dirpart, newfile)
            continue
        if matching_url.deleted == 1:
            # print "is deleted\n"
            continue

        # Update matching_url.
        matching_url.update(deleted=1)
        print "update done\n"
    except AttributeError:
        # Unclear why this error occurs; we may need to implement handling
        # for another rollback exception here.
        raise

# Why is it slow? ImCmd is huge -- over 600,000 rows -- so each lookup is
# expensive. An index on (dir, newfile) would help, since that pair should
# be unique.
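
# --- Sketch: one-session-per-script pattern discussed above. ---
# The Database class name and close()/__del__ idea come from the comments;
# the engine/sessionmaker wiring below is a generic SQLAlchemy assumption,
# not photoblaster's actual pb.db implementation.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


class Database(object):
    """Owns a single session for the whole script's lifetime."""

    def __init__(self, db_url):
        self.engine = create_engine(db_url)
        self.session = sessionmaker(bind=self.engine)()

    def close(self):
        self.session.close()

    def __del__(self):
        # __del__ runs when the object is garbage-collected, e.g. when it
        # goes out of scope at the end of the script, so the session gets
        # closed even if close() is never called explicitly.
        self.close()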
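
# --- Sketch: the (dir, newfile) index suggested above. ---
# Assumes the ImCmd model maps to a table named "imcmd" with dir/newfile
# columns, and a SQLAlchemy 1.x-style connection API (DDL autocommits).
# Run once against the database, not on every invocation of this script.
from sqlalchemy import text


def create_imcmd_index(db_url):
    engine = create_engine(db_url)
    conn = engine.connect()
    # UNIQUE matches the assumption above that (dir, newfile) is unique;
    # drop the UNIQUE keyword if duplicates already exist in the table.
    conn.execute(text(
        "CREATE UNIQUE INDEX idx_imcmd_dir_newfile ON imcmd (dir, newfile)"
    ))
    conn.close()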