diff options
| -rw-r--r-- | photoblaster/db/__init__.py | 1 | ||||
| -rw-r--r-- | photoblaster/db/models/__init__.py | 42 | ||||
| -rw-r--r-- | sync_iasdfus_deleted.py | 67 | ||||
| -rw-r--r-- | test_db.py | 18 |
4 files changed, 115 insertions, 13 deletions
diff --git a/photoblaster/db/__init__.py b/photoblaster/db/__init__.py index c720869..a3bb960 100644 --- a/photoblaster/db/__init__.py +++ b/photoblaster/db/__init__.py @@ -12,3 +12,4 @@ engine = create_engine('mysql://{}:{}@{}/{}'.format( session_factory = sessionmaker(bind=engine) SessionHeap = scoped_session(session_factory) +Session = session_factory diff --git a/photoblaster/db/models/__init__.py b/photoblaster/db/models/__init__.py index 4152860..bb3e496 100644 --- a/photoblaster/db/models/__init__.py +++ b/photoblaster/db/models/__init__.py @@ -1,4 +1,4 @@ -from photoblaster.db import SessionHeap +from photoblaster.db import SessionHeap, Session from sqlalchemy import inspect, desc from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql.expression import func @@ -6,15 +6,16 @@ from sqlalchemy.sql.expression import func Base = declarative_base() - class Actions(object): @classmethod def create(cls, **kwargs): - session = SessionHeap() + session = Session() try: session.add(cls(**kwargs)) session.commit() except: + #also took this pattern from the sqlalchemy docs to handle "rollback exceptions" + #should I show you the code that's causing the error now? yep session.rollback() raise finally: @@ -30,37 +31,66 @@ class Actions(object): except: session.rollback() raise - +#what about update, how it's used? we should maybe look at the flush thing + def free(self): + #I tried creating this free method to help with these timeouts + session = inspect(self).session + session.close() + SessionHeap.remove() + @classmethod def _search(cls, **kwargs): - session = SessionHeap() + session = Session() + print "session()" query = session.query(cls).filter_by(**kwargs) + session.close() + return query try: session.commit() except: + print "session.rollback()" session.rollback() raise finally: + print "session.close()" session.close() SessionHeap.remove() return query - +# def _search(cls, **kwargs): +# session = SessionHeap() +# print "session()" +# query = session.query(cls).filter_by(**kwargs) +# try: +# session.commit() +# except: +# print "session.rollback()" +# session.rollback() +# raise +# finally: +# print "session.close()" +# session.close() +# SessionHeap.remove() +# return query +# @classmethod def search_random(cls, **kwargs): return cls._search(**kwargs).order_by(func.rand()) @classmethod def search(cls, **kwargs): + #something with this maybe? is it creating an extra session instance? because it seems like return cls._search(**kwargs).order_by(desc(cls.id)) @classmethod def query(cls, **kwargs): session = SessionHeap() + print "session() query" query = session.query(cls) try: session.add(cls(**kwargs)) session.commit() except: + print "session.rollback()" session.rollback() raise finally: diff --git a/sync_iasdfus_deleted.py b/sync_iasdfus_deleted.py index f062a19..b654c0d 100644 --- a/sync_iasdfus_deleted.py +++ b/sync_iasdfus_deleted.py @@ -4,27 +4,80 @@ import re from photoblaster.db.models import Iasdfus from photoblaster.db.models import ImCmd +#database = Database() def super_unquote(s): for i in xrange(0,20): s = urllib.unquote(s) return s -deleted_urls = Iasdfus().search(deleted=True).all() +#searches for elements in the Iasdfus table that have deleted=1 +#stores all the objects as a list in memory, there are 92,000 +deleted_urls = Iasdfus.search(deleted=True).all() print len(deleted_urls) +#well do you think I should try to study the flask-sqlalchemy sources and look for clues? +#well i'm reading doc on it, they recommend create one session per all requests, and we here are creating new session per +#each request, not sure if that matters I guess it does. +#so in other words, in the flask-sqlalchemy pattern the session is created when the server is started +#and in our case, we are wrapping a session into each query +#so instead we need to invoke session as part of this script? +#well almost. I think need to make main class like Database, and use it like this +# and inside .rs() get it froem Database() object +# I think we need to stop trying to create new sessions every time, obviously there's +# something under the hood making it impossible to use that way in any scalable situation, you know? yeah +#so instead of calling sqlalchemy.session directly in the script +#instantiate pb.db.Database() +#and at the end of the script +#database.close(), something like this? close() can be in destroy method for dataabase, it will go out of scope at the end of script + +#do python classes have a default destroy method, like a hook that gets called when they are cleared? __del__ i think +#ok I can work on that + + +#address is a field in Iasdfus (this data is coming from the s3logs) #im/ff/wigglelogo_1347403794_frankhats_1347403811_frankhats.gif -bob = 0 +n = 0 for url in deleted_urls: + print "from iasdfus: (%s)" % (url.address) + #iterates through try: parts = url.address.split("/") + #rips out "dir" and "newfile" column values from Iasdfus to used + #to search a different table, ImCmd dirpart = parts[1] newfile = super_unquote(parts[2]) newfile_parts = re.split(r'\+?http', newfile) newfile = newfile_parts[0] - matching_url = ImCmd.search(**{"dir": dirpart, "newfile": newfile}).first() - print matching_url.deleted - matching_url.update(deleted=1) - except AttributeError: - continue + print "parts from iasdfus (newfile : %s, dir: %s)" % (newfile, dirpart) except IndexError: continue + try: + #searches ImCmd for that row, where dirpart and newfile are the same as the + #values in the Iasdfus row + print "imcmd: %s %s\n" % (dirpart, newfile) + #k so looks like it's stuck here regardless on item, so this probably session limit, or something like that + query = database.rs('ImCmd').search(**{"dir": dirpart, "newfile": newfile}) + print "imcmd done query n: %d\n" % (n) + n +=1 + #can't find anything about it me too + #so it doesn't have .free(), I think it doesni so it's some sort of session parameter? looks so seen it anywhere? have free only when it doesn't find anything it's still a Query objects + #could be that when there's this rollback error, it has a timeout of 30 seconds that it waits before proceeding + #this whole thing is so weird + #it seems like my theory might be right...seems like it to you right? not sure yet + #processes the query + matching_url = query.first() + print "got first\n" + #if not matching_url: + # print "nothing found\n" + # continue + #if matching_url.deleted == 1: + # print "is deleted\n" + # continue + #update matching_url + #matching_url.update(deleted=1) + #print "update done\n" + except AttributeError: + raise + #honestly can't imagine why I'm getting this error maybe we just need to implement another rollback exception? + #not syre yet. why is it slow? how long ImCMD take s time? yeah it's huge, over 600,000 fields + #probably needs an index on dir + newfile as that is unique right index will be nice diff --git a/test_db.py b/test_db.py new file mode 100644 index 0000000..dc694f0 --- /dev/null +++ b/test_db.py @@ -0,0 +1,18 @@ +#!/usr/bin/python2.7 +from photoblaster.db.imcmd import ImCmd +#ok so what is it exactly that we want to test, creating a session and doing a simple +#query? +#imcmd is the name of the table, by the way yeah need to create a session and try to supply not ImCmd, but variable which contain it +#ok so should I use that first db.py module I showed you? or do it directly from sqlalchemy any will doing + + +from photoblaster.db import Db +import simplejson as json +db = Db({ table: ImCmd }) + + +print map(lambda n: n.serialize(), db.search(**{ 'tag': 'PbPattern' })) +#ok looks better so far, right? yeah we can also hide table into self . something like this...seems better? yep +#is this helpful at all? yeah, now we need to change it so we provide not imcmd, but variable with it inside. works +#ok where does this search method go? Db class? yes looks so +#what should it return? probably should rename serialze to as_dict right? no need, just return results as is |
