author     Pepper <pepper@scannerjammer.com>   2019-12-04 04:30:53 +0000
committer  Pepper <pepper@scannerjammer.com>   2019-12-04 04:30:53 +0000
commit     65697891be538591f57384d3469ab2a7f2a86568 (patch)
tree       5b34929c7e2c64abbec1949d755205839281456b
parent     1d8ed6e2f6ffb872c2e29104067a28ae098ec290 (diff)
-rwxr-xr-x  example.py                 5
-rw-r--r--  notes                    196
-rw-r--r--  ricky/config.py            1
-rw-r--r--  ricky/dataset.py          32
-rw-r--r--  ricky/params/__init__.py  34
-rw-r--r--  ricky/utils.py             2
6 files changed, 217 insertions, 53 deletions
diff --git a/example.py b/example.py
--- a/example.py
+++ b/example.py
@@ -5,6 +5,11 @@ import ricky.utils as utils
 params = ricky.params.PbGradient()
 params.randomize()
 print params.as_serialized()
+print ""
+print params.as_dict()
+print ""
+
+
 #print params.execute()
 #print params
 #data = utils.data_from_url(
diff --git a/notes b/notes
new file mode 100644
--- /dev/null
+++ b/notes
@@ -0,0 +1,196 @@
+
+ # it seems like there are probably better/more clever ways to implement some of these "options"
+ # like the OFFLINE variable seems a bit weird, the probabilities_local, etc
+ # maybe I just don't know the right pattern from the client side? like a pretty way of
+ # calling the functions?
+ # yeah, global variables from config, like everyone likes to do in python, are not good. usually it looks like this
+ # my $module = MyMainModule->new({ offline => 1 });
+ # package MyMainModule;
+ # sub offline { return $self->{offline} }
+ # package MySubModule;
+ # sub parent { return $self->{parent} }
+ # sub something { print $self->parent->offline }
+ # something like this. so whenever the main module is invoked, have an optional
+ # settings hash that you can pass in, right? yep
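+
+# a rough Python version of that settings-hash pattern; the class names here
+# are hypothetical, just to illustrate the idea, not actual ricky code:
+#
+# class MainModule(object):
+#     def __init__(self, settings=None):
+#         # one optional settings dict at construction, no module-level OFFLINE global
+#         self.settings = settings or {}
+#     def offline(self):
+#         return self.settings.get('offline', False)
+#
+# class SubModule(object):
+#     def __init__(self, parent):
+#         self.parent = parent
+#     def something(self):
+#         # submodules reach options through their parent, like $self->parent->offline
+#         return self.parent.offline()
+#
+# main = MainModule(settings={'offline': True})
+# sub = SubModule(parent=main)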
+
+
+#how about the way this is laid out, seems bad? yeah, config files like that are better avoided if possible
+#so with params below, url can be generated by the module itself, it knows its own name, and can get the base url from the parent class.
+# offline is for the main module initialization hash.
+# directories, in perl we have File::ShareDir, check out how it works. basically it's a module to get the full path to the share/ folder
+#in an installed app. after module installation it can get the right path for you, looks good. ok I'll check it out
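+
+# a possible python analogue of File::ShareDir is setuptools' pkg_resources,
+# assuming the probability json files ship inside the ricky package itself
+# (the package-relative path below is a guess, not necessarily this repo's
+# real layout); inside a Params method it could be something like:
+#
+# import pkg_resources
+# filepath = pkg_resources.resource_filename(
+#     'ricky', 'params/probabilities/%s.json' % self.__class__.__name__)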
+
+
+#ok last thing...wanted to show you how I included photoblaster as a git submodule
+
+#so one simple solution would be to put this function in the parent class, like self._get_pb_url(PB_PATTERN_API)
+
+# while subclassing this works, we should try to detect the length of params
+# and build a new data set for each type of params set...
+# therefore, an instance of SupervisedDataSet could actually be
+# accessed through the params instance...simplified one-to-one mapping
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+#this is still a bit messed up
+
+#so what I want to do here is described above
+#so basically using the machine learning lib...
+# I get three things from the machine learning lib
+# a DataSet class
+# a Network class
+# a Trainer class
+
+#when I use the Trainer class, it acts on the Network class, feeding it the DataSet
+# then the network can generate params basically by doing this
+# params.randomize()
+# if network.evaluate(params.serialized()):
+#     params.execute()
+# does that make some sense? yep
+
+# but the thing is that a dataset is something where I store a huge list of
+# params.serialized() outputs, as well as whether or not I liked those
+# params.serialized() outputs
+# dataset.add(params.serialized(), liked=True)
+# dataset.add(params.serialized(), liked=False)
+# do you follow me so far? yes
+
+# so I'm thinking I should keep the dataset actually INSIDE the params class
+# because I don't need more than one params instance to feed the DataSet
+
+# so it would be something like this
+
+# self._dataset.add(self.serialized, liked=True)
+# something like that, does that make some sense? how does the Trainer class need the DataSet to look?
+# just as a variable, an instance of SupervisedDataSet(), and it can keep training the network with many datasets?
+# yeah and you can keep adding to the dataset after you've trained the network and then retrain or train a new Network
+# both datasets and networks have a reset() method that clears them. A bit confusing but I guess it's fine
+# hmm, well if it can keep training it with new datasets without resetting old data, you can just make the DataSet class
+# a converter from the Params class into pybrain data. no need to embed it into the Params class, as you may need to change machine learning libs
+# in the future, and that will be difficult to do with merged classes.
+
+
+#problem is that the dataset needs specific info from the params class,
+# it needs the number of params, so that when it is instantiated I can specify the
+# number of inputs and targets (specifically, len(params_instance) inputs, and 1 output)
+# similarly the network needs input layers and output layers, which will match
+# the datasets in this case (it's binary classification)
+# so the network has len(params_instance) inputs and 1 output
+
+# so I'm thinking of just putting EVERYTHING inside the params class
+
+# so I can have a method
+
+# params_instance.train_nn_from_file(Datafile)
+# params_instance.randomize(use_nn=True)
+
+# something like that? better not. think of the params class as your Model class in Model/Controller/View, and in this case the neural Network
+# will be Controller/View. add params.length() or whatever is necessary, and then in DataSet and related classes use it like:
+#
+# api = Api('PbGradient')
+# sample_params = api.new_params()
+# ds = pybrain.DataSet(len(sample_params), 1)
+#
+# def add_param(params):
+#     ds.add(params.as_serialized(), params.extra.liked())
+# api.list_params(add_param)
+
+# trainer = pybrain.Trainer()
+# nn = pybrain.NeuralNetwork(len(params), 1)
+# trainer.train(ds, nn)
+#
+# params = api.new_params()
+# for i in range(0, 100):
+#     params.randomize()
+#     if nn.active(params.serialized) > .5:
+#         params.execute()
+#
+# ok so this is the procedure more or less, do you see how this is working? yep
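+
+# a sketch of the same procedure with pybrain's actual class names
+# (SupervisedDataSet / buildNetwork / BackpropTrainer); the dimensions and
+# the liked/not-liked samples are stand-ins, and it assumes as_serialized()
+# returns a flat numeric tuple:
+#
+# from pybrain.datasets import SupervisedDataSet
+# from pybrain.tools.shortcuts import buildNetwork
+# from pybrain.supervised.trainers import BackpropTrainer
+#
+# n_inputs = len(params)                       # one input unit per param
+# ds = SupervisedDataSet(n_inputs, 1)          # 1 target: liked or not
+# ds.addSample(params.as_serialized(), (1,))   # a params set I liked
+# ds.addSample(params.as_serialized(), (0,))   # one I didn't
+#
+# net = buildNetwork(n_inputs, n_inputs, 1)    # input / hidden / output layers
+# trainer = BackpropTrainer(net, ds)
+# trainer.train()                              # one epoch, returns the error
+#
+# params.randomize()
+# if net.activate(params.as_serialized())[0] > .5:
+#     params.execute()
+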
+# so what about putting these things in the params class, still a bad idea?
+# I was thinking I could make them accessible from params through multiple inheritance, just
+# keeping all my nn stuff in a separate file
+# well dataset is one-to-many with params, so it can't be inside in any case. to improve it a bit, you can restore the api class,
+# something like this. but still you don't need to merge the ds and api classes, it breaks logic.
+
+# so it will work if I move the ds into the params class, but it won't adhere to the design pattern? basically
+# it's not the right design pattern to do that? right, it's also logically not possible to put a single ds into multiple params instances.
+# right but there doesn't need to be multiple params instances. that's why I was thinking of doing that, no reason
+# to have more than one params instance. even if there is one to one between ds and params, it breaks logic between model and user of model,
+# the neural network is considered a user of this params model.
+# hmm can the neural network be trained on the params model directly...like
+# trainer(params.ds, nn) ?
+# it would work, but is that just bad for the reasons you stated above? right, it will work, but if you want to change the library later it
+# won't be possible to do easily because it's embedded, right? right, even if you won't ever change the library, it's still very hard to
+#understand how this thing works, and why the child class contains something different. yeah so in this case,
+# do I wrap both classes in methods? like do I just write a method to create an image generator that uses these classes? (pybrain, params, etc)
+#? the params class is extended so that it's easy to get all info, like this params.extra.liked(), and it can even be params.as_normalized, that's fine.
+# as for the neural network, you can wrap it into something like an ApiMachineLearningPybrain() class, which can accept a Params() instance as a sample,
+# with data, and create a trained network for you.
+
+# so for design, make multiple instances of params, and create a separate nn instance, maybe something that wraps
+# other methods in the class (ie can call params.randomize()) and generate for me a set of params that I will probably like?
+# it's really the missing Api class here that causes logic problems. with that restored it will look just fine.
+# api = Api('PbGradient')
+# api_machinel = ApiMachineLearningPybrain(api = api)
+# params = api_machinel.new_params_liked_by_me()
+# params.execute()
+#
+# class ApiMachineLearningPybrain:
+#     def train():
+#         for param in self.api.list_params:
+#             ...
+#     def new_params_liked_by_me():
+#         for i in range(0, 100):
+#             params.randomize()
+#             if nn.active(params.serialized) > .5:
+#                 return params
+# and so on. if you want to change the machine learning library, you write another ApiMachineLearning class. ApiMachineLearning* knows how pybrain works
+#and knows how api/params works, so it can use all of them and combine them into useful data. something like this.
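+
+# filling in that wrapper sketch a bit, reusing the pybrain pieces from above;
+# the method bodies are guesses at the same idea, not working ricky code, and
+# it assumes list_params() returns an iterable (above it took a callback):
+#
+# class ApiMachineLearningPybrain(object):
+#     def __init__(self, api):
+#         self.api = api
+#         sample = api.new_params()
+#         self.ds = SupervisedDataSet(len(sample), 1)
+#         self.net = buildNetwork(len(sample), len(sample), 1)
+#
+#     def train(self):
+#         for params in self.api.list_params():
+#             liked = 1 if params.extra.liked() else 0
+#             self.ds.addSample(params.as_serialized(), (liked,))
+#         BackpropTrainer(self.net, self.ds).train()
+#
+#     def new_params_liked_by_me(self, tries=100):
+#         for i in range(tries):
+#             params = self.api.new_params()
+#             params.randomize()
+#             if self.net.activate(params.as_serialized())[0] > .5:
+#                 return params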
+
+#ok good...I get it. it's a logical separation. So embedding external libs is generally sort of a bad practice? right, unless it's part of the
+#solution, like reading json, etc. I see
+
+# well it's sort of unfortunate that I deleted the Api class, but it made it easier for me to keep writing because at the time
+# I didn't need it at all. it's fine to sort of delete and remake things as something grows, right? yeah, classes come and go all the time.
+
+# ok well let's figure this out
+
+
+
+
+ #so this is kinda just bad? or is it ok for now? well, not very good
+ # ok I'll learn the python installer and start using it, that should be next on my list
+ # so do you do the same thing for perl? yep, I use module-starter, it basically creates Makefile.PL, only a few edits and it's done
+ # and even for something like Struct-it, it is a global package? yes, it's all a perl module and wrapped into a gentoo ebuild.
+ # ahh ok so the one thing I don't quite understand is, what about the database, obviously when you install it, the db is
+ # empty right? not really. I have a config file in /etc/ for the website, and the package installs a sample config, just like any other
+ #sane app. you install the app, copy the config from the example to the right place, edit the database parameters there and start it.
+ # do you use my.cnf or something? how does the app know the db password etc? from the config in /etc for this app. so it
+ # reads from /etc/ in the app...? yes ok I get it
+ # and the db create commands, do you do those separately? yeah, in a gentoo ebuild there are commands for configuring the app after
+ # installation, you can put any command there, including db initialization, migration etc.
+ # ok I'm understanding this
+
+are these mostly just issues I'll iron out by continually making new projects/writing more code? yep
+ok so they aren't anything I should be stressed out about, really, seems like with every project I'll figure out a way to do
+something better? is that basically how it happens? yeah, you just need to keep track of how you do things and how it turns out,
+like if some part of the code always fails, has a lot of errors or you rewrite it many times, there's probably something wrong about it.
+same thing with installation/configuration, it's fine if you have a config.py with a few settings, but when you have 10-20 projects
+that all need to install on a new server it will go crazy, so I had to learn how the perl/python installers work, gentoo ebuilds etc to avoid all that pain
+yeah I understand
+it seems like this is all a process of organization, like a project will fall apart eventually if you keep adding to it
+and the foundation wasn't originally well-organized? right
+
+ok in general I think I learned what I needed to from this, this was great, thanks a lot! no problem
diff --git a/ricky/config.py b/ricky/config.py
index 1cf3255..7f8715f 100644
--- a/ricky/config.py
+++ b/ricky/config.py
@@ -10,7 +10,6 @@ TEST_URL = (
 )
 
 PB_BASE = "http://asdf.us/"
-
 def _add_pb_base(path):
     return os.path.join(PB_BASE, path)
 
diff --git a/ricky/dataset.py b/ricky/dataset.py
index 4f8a422..478ee5e 100644
--- a/ricky/dataset.py
+++ b/ricky/dataset.py
@@ -3,24 +3,6 @@
 from ricky.utils import data_from_image
 from pybrain.datasets import SupervisedDataSet
 
-# while subclassing this works, we should try to detect the length of params
-# and build a new data set for each type of params set...
-# therefore, an instance of SupervisedDataSet could actually be
-# accessed through the params instance...simplified one-to-one mapping
-
-# we are limited to only one classifier per params instance as well
-# however this is sort of a good thing, because built into the params
-# class can be a method that randomizes params, and then evaluates
-
-# we might be able to get this done through multiple inheritance
-# keep all dataset related stuff in a separate class to make it better organized
-
-# we need
-# .evaluate
-# .generate_liked_image
-# .train_from_url_list
-# .reset
-
 class DataSet(SupervisedDataSet):
@@ -35,11 +17,9 @@ class DataSet(SupervisedDataSet):
         target = 1
         data_list = [data_from_image(image) for image in url_list if image]
         for data in data_list:
-            for params_class in ricky.params.Params.__subclasses__():
-                if data['module'] == params_class.__name__:
-                    params_instance = params_class()
-                    params_instance.from_dict(data['params'])
-                    self.addSample(
-                        params_instance.as_normalized(),
-                        target
-                    )
+            params_instance = Params.new_class_from_classname(data['module'])
+            params_instance.from_dict(data['params'])
+            self.addSample(
+                params_instance.as_normalized(),
+                target
+            )
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 53ac530..82cbc79 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -25,27 +25,14 @@ class Params(object):
     def __len__(self):
         return len(self._params)
 
+
     def _load_probabilities_json(self, probabilities_file=None):
-        if probabilities_file:
-            filepath = probabilities_file
-        else:
-            filepath = os.path.join(
+        filepath = probabilities_file or \
+            os.path.join(
                 PROBABILITIES_DIR,
                 "%s.json" % (self.__class__.__name__)
             )
-        try:
-            f = open(filepath, 'r')
-            data = f.read()
-            f.close()
-            return json.loads(data)
-        except json.scanner.JSONDecodeError as e:
-            sys.stderr.write("Invalid Json - Problem decoding %s\n" % filepath)
-            sys.stderr.write("%s\n" % e)
-            sys.exit(1)
-        except IOError:
-            sys.stderr.write(
-                "Could not find probabilities file %s\n" % filepath)
-            sys.exit(1)
+        return json.load(open(filepath))
 
     def randomize(
         self,
@@ -65,7 +52,6 @@ class Params(object):
             param.randomize(probabilities=probabilities_dict.get(param.name))
 
-
     def execute(self):
         """calls the associated api"""
         if OFFLINE:
@@ -94,12 +80,6 @@ class Params(object):
             result[param.name] = param.value
         return result
 
-    def as_normalized(self):
-        return tuple([
-            {'name': param.name, 'normalized': param.as_normalized()}
-            for param in self._params
-        ])
-
     def as_serialized(self):
         """
         returns params in serialized form to use in a dataset
@@ -120,7 +100,11 @@ class Params(object):
             param.value = params_dict[param.name]
 
     @classmethod
-    def from_classname(cls, classname):
+    def new_class_from_classname(cls, classname):
+        """
+        #FIXME make this class a plugin parent class
+        anything else look weird here?
+        """
         for subclass in cls.__subclasses__():
             if subclass.__name__ == classname:
                 return subclass()
diff --git a/ricky/utils.py b/ricky/utils.py
index 04e2074..98dca5f 100644
--- a/ricky/utils.py
+++ b/ricky/utils.py
@@ -17,7 +17,7 @@ def data_from_url(url):
     result = ImCmd.search(newfile=newfile).first()
     try:
         return {
-            "module": result.tag.split(":")[0],
+            "module": result.tag.split(":")[0],
             "params": json.loads(result.dataobj)
         }
     except AttributeError:
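
The #FIXME in new_class_from_classname above points toward making Params a plugin-style parent class. A minimal sketch of one way to do that with a registering metaclass; this is an illustration, not code from this commit, and only the PbGradient name is taken from the repo:

    class ParamsMeta(type):
        """records every Params subclass by class name as it is defined"""
        def __init__(cls, name, bases, attrs):
            super(ParamsMeta, cls).__init__(name, bases, attrs)
            if '_registry' not in attrs:  # skip the Params base class itself
                cls._registry[name] = cls

    class Params(object):
        __metaclass__ = ParamsMeta  # python 2, matching the repo's print syntax
        _registry = {}

        @classmethod
        def new_class_from_classname(cls, classname):
            # a dict lookup also finds grandchildren, which the
            # __subclasses__() walk in the commit would miss
            return cls._registry[classname]()

    # a subclass such as class PbGradient(Params) now registers itself at
    # definition time, so Params.new_class_from_classname('PbGradient')
    # returns a fresh instance without touching __subclasses__()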
