author     Pepper <pepper@scannerjammer.com>   2019-12-04 04:30:53 +0000
committer  Pepper <pepper@scannerjammer.com>   2019-12-04 04:30:53 +0000
commit     65697891be538591f57384d3469ab2a7f2a86568 (patch)
tree       5b34929c7e2c64abbec1949d755205839281456b
parent     1d8ed6e2f6ffb872c2e29104067a28ae098ec290 (diff)
-rwxr-xr-x  example.py                 5
-rw-r--r--  notes                    196
-rw-r--r--  ricky/config.py            1
-rw-r--r--  ricky/dataset.py          32
-rw-r--r--  ricky/params/__init__.py  34
-rw-r--r--  ricky/utils.py             2
6 files changed, 217 insertions, 53 deletions
diff --git a/example.py b/example.py
--- a/example.py
+++ b/example.py
@@ -5,6 +5,11 @@ import ricky.utils as utils
 params = ricky.params.PbGradient()
 params.randomize()
 print params.as_serialized()
+print ""
+print params.as_dict()
+print ""
+
+
 #print params.execute()
 #print params
 #data = utils.data_from_url(
diff --git a/notes b/notes
new file mode 100644
--- /dev/null
+++ b/notes
@@ -0,0 +1,196 @@
+
+ # it seems like there are probably better/more clever ways to implement some of these "options"
+ # like the OFFLINE variable seems a bit weird, the probabilities_local, etc
+ # maybe I just don't know the right pattern from the client side? like a pretty way of
+ # calling the functions?
+ # yeah, global variables from config, like everyone likes to do in python, are not good. usually it looks like this
+ # my $module = MyMainModule->new({ offline => 1 });
+ # package MyMainModule;
+ # sub offline { return $self->{offline} }
+ # package MySubModule;
+ # sub parent { return $self->{parent} }
+ # sub something { print $self->parent->offline }
+ # something like this. so whenever the main module is invoked, have an optional
+ # settings hash that you can pass in, right? yep
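+
+# a rough Python version of that settings-hash pattern; the class names here
+# are hypothetical, just to illustrate the idea, not actual ricky code:
+#
+# class MainModule(object):
+#     def __init__(self, settings=None):
+#         # one optional settings dict at construction, no module-level OFFLINE global
+#         self.settings = settings or {}
+#     def offline(self):
+#         return self.settings.get('offline', False)
+#
+# class SubModule(object):
+#     def __init__(self, parent):
+#         self.parent = parent
+#     def something(self):
+#         # submodules reach options through their parent, like $self->parent->offline
+#         return self.parent.offline()
+#
+# main = MainModule(settings={'offline': True})
+# sub = SubModule(parent=main)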
+
+
+#how about the way this is laid out, seems bad? yeah, config files like that are better avoided if possible
+#so with params below, url can be generated by the module itself, it knows its own name, and can get the base url from the parent class.
+# offline is for the main module initialization hash.
+# directories, in perl we have File::ShareDir, check out how it works. basically it's a module to get the full path to the share/ folder
+#in an installed app. after module installation it can get the right path for you, looks good. ok I'll check it out
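+
+# a possible python analogue of File::ShareDir is setuptools' pkg_resources,
+# assuming the probability json files ship inside the ricky package itself
+# (the package-relative path below is a guess, not necessarily this repo's
+# real layout); inside a Params method it could be something like:
+#
+# import pkg_resources
+# filepath = pkg_resources.resource_filename(
+#     'ricky', 'params/probabilities/%s.json' % self.__class__.__name__)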
+
+
+#ok last thing...wanted to show you how I included photoblaster as a git submodule
+
+#so one simple solution would be to put this function in the parent class, like self._get_pb_url(PB_PATTERN_API)
+
+# while subclassing this works, we should try to detect the length of params
+# and build a new data set for each type of params set...
+# therefore, an instance of SupervisedDataSet could actually be
+# accessed through the params instance...simplified one-to-one mapping
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+#this is still a bit messed up
+
+#so what I want to do here is described above
+#so basically using the machine learning lib...
+# I get three things from the machine learning lib
+# a DataSet class
+# a Network class
+# a Trainer class
+
+#when I use the Trainer class, it acts on the Network class, feeding it the DataSet
+# then the network can generate params basically by doing this
+# params.randomize()
+# if network.evaluate(params.serialized()):
+#     params.execute()
+# does that make some sense? yep
+
+# but the thing is that a dataset is something where I store a huge list of
+# params.serialized() outputs, as well as whether or not I liked those
+# params.serialized() outputs
+# dataset.add(params.serialized(), liked=True)
+# dataset.add(params.serialized(), liked=False)
+# do you follow me so far? yes
+
+# so I'm thinking I should keep the dataset actually INSIDE the params class
+# because I don't need more than one params instance to feed the DataSet
+
+# so it would be something like this
+
+# self._dataset.add(self.serialized, liked=True)
+# something like that, does that make some sense? how does the Trainer class need the DataSet to look?
+# just as a variable, an instance of SupervisedDataSet(), and it can keep training the network with many datasets?
+# yeah and you can keep adding to the dataset after you've trained the network and then retrain or train a new Network
+# both datasets and networks have a reset() method that clears them. A bit confusing but I guess it's fine
+# hmm, well if it can keep training it with new datasets without resetting old data, you can just make the DataSet class
+# a converter from the Params class into pybrain data. no need to embed it into the Params class, as you may need to change machine learning libs
+# in the future, and that will be difficult to do with merged classes.
+
+
+#problem is that the dataset needs specific info from the params class,
+# it needs the number of params, so that when it is instantiated I can specify the
+# number of inputs and targets (specifically, len(params_instance) inputs, and 1 output)
+# similarly the network needs input layers and output layers, which will match
+# the datasets in this case (it's binary classification)
+# so the network has len(params_instance) inputs and 1 output
+
+# so I'm thinking of just putting EVERYTHING inside the params class
+
+# so I can have a method
+
+# params_instance.train_nn_from_file(Datafile)
+# params_instance.randomize(use_nn=True)
+
+# something like that? better not. think of the params class as your Model class in Model/Controller/View, and in this case the neural Network
+# will be Controller/View. add params.length() or whatever is necessary, and then in DataSet and related classes use it like:
+#
+# api = Api('PbGradient')
+# sample_params = api.new_params()
+# ds = pybrain.DataSet(len(sample_params), 1)
+#
+# def add_param(params):
+#     ds.add(params.as_serialized(), params.extra.liked())
+# api.list_params(add_param)
+
+# trainer = pybrain.Trainer()
+# nn = pybrain.NeuralNetwork(len(params), 1)
+# trainer.train(ds, nn)
+#
+# params = api.new_params()
+# for i in range(0, 100):
+#     params.randomize()
+#     if nn.active(params.serialized) > .5:
+#         params.execute()
+#
+# ok so this is the procedure more or less, do you see how this is working? yep
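+
+# a sketch of the same procedure with pybrain's actual class names
+# (SupervisedDataSet / buildNetwork / BackpropTrainer); the dimensions and
+# the liked/not-liked samples are stand-ins, and it assumes as_serialized()
+# returns a flat numeric tuple:
+#
+# from pybrain.datasets import SupervisedDataSet
+# from pybrain.tools.shortcuts import buildNetwork
+# from pybrain.supervised.trainers import BackpropTrainer
+#
+# n_inputs = len(params)                       # one input unit per param
+# ds = SupervisedDataSet(n_inputs, 1)          # 1 target: liked or not
+# ds.addSample(params.as_serialized(), (1,))   # a params set I liked
+# ds.addSample(params.as_serialized(), (0,))   # one I didn't
+#
+# net = buildNetwork(n_inputs, n_inputs, 1)    # input / hidden / output layers
+# trainer = BackpropTrainer(net, ds)
+# trainer.train()                              # one epoch, returns the error
+#
+# params.randomize()
+# if net.activate(params.as_serialized())[0] > .5:
+#     params.execute()
+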
+# so what about putting these things in the params class, still a bad idea?
+# I was thinking I could make them accessible from params through multiple inheritance, just
+# keeping all my nn stuff in a separate file
+# well dataset is one-to-many with params, so it can't be inside in any case. to improve it a bit, you can restore the api class,
+# something like this. but still you don't need to merge the ds and api classes, it breaks logic.
+
+# so it will work if I move the ds into the params class, but it won't adhere to the design pattern? basically
+# it's not the right design pattern to do that? right, it's also logically not possible to put a single ds into multiple params instances.
+# right but there doesn't need to be multiple params instances. that's why I was thinking of doing that, no reason
+# to have more than one params instance. even if there is one to one between ds and params, it breaks logic between model and user of model,
+# the neural network is considered a user of this params model.
+# hmm can the neural network be trained on the params model directly...like
+# trainer(params.ds, nn) ?
+# it would work, but is that just bad for the reasons you stated above? right, it will work, but if you want to change the library later it
+# won't be possible to do easily because it's embedded, right? right, even if you won't ever change the library, it's still very hard to
+#understand how this thing works, and why the child class contains something different. yeah so in this case,
+# do I wrap both classes in methods? like do I just write a method to create an image generator that uses these classes? (pybrain, params, etc)
+#? the params class is extended so that it's easy to get all info, like this params.extra.liked(), and it can even be params.as_normalized, that's fine.
+# as for the neural network, you can wrap it into something like an ApiMachineLearningPybrain() class, which can accept a Params() instance as a sample,
+# with data, and create a trained network for you.
+
+# so for design, make multiple instances of params, and create a separate nn instance, maybe something that wraps
+# other methods in the class (ie can call params.randomize()) and generate for me a set of params that I will probably like?
+# it's really the missing Api class here that causes logic problems. with that restored it will look just fine.
+# api = Api('PbGradient')
+# api_machinel = ApiMachineLearningPybrain(api = api)
+# params = api_machinel.new_params_liked_by_me()
+# params.execute()
+#
+# class ApiMachineLearningPybrain:
+#     def train():
+#         for param in self.api.list_params:
+#             ...
+#     def new_params_liked_by_me():
+#         for i in range(0, 100):
+#             params.randomize()
+#             if nn.active(params.serialized) > .5:
+#                 return params
+# and so on. if you want to change the machine learning library, you write another ApiMachineLearning class. ApiMachineLearning* knows how pybrain works
+#and knows how api/params works, so it can use all of them and combine them into useful data. something like this.
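+
+# filling in that wrapper sketch a bit, reusing the pybrain pieces from above;
+# the method bodies are guesses at the same idea, not working ricky code, and
+# it assumes list_params() returns an iterable (above it took a callback):
+#
+# class ApiMachineLearningPybrain(object):
+#     def __init__(self, api):
+#         self.api = api
+#         sample = api.new_params()
+#         self.ds = SupervisedDataSet(len(sample), 1)
+#         self.net = buildNetwork(len(sample), len(sample), 1)
+#
+#     def train(self):
+#         for params in self.api.list_params():
+#             liked = 1 if params.extra.liked() else 0
+#             self.ds.addSample(params.as_serialized(), (liked,))
+#         BackpropTrainer(self.net, self.ds).train()
+#
+#     def new_params_liked_by_me(self, tries=100):
+#         for i in range(tries):
+#             params = self.api.new_params()
+#             params.randomize()
+#             if self.net.activate(params.as_serialized())[0] > .5:
+#                 return params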
+
+#ok good...I get it. it's a logical separation. So embedding external libs is generally sort of a bad practice? right, unless it's part of the
+#solution, like reading json, etc. I see
+
+# well it's sort of unfortunate that I deleted the Api class, but it made it easier for me to keep writing because at the time
+# I didn't need it at all. it's fine to sort of delete and remake things as something grows, right? yeah, classes come and go all the time.
+
+# ok well let's figure this out
+
+
+
+
+ #so this is kinda just bad? or is it ok for now? well, not very good
+ # ok I'll learn the python installer and start using it, that should be next on my list
+ # so do you do the same thing for perl? yep, I use module-starter, it basically creates Makefile.PL, only a few edits and it's done
+ # and even for something like Struct-it, it is a global package? yes, it's all a perl module and wrapped into a gentoo ebuild.
+ # ahh ok so the one thing I don't quite understand is, what about the database, obviously when you install it, the db is
+ # empty right? not really. I have a config file in /etc/ for the website, and the package installs a sample config, just like any other
+ #sane app. you install the app, copy the config from the example to the right place, edit the database parameters there and start it.
+ # do you use my.cnf or something? how does the app know the db password etc? from the config in /etc for this app. so it
+ # reads from /etc/ in the app...? yes ok I get it
+ # and the db create commands, do you do those separately? yeah, in a gentoo ebuild there are commands for configuring the app after
+ # installation, you can put any command there, including db initialization, migration etc.
+ # ok I'm understanding this
+
+are these mostly just issues I'll iron out by continually making new projects/writing more code? yep
+ok so they aren't anything I should be stressed out about, really, seems like with every project I'll figure out a way to do
+something better? is that basically how it happens? yeah, you just need to keep track of how you do things and how it turns out,
+like if some part of the code always fails, has a lot of errors or you rewrite it many times, there's probably something wrong about it.
+same thing with installation/configuration, it's fine if you have a config.py with a few settings, but when you have 10-20 projects
+that all need to install on a new server it will go crazy, so I had to learn how the perl/python installers work, gentoo ebuilds etc to avoid all that pain
+yeah I understand
+it seems like this is all a process of organization, like a project will fall apart eventually if you keep adding to it
+and the foundation wasn't originally well-organized? right
+
+ok in general I think I learned what I needed to from this, this was great, thanks a lot! no problem
diff --git a/ricky/config.py b/ricky/config.py
index 1cf3255..7f8715f 100644
--- a/ricky/config.py
+++ b/ricky/config.py
@@ -10,7 +10,6 @@ TEST_URL = (
 )
 
 PB_BASE = "http://asdf.us/"
-
 def _add_pb_base(path):
     return os.path.join(PB_BASE, path)
 
diff --git a/ricky/dataset.py b/ricky/dataset.py
index 4f8a422..478ee5e 100644
--- a/ricky/dataset.py
+++ b/ricky/dataset.py
@@ -3,24 +3,6 @@
 from ricky.utils import data_from_image
 from pybrain.datasets import SupervisedDataSet
 
-# while subclassing this works, we should try to detect the length of params
-# and build a new data set for each type of params set...
-# therefore, an instance of SupervisedDataSet could actually be
-# accessed through the params instance...simplified one-to-one mapping
-
-# we are limited to only one classifier per params instance as well
-# however this is sort of a good thing, because built into the params
-# class can be a method that randomizes params, and then evaluates
-
-# we might be able to get this done through multiple inheritance
-# keep all dataset related stuff in a separate class to make it better organized
-
-# we need
-# .evaluate
-# .generate_liked_image
-# .train_from_url_list
-# .reset
-
 class DataSet(SupervisedDataSet):
@@ -35,11 +17,9 @@ class DataSet(SupervisedDataSet):
         target = 1
         data_list = [data_from_image(image) for image in url_list if image]
         for data in data_list:
-            for params_class in ricky.params.Params.__subclasses__():
-                if data['module'] == params_class.__name__:
-                    params_instance = params_class()
-                    params_instance.from_dict(data['params'])
-                    self.addSample(
-                        params_instance.as_normalized(),
-                        target
-                    )
+            params_instance = Params.new_class_from_classname(data['module'])
+            params_instance.from_dict(data['params'])
+            self.addSample(
+                params_instance.as_normalized(),
+                target
+            )
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 53ac530..82cbc79 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -25,27 +25,14 @@ class Params(object):
     def __len__(self):
         return len(self._params)
 
+
     def _load_probabilities_json(self, probabilities_file=None):
-        if probabilities_file:
-            filepath = probabilities_file
-        else:
-            filepath = os.path.join(
+        filepath = probabilities_file or \
+            os.path.join(
                 PROBABILITIES_DIR,
                 "%s.json" % (self.__class__.__name__)
             )
-        try:
-            f = open(filepath, 'r')
-            data = f.read()
-            f.close()
-            return json.loads(data)
-        except json.scanner.JSONDecodeError as e:
-            sys.stderr.write("Invalid Json - Problem decoding %s\n" % filepath)
-            sys.stderr.write("%s\n" % e)
-            sys.exit(1)
-        except IOError:
-            sys.stderr.write(
-                "Could not find probabilities file %s\n" % filepath)
-            sys.exit(1)
+        return json.load(open(filepath))
 
     def randomize(
         self,
@@ -65,7 +52,6 @@ class Params(object):
             param.randomize(probabilities=probabilities_dict.get(param.name))
 
-
     def execute(self):
         """calls the associated api"""
         if OFFLINE:
@@ -94,12 +80,6 @@ class Params(object):
             result[param.name] = param.value
         return result
 
-    def as_normalized(self):
-        return tuple([
-            {'name': param.name, 'normalized': param.as_normalized()}
-            for param in self._params
-        ])
-
     def as_serialized(self):
         """
         returns params in serialized form to use in a dataset
@@ -120,7 +100,11 @@ class Params(object):
             param.value = params_dict[param.name]
 
     @classmethod
-    def from_classname(cls, classname):
+    def new_class_from_classname(cls, classname):
+        """
+        #FIXME make this class a plugin parent class
+        anything else look weird here?
+        """
         for subclass in cls.__subclasses__():
             if subclass.__name__ == classname:
                 return subclass()
diff --git a/ricky/utils.py b/ricky/utils.py
index 04e2074..98dca5f 100644
--- a/ricky/utils.py
+++ b/ricky/utils.py
@@ -17,7 +17,7 @@ def data_from_url(url):
     result = ImCmd.search(newfile=newfile).first()
     try:
         return {
-            "module": result.tag.split(":")[0],
+            "module": result.tag.split(":")[0],
             "params": json.loads(result.dataobj)
         }
     except AttributeError:
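
The #FIXME in new_class_from_classname above points toward making Params a plugin-style parent class. A minimal sketch of one way to do that with a registering metaclass; this is an illustration, not code from this commit, and only the PbGradient name is taken from the repo:

    class ParamsMeta(type):
        """records every Params subclass by class name as it is defined"""
        def __init__(cls, name, bases, attrs):
            super(ParamsMeta, cls).__init__(name, bases, attrs)
            if '_registry' not in attrs:  # skip the Params base class itself
                cls._registry[name] = cls

    class Params(object):
        __metaclass__ = ParamsMeta  # python 2, matching the repo's print syntax
        _registry = {}

        @classmethod
        def new_class_from_classname(cls, classname):
            # a dict lookup also finds grandchildren, which the
            # __subclasses__() walk in the commit would miss
            return cls._registry[classname]()

    # a subclass such as class PbGradient(Params) now registers itself at
    # definition time, so Params.new_class_from_classname('PbGradient')
    # returns a fresh instance without touching __subclasses__()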
