diff options
| author | pepperpepperpepper <pepper@scannerjammer.com> | 2015-12-08 21:37:41 -0800 |
|---|---|---|
| committer | pepperpepperpepper <pepper@scannerjammer.com> | 2015-12-08 21:37:41 -0800 |
| commit | 0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch) | |
| tree | 60df92a77a6d298aed851315ffad80d4d1e937ef /ricky | |
| parent | 518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff) | |
made progress in binaryclassifier rewrite, restructured file tree
Diffstat (limited to 'ricky')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | ricky/binaryclassifier.py | 34 |
| -rw-r--r-- | ricky/dataset.py | 45 |
| -rw-r--r-- | ricky/dataset/__init__.py | 0 |
| -rw-r--r-- | ricky/param/__init__.py | 2 |
| -rw-r--r-- | ricky/params/__init__.py | 2 |
5 files changed, 83 insertions, 0 deletions
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py new file mode 100644 index 0000000..f6d8ae6 --- /dev/null +++ b/ricky/binaryclassifier.py @@ -0,0 +1,34 @@ +from pybrain.tools.shortcuts import buildNetwork +from pybrain.structure import SoftmaxLayer +from pybrain.datasets import SupervisedDataSet +from pybrain.supervised.trainers import BackpropTrainer + + +class BinaryClassifier(object): + def __init__(self): + self._default_hidden_layers = 3 + pass + + def _train(self, dataset): + """ + pybrain.tools.shortcuts.buildNetwork(*layers, **options) + Build arbitrarily deep networks. + + layers should be a list or tuple of integers, that + indicate how many neurons the layers should have. + bias and outputbias are flags to indicate whether + the network should have the corresponding biases; + both default to True. + """ + net = buildNetwork( + dataset.params_length, + self._default_hidden_layers, + 1 # a binary classifier only requires one output layer + ) + ds = SupervisedDataSet(dataset) + trainer = BackpropTrainer(net, ds) + trainer.trainUntilConvergence() + net.activate(params.as_serialized) + + def classify(self, dataset): + return False diff --git a/ricky/dataset.py b/ricky/dataset.py new file mode 100644 index 0000000..4f8a422 --- /dev/null +++ b/ricky/dataset.py @@ -0,0 +1,45 @@ +import ricky.params +from ricky.utils import data_from_image +from pybrain.datasets import SupervisedDataSet + + +# while subclassing this works, we should try to detect the length of params +# and build a new data set for each type of params set... 
+# therefore, an instance of SupervisedDataSet could actually be +# accessed through the params instance...simplified one-to-one mapping + +# we are limited to only one classifier per params instance as well +# however this is sort of a good thing, because built into the params +# class can be a method that randomizes params, and then evaluates + +# we might be able to get this done through multiple inheritance +# keep all dataset related stuff in a separate class to make it better organized + +# we need +# .evaluate +# .generate_liked_image +# .train_from_url_list +# .reset + + +class DataSet(SupervisedDataSet): + + @staticmethod + def _file_into_list(self, filepath): + f = open(filepath, "r") + return f.read().split("\n") + + def _load_url_list(self, url_list, liked=False): + target = 0 + if liked: + target = 1 + data_list = [data_from_image(image) for image in url_list if image] + for data in data_list: + for params_class in ricky.params.Params.__subclasses__(): + if data['module'] == params_class.__name__: + params_instance = params_class() + params_instance.from_dict(data['params']) + self.addSample( + params_instance.as_normalized(), + target + ) diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/ricky/dataset/__init__.py +++ /dev/null diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py index 3bf5c7e..a3bbf65 100644 --- a/ricky/param/__init__.py +++ b/ricky/param/__init__.py @@ -74,4 +74,6 @@ class Param(object): pass def as_normalized(self): + if self.value: + return 1 return 0 diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py index 80da6c8..da4562f 100644 --- a/ricky/params/__init__.py +++ b/ricky/params/__init__.py @@ -24,6 +24,8 @@ class Params(object): """string representation""" return pprint.pformat(self.as_dict()) + def __len__(self): + return len(self._params) def _load_probabilities_json(self, probabilities_file=None): if probabilities_file: filepath = 
probabilities_file |
