made progress in binaryclassifier rewrite, restructured file tree

author: pepperpepperpepper <pepper@scannerjammer.com> 2015-12-08 21:37:41 -0800
committer: pepperpepperpepper <pepper@scannerjammer.com> 2015-12-08 21:37:41 -0800
commit: 0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch)
tree: 60df92a77a6d298aed851315ffad80d4d1e937ef /ricky
parent: 518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff)
5 files changed, 83 insertions, 0 deletions
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py
new file mode 100644
index 0000000..f6d8ae6
--- /dev/null
+++ b/ricky/binaryclassifier.py
@@ -0,0 +1,34 @@
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.structure import SoftmaxLayer
+from pybrain.datasets import SupervisedDataSet
+from pybrain.supervised.trainers import BackpropTrainer
+
+
+class BinaryClassifier(object):
+    """Binary classifier backed by a small pybrain feed-forward network."""
+
+    def __init__(self):
+        self._default_hidden_layers = 3  # neurons in the hidden layer
+        self._net = None  # set by _train()
+
+    def _train(self, dataset):
+        """Build a network sized for `dataset` and train it by backprop.
+
+        `dataset` must be a pybrain SupervisedDataSet (e.g. ricky's
+        DataSet) that also exposes `params_length`, the input width.
+        The trained network is kept on `self._net` and returned.
+        """
+        net = buildNetwork(
+            dataset.params_length,
+            self._default_hidden_layers,
+            1  # a binary classifier only needs one output neuron
+        )
+        # The dataset already is a SupervisedDataSet; re-wrapping it in
+        # SupervisedDataSet(dataset) fails (constructor wants inp, target).
+        BackpropTrainer(net, dataset).trainUntilConvergence()
+        self._net = net
+        return net
+
+    def classify(self, dataset):
+        """Placeholder: not implemented yet, always returns False."""
+        return False
diff --git a/ricky/dataset.py b/ricky/dataset.py
new file mode 100644
index 0000000..4f8a422
--- /dev/null
+++ b/ricky/dataset.py
@@ -0,0 +1,45 @@
+import ricky.params
+from ricky.utils import data_from_image
+from pybrain.datasets import SupervisedDataSet
+
+
+# While subclassing works here, we should try to detect the length of the
+# params and build a separate data set for each type of params set.
+# An instance of SupervisedDataSet could then be accessed through the
+# params instance itself, giving a simple one-to-one mapping.
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+
+class DataSet(SupervisedDataSet):
+    """SupervisedDataSet whose samples are loaded from lists of image URLs."""
+
+    def _file_into_list(self, filepath):
+        """Return the newline-split contents of the file at `filepath`."""
+        # Plain method: the old @staticmethod still declared `self`, so
+        # instance calls never received `filepath`.
+        with open(filepath, "r") as f:  # `with` closes the handle
+            return f.read().split("\n")
+
+    def _load_url_list(self, url_list, liked=False):
+        """Add a sample (target 1 if `liked`, else 0) per matching entry."""
+        target = 1 if liked else 0
+        data_list = [data_from_image(image) for image in url_list if image]
+        for data in data_list:
+            # data['module'] names the Params subclass to rebuild.
+            for params_class in ricky.params.Params.__subclasses__():
+                if data['module'] == params_class.__name__:
+                    params_instance = params_class()
+                    params_instance.from_dict(data['params'])
+                    self.addSample(params_instance.as_normalized(), target)
diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/ricky/dataset/__init__.py
+++ /dev/null
diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py
index 3bf5c7e..a3bbf65 100644
--- a/ricky/param/__init__.py
+++ b/ricky/param/__init__.py
@@ -74,4 +74,6 @@ class Param(object):
         pass
 
     def as_normalized(self):
+        if self.value:  # any truthy value normalizes to 1; otherwise 0 below
+            return 1
         return 0
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 80da6c8..da4562f 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -24,6 +24,8 @@ class Params(object):
         """string representation"""
         return pprint.pformat(self.as_dict())
 
+    def __len__(self):  # lets len(params) report the size of self._params
+        return len(self._params)
     def _load_probabilities_json(self, probabilities_file=None):
         if probabilities_file:
             filepath = probabilities_file
author	pepperpepperpepper <pepper@scannerjammer.com>	2015-12-08 21:37:41 -0800
committer	pepperpepperpepper <pepper@scannerjammer.com>	2015-12-08 21:37:41 -0800
commit	0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch)
tree	60df92a77a6d298aed851315ffad80d4d1e937ef /ricky
parent	518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff)