summaryrefslogtreecommitdiff
path: root/ricky
diff options
context:
space:
mode:
Diffstat (limited to 'ricky')
-rw-r--r--ricky/binaryclassifier.py34
-rw-r--r--ricky/dataset.py45
-rw-r--r--ricky/dataset/__init__.py0
-rw-r--r--ricky/param/__init__.py2
-rw-r--r--ricky/params/__init__.py2
5 files changed, 83 insertions, 0 deletions
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py
new file mode 100644
index 0000000..f6d8ae6
--- /dev/null
+++ b/ricky/binaryclassifier.py
@@ -0,0 +1,34 @@
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.structure import SoftmaxLayer
+from pybrain.datasets import SupervisedDataSet
+from pybrain.supervised.trainers import BackpropTrainer
+
+
class BinaryClassifier(object):
    """Binary (liked / not-liked) classifier backed by a PyBrain
    feed-forward network trained with backpropagation."""

    def __init__(self):
        # Neuron count for the single hidden layer of the network.
        self._default_hidden_layers = 3

    def _train(self, dataset):
        """Build and train a network on *dataset*; return the trained net.

        pybrain.tools.shortcuts.buildNetwork(*layers, **options)
        Build arbitrarily deep networks.

        layers should be a list or tuple of integers, that
        indicate how many neurons the layers should have.
        bias and outputbias are flags to indicate whether
        the network should have the corresponding biases;
        both default to True.

        :param dataset: a ricky.dataset.DataSet (itself a pybrain
            SupervisedDataSet) exposing ``params_length`` — TODO confirm.
        :returns: the trained pybrain network.
        """
        net = buildNetwork(
            dataset.params_length,
            self._default_hidden_layers,
            1  # a binary classifier only requires one output neuron
        )
        # NOTE: the original wrapped the dataset in SupervisedDataSet(dataset),
        # misusing the (input_dim, target_dim) constructor, and then called
        # net.activate(params.as_serialized) on an undefined name `params`
        # (a guaranteed NameError).  `dataset` already subclasses
        # SupervisedDataSet, so it is fed to the trainer directly and the
        # trained network is returned instead of being discarded.
        trainer = BackpropTrainer(net, dataset)
        trainer.trainUntilConvergence()
        return net

    def classify(self, dataset):
        # TODO: stub — always reports "not liked" until training is wired in.
        return False
diff --git a/ricky/dataset.py b/ricky/dataset.py
new file mode 100644
index 0000000..4f8a422
--- /dev/null
+++ b/ricky/dataset.py
@@ -0,0 +1,45 @@
+import ricky.params
+from ricky.utils import data_from_image
+from pybrain.datasets import SupervisedDataSet
+
+
+# while subclassing this works, we should try to detect the length of params
+# and build a new data set for each type of params set...
+# therefore, an instance of SupervisedDataSet could actually be
+# accessed through the params instance...simplified one-to-one mapping
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+
class DataSet(SupervisedDataSet):
    """SupervisedDataSet that is populated with (normalized-params, liked)
    samples derived from image URLs."""

    @staticmethod
    def _file_into_list(filepath):
        """Return the contents of *filepath* split on newlines.

        Fixes the original definition, which was decorated with
        @staticmethod yet declared ``self`` — making any one-argument
        call raise TypeError — and which leaked the open file handle.
        """
        with open(filepath, "r") as f:
            return f.read().split("\n")

    def _load_url_list(self, url_list, liked=False):
        """Add one training sample per non-empty image URL in *url_list*.

        :param url_list: iterable of image URLs (falsy entries skipped).
        :param liked: when True the samples get target 1, otherwise 0.
        """
        target = 1 if liked else 0
        data_list = [data_from_image(image) for image in url_list if image]
        for data in data_list:
            # Match the image's originating module against the registered
            # Params subclasses; only a matching subclass can rehydrate
            # and normalize the stored params.
            for params_class in ricky.params.Params.__subclasses__():
                if data['module'] == params_class.__name__:
                    params_instance = params_class()
                    params_instance.from_dict(data['params'])
                    self.addSample(
                        params_instance.as_normalized(),
                        target
                    )
diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/ricky/dataset/__init__.py
+++ /dev/null
diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py
index 3bf5c7e..a3bbf65 100644
--- a/ricky/param/__init__.py
+++ b/ricky/param/__init__.py
@@ -74,4 +74,6 @@ class Param(object):
pass
def as_normalized(self):
+ if self.value:
+ return 1
return 0
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 80da6c8..da4562f 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -24,6 +24,8 @@ class Params(object):
"""string representation"""
return pprint.pformat(self.as_dict())
+ def __len__(self):
+ return len(self._params)
def _load_probabilities_json(self, probabilities_file=None):
if probabilities_file:
filepath = probabilities_file