author     pepperpepperpepper <pepper@scannerjammer.com>   2015-12-08 21:37:41 -0800
committer  pepperpepperpepper <pepper@scannerjammer.com>   2015-12-08 21:37:41 -0800
commit     0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch)
tree       60df92a77a6d298aed851315ffad80d4d1e937ef
parent     518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff)
made progress in binaryclassifier rewrite, restructured file tree
-rwxr-xr-x  example.py                                                               29
-rw-r--r--  pybrain_experiments/classification_test.py                              112
-rw-r--r--  pybrain_experiments/test.py                                              35
-rw-r--r--  pybrain_experiments/test_recurrent.py                                    19
-rw-r--r--  ricky/binaryclassifier.py                                                34
-rw-r--r--  ricky/dataset.py                                                         45
-rw-r--r--  ricky/dataset/__init__.py                                                 0
-rw-r--r--  ricky/param/__init__.py                                                   2
-rw-r--r--  ricky/params/__init__.py                                                  2
-rw-r--r--  share/image_url_sets/remote/IMAGES_LIKED (renamed from IMAGES_I_LIKE)     0
-rw-r--r--  share/install/requirements.txt                                            1
11 files changed, 99 insertions, 180 deletions
@@ -4,17 +4,18 @@ import ricky.utils as utils
 params = ricky.params.PbGradient()
 params.randomize()
-print params.execute()
-print params
-data = utils.data_from_url(
-    "/im/cache/PbGradientrgb-234,155,194-"
-    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
-)
-print data
-for params_class in ricky.params.Params.__subclasses__():
-    if data['module'] == params_class.__name__:
-        params_instance = params_class()
-        print type(params_instance)
-        params_instance.from_dict(data['params'])
-        print params_instance.execute()
-        print params_instance.as_normalized()
+print params.as_serialized()
+#print params.execute()
+#print params
+#data = utils.data_from_url(
+#    "/im/cache/PbGradientrgb-234,155,194-"
+#    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
+#)
+#print data
+#for params_class in ricky.params.Params.__subclasses__():
+#    if data['module'] == params_class.__name__:
+#        params_instance = params_class()
+#        print type(params_instance)
+#        params_instance.from_dict(data['params'])
+#        print params_instance.execute()
+#        print params_instance.as_normalized()
 
diff --git a/pybrain_experiments/classification_test.py b/pybrain_experiments/classification_test.py
deleted file mode 100644
index ac5f272..0000000
--- a/pybrain_experiments/classification_test.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from pybrain.datasets import ClassificationDataSet
-from pybrain.utilities import percentError
-from pybrain.tools.shortcuts import buildNetwork
-from pybrain.supervised.trainers import BackpropTrainer
-from pybrain.structure.modules import SoftmaxLayer
-
-from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
-from scipy import diag, arange, meshgrid, where
-from numpy.random import multivariate_normal
-
-
-# To have a nice dataset for visualization, we produce a set of points in
-# 2D belonging to three different classes. You could also read in your data
-# from a file, e.g. using pylab.load().
-
-means = [(-1,0),(2,4),(3,1)]
-cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
-alldata = ClassificationDataSet(2, 1, nb_classes=3)
-for n in xrange(400):
-    for klass in range(3):
-        input = multivariate_normal(means[klass],cov[klass])
-        alldata.addSample(input, [klass])
-
-
-# Randomly split the dataset into 75% training and 25% test data sets.
-# Of course, we could also have created two different datasets to begin with.
-
-tstdata, trndata = alldata.splitWithProportion( 0.25 )
-
-
-# For neural network classification, it is highly advisable to encode
-# classes with one output neuron per class. Note that this operation duplicates
-# the original targets and stores them in an (integer) field named 'class'.
-trndata._convertToOneOfMany( )
-tstdata._convertToOneOfMany( )
-
-
-print "Number of training patterns: ", len(trndata)
-print "Input and output dimensions: ", trndata.indim, trndata.outdim
-print "First sample (input, target, class):"
-print trndata['input'][0], trndata['target'][0], trndata['class'][0]
-
-
-
-
-
-# Now build a feed-forward network with 5 hidden units. We use the shortcut
-# buildNetwork() for this. The input and output layer size must match the
-# dataset's input and target dimension. You could add additional hidden
-# layers by inserting more numbers giving the desired layer sizes.
-#
-# The output layer uses a softmax function because we are doing classification.
-# There are more options to explore here, e.g. try changing the hidden layer
-# transfer function to linear instead of (the default) sigmoid.
-#
-# See also Description buildNetwork() for more info on options, and the Network
-# tutorial Building Networks with Modules and Connections for info on how to
-# build your own non-standard networks.
-fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
-
-
-# Set up a trainer that basically takes the network and training dataset
-# as input. For a list of trainers, see trainers. We are using a
-# BackpropTrainer for this.
-
-trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1,
-                           verbose=True, weightdecay=0.01)
-
-
-# Now generate a square grid of data points and put it into a dataset,
-# which we can then classify to obtain a nice contour field for visualization.
-# Therefore the target values for this data set can be ignored.
-
-ticks = arange(-3.,6.,0.2)
-X, Y = meshgrid(ticks, ticks)
-# need column vectors in dataset, not arrays
-griddata = ClassificationDataSet(2,1, nb_classes=3)
-for i in xrange(X.size):
-    griddata.addSample([X.ravel()[i],Y.ravel()[i]], [0])
-griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
-
-
-for i in range(20):
-# Train the network for some epochs. Usually you would
-# set something like 5 here, but for visualization purposes we
-# do this one epoch at a time.
-    trainer.trainEpochs( 1 )
-    trnresult = percentError( trainer.testOnClassData(),
-                              trndata['class'] )
-    tstresult = percentError( trainer.testOnClassData(
-        dataset=tstdata ), tstdata['class'] )
-
-    print "epoch: %4d" % trainer.totalepochs, \
-          " train error: %5.2f%%" % trnresult, \
-          " test error: %5.2f%%" % tstresult
-    out = fnn.activateOnDataset(griddata)
-    out = out.argmax(axis=1)  # the highest output activation gives the class
-    out = out.reshape(X.shape)
-    figure(1)
-    ioff()  # interactive graphics off
-    clf()   # clear the plot
-    hold(True)  # overplot on
-    for c in [0,1,2]:
-        here, _ = where(tstdata['class']==c)
-        plot(tstdata['input'][here,0],tstdata['input'][here,1],'o')
-    if out.max()!=out.min():  # safety check against flat field
-        contourf(X, Y, out)   # plot the contour
-    ion()   # interactive graphics on
-    draw()  # update the plot
-
-ioff()
-show()
diff --git a/pybrain_experiments/test.py b/pybrain_experiments/test.py
deleted file mode 100644
index f7b0a01..0000000
--- a/pybrain_experiments/test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from pybrain.structure import FeedForwardNetwork
-from pybrain.structure import LinearLayer, SigmoidLayer
-from pybrain.structure import FullConnection
-n = FeedForwardNetwork()
-
-inLayer = LinearLayer(2)
-hiddenLayer = SigmoidLayer(3)
-outLayer = LinearLayer(1)
-
-n.addInputModule(inLayer)
-n.addModule(hiddenLayer)
-n.addOutputModule(outLayer)
-
-in_to_hidden = FullConnection(inLayer, hiddenLayer)
-hidden_to_out = FullConnection(hiddenLayer, outLayer)
-
-
-n.addConnection(in_to_hidden)
-n.addConnection(hidden_to_out)
-
-
-# everything is wired together now
-# this makes it usable
-
-n.sortModules()
-
-
-if __name__ == "__main__":
-    #Again, this might look different on your machine -
-    #the weights of the connections have already been initialized randomly.
-    print n.activate([1, 2])
-    #look at the hidden weights
-    print in_to_hidden.params
-    print hidden_to_out.params
-    print n.params #weights here too
diff --git a/pybrain_experiments/test_recurrent.py b/pybrain_experiments/test_recurrent.py
deleted file mode 100644
index 692898a..0000000
--- a/pybrain_experiments/test_recurrent.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from pybrain.structure import RecurrentNetwork
-n = RecurrentNetwork()
-
-n.addInputModule(LinearLayer(2, name='in'))
-n.addModule(SigmoidLayer(3, name='hidden'))
-n.addOutputModule(LinearLayer(1, name='out'))
-n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
-n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))
-n.addRecurrentConnection(FullConnection(n['hidden'], n['hidden'], name='c3'))
-
-
-n.sortModules()
-n.activate((2, 2))
-array([-0.1959887])
-n.activate((2, 2))
-array([-0.19623716])
-n.activate((2, 2))
-array([-0.19675801])
-n.reset() #clears history
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py
new file mode 100644
index 0000000..f6d8ae6
--- /dev/null
+++ b/ricky/binaryclassifier.py
@@ -0,0 +1,34 @@
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.structure import SoftmaxLayer
+from pybrain.datasets import SupervisedDataSet
+from pybrain.supervised.trainers import BackpropTrainer
+
+
+class BinaryClassifier(object):
+    def __init__(self):
+        self._default_hidden_layers = 3
+        pass
+
+    def _train(self, dataset):
+        """
+        pybrain.tools.shortcuts.buildNetwork(*layers, **options)
+        Build arbitrarily deep networks.
+
+        layers should be a list or tuple of integers, that
+        indicate how many neurons the layers should have.
+        bias and outputbias are flags to indicate whether
+        the network should have the corresponding biases;
+        both default to True.
+        """
+        net = buildNetwork(
+            dataset.params_length,
+            self._default_hidden_layers,
+            1  # a binary classifier only requires one output layer
+        )
+        ds = SupervisedDataSet(dataset)
+        trainer = BackpropTrainer(net, ds)
+        trainer.trainUntilConvergence()
+        net.activate(params.as_serialized)
+
+    def classify(self, dataset):
+        return False
diff --git a/ricky/dataset.py b/ricky/dataset.py
new file mode 100644
index 0000000..4f8a422
--- /dev/null
+++ b/ricky/dataset.py
@@ -0,0 +1,45 @@
+import ricky.params
+from ricky.utils import data_from_image
+from pybrain.datasets import SupervisedDataSet
+
+
+# while subclassing this works, we should try to detect the length of params
+# and build a new data set for each type of params set...
+# therefore, an instance of SupervisedDataSet could actually be
+# accessed through the params instance...simplified one-to-one mapping
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+
+class DataSet(SupervisedDataSet):
+
+    @staticmethod
+    def _file_into_list(self, filepath):
+        f = open(filepath, "r")
+        return f.read().split("\n")
+
+    def _load_url_list(self, url_list, liked=False):
+        target = 0
+        if liked:
+            target = 1
+        data_list = [data_from_image(image) for image in url_list if image]
+        for data in data_list:
+            for params_class in ricky.params.Params.__subclasses__():
+                if data['module'] == params_class.__name__:
+                    params_instance = params_class()
+                    params_instance.from_dict(data['params'])
+                    self.addSample(
+                        params_instance.as_normalized(),
+                        target
+                    )
diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/ricky/dataset/__init__.py
+++ /dev/null
diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py
index 3bf5c7e..a3bbf65 100644
--- a/ricky/param/__init__.py
+++ b/ricky/param/__init__.py
@@ -74,4 +74,6 @@ class Param(object):
         pass
 
     def as_normalized(self):
+        if self.value:
+            return 1
         return 0
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 80da6c8..da4562f 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -24,6 +24,8 @@ class Params(object):
         """string representation"""
         return pprint.pformat(self.as_dict())
 
+    def __len__(self):
+        return len(self._params)
     def _load_probabilities_json(self, probabilities_file=None):
         if probabilities_file:
             filepath = probabilities_file
diff --git a/IMAGES_I_LIKE b/share/image_url_sets/remote/IMAGES_LIKED
index b6015c1..b6015c1 100644
--- a/IMAGES_I_LIKE
+++ b/share/image_url_sets/remote/IMAGES_LIKED
diff --git a/share/install/requirements.txt b/share/install/requirements.txt
new file mode 100644
index 0000000..0b2327e
--- /dev/null
+++ b/share/install/requirements.txt
@@ -0,0 +1 @@
+pybrain
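
For orientation, a minimal self-contained sketch of the training loop this commit is working toward. It is not part of the commit: n_inputs and the random samples are placeholders standing in for len(params) and params.as_normalized(), and it uses PyBrain's actual SupervisedDataSet constructor, which takes input and target dimensions rather than a dataset object (the SupervisedDataSet(dataset) call in the new _train would need the same treatment before it runs).

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.datasets import SupervisedDataSet
    from pybrain.supervised.trainers import BackpropTrainer
    import random

    n_inputs = 8  # placeholder: in the repo this would come from len(params)

    # One hidden layer of 3 units and a single output unit for the
    # liked / not-liked decision, mirroring _train() above.
    net = buildNetwork(n_inputs, 3, 1)

    # SupervisedDataSet is built from (input dim, target dim); samples are
    # then added one at a time, as DataSet._load_url_list does via addSample().
    ds = SupervisedDataSet(n_inputs, 1)
    for _ in range(100):
        sample = [random.random() for _ in range(n_inputs)]  # stand-in for params.as_normalized()
        target = random.choice([0, 1])                       # 1 = liked, 0 = not liked
        ds.addSample(sample, [target])

    trainer = BackpropTrainer(net, ds)
    trainer.trainUntilConvergence(maxEpochs=20)

    # Score a fresh parameter vector; an activation near 1 means "liked".
    print(net.activate([random.random() for _ in range(n_inputs)]))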