-rwxr-xr-x  example.py                                                              29
-rw-r--r--  pybrain_experiments/classification_test.py                             112
-rw-r--r--  pybrain_experiments/test.py                                             35
-rw-r--r--  pybrain_experiments/test_recurrent.py                                   19
-rw-r--r--  ricky/binaryclassifier.py                                               34
-rw-r--r--  ricky/dataset.py                                                        45
-rw-r--r--  ricky/dataset/__init__.py                                                0
-rw-r--r--  ricky/param/__init__.py                                                  2
-rw-r--r--  ricky/params/__init__.py                                                 2
-rw-r--r--  share/image_url_sets/remote/IMAGES_LIKED (renamed from IMAGES_I_LIKE)    0
-rw-r--r--  share/install/requirements.txt                                           1
11 files changed, 99 insertions, 180 deletions
diff --git a/example.py b/example.py
index e322718..8e932e8 100755
--- a/example.py
+++ b/example.py
@@ -4,17 +4,18 @@ import ricky.utils as utils
 
 params = ricky.params.PbGradient()
 params.randomize()
-print params.execute()
-print params
-data = utils.data_from_url(
-    "/im/cache/PbGradientrgb-234,155,194-"
-    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
-)
-print data
-for params_class in ricky.params.Params.__subclasses__():
-    if data['module'] == params_class.__name__:
-        params_instance = params_class()
-        print type(params_instance)
-        params_instance.from_dict(data['params'])
-        print params_instance.execute()
-        print params_instance.as_normalized()
+print params.as_serialized()
+#print params.execute()
+#print params
+#data = utils.data_from_url(
+#    "/im/cache/PbGradientrgb-234,155,194-"
+#    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
+#)
+#print data
+#for params_class in ricky.params.Params.__subclasses__():
+#    if data['module'] == params_class.__name__:
+#        params_instance = params_class()
+#        print type(params_instance)
+#        params_instance.from_dict(data['params'])
+#        print params_instance.execute()
+#        print params_instance.as_normalized()
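The block disabled above round-trips image parameters: fetch the serialized dict for a cached image, find the Params subclass whose name matches the stored module, and rebuild an instance from the dict. A minimal self-contained sketch of that lookup pattern (the classes here are stand-ins for the ricky ones, and the data dict is hypothetical):

    class Params(object):
        """Base class; concrete params sets register via subclassing."""

    class PbGradient(Params):
        def __init__(self):
            self.values = {}

        def from_dict(self, d):
            self.values.update(d)

        def as_normalized(self):
            # one 0/1 feature per param, mirroring Param.as_normalized
            return [1 if v else 0 for v in self.values.values()]

    data = {"module": "PbGradient", "params": {"invert": True}}

    # Match the stored module name against every registered subclass,
    # then rebuild the instance from the serialized params dict.
    for params_class in Params.__subclasses__():
        if data["module"] == params_class.__name__:
            params_instance = params_class()
            params_instance.from_dict(data["params"])
            print(params_instance.as_normalized())
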
diff --git a/pybrain_experiments/classification_test.py b/pybrain_experiments/classification_test.py
deleted file mode 100644
index ac5f272..0000000
--- a/pybrain_experiments/classification_test.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from pybrain.datasets import ClassificationDataSet
-from pybrain.utilities import percentError
-from pybrain.tools.shortcuts import buildNetwork
-from pybrain.supervised.trainers import BackpropTrainer
-from pybrain.structure.modules import SoftmaxLayer
-
-from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
-from scipy import diag, arange, meshgrid, where
-from numpy.random import multivariate_normal
-
-
-# To have a nice dataset for visualization, we produce a set of points in
-# 2D belonging to three different classes. You could also read in your data
-# from a file, e.g. using pylab.load().
-
-means = [(-1,0),(2,4),(3,1)]
-cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
-alldata = ClassificationDataSet(2, 1, nb_classes=3)
-for n in xrange(400):
-    for klass in range(3):
-        input = multivariate_normal(means[klass], cov[klass])
-        alldata.addSample(input, [klass])
-
-
-# Randomly split the dataset into 75% training and 25% test data sets.
-# Of course, we could also have created two different datasets to begin with.
-
-tstdata, trndata = alldata.splitWithProportion( 0.25 )
-
-
-# For neural network classification, it is highly advisable to encode
-# classes with one output neuron per class. Note that this operation duplicates
-# the original targets and stores them in an (integer) field named ‘class’.
-trndata._convertToOneOfMany( )
-tstdata._convertToOneOfMany( )
-
-
-print "Number of training patterns: ", len(trndata)
-print "Input and output dimensions: ", trndata.indim, trndata.outdim
-print "First sample (input, target, class):"
-print trndata['input'][0], trndata['target'][0], trndata['class'][0]
-
-
-
-
-
-# Now build a feed-forward network with 5 hidden units. We use the shortcut
-# buildNetwork() for this. The input and output layer size must match the
-# dataset’s input and target dimension. You could add additional hidden
-# layers by inserting more numbers giving the desired layer sizes.
-#
-# The output layer uses a softmax function because we are doing classification.
-# There are more options to explore here, e.g. try changing the hidden layer
-# transfer function to linear instead of (the default) sigmoid.
-#
-# See also Description buildNetwork() for more info on options, and the Network
-# tutorial Building Networks with Modules and Connections for info on how to
-# build your own non-standard networks.
-fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
-
-
-# Set up a trainer that basically takes the network and training dataset
-# as input. For a list of trainers, see trainers. We are using a
-# BackpropTrainer for this.
-
-trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
-                          verbose=True, weightdecay=0.01)
-
-
-# Now generate a square grid of data points and put it into a dataset,
-# which we can then classify to obtain a nice contour field for visualization.
-# Therefore the target values for this data set can be ignored.
-
-ticks = arange(-3.,6.,0.2)
-X, Y = meshgrid(ticks, ticks)
-# need column vectors in dataset, not arrays
-griddata = ClassificationDataSet(2,1, nb_classes=3)
-for i in xrange(X.size):
-    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
-griddata._convertToOneOfMany() # this is still needed to make the fnn feel comfy
-
-
-for i in range(20):
-    # Train the network for some epochs. Usually you would
-    # set something like 5 here, but for visualization purposes we
-    # do this one epoch at a time.
-    trainer.trainEpochs(1)
-    trnresult = percentError(trainer.testOnClassData(),
-                             trndata['class'])
-    tstresult = percentError(trainer.testOnClassData(
-        dataset=tstdata), tstdata['class'])
-
-    print "epoch: %4d" % trainer.totalepochs, \
-        "  train error: %5.2f%%" % trnresult, \
-        "  test error: %5.2f%%" % tstresult
-    out = fnn.activateOnDataset(griddata)
-    out = out.argmax(axis=1)  # the highest output activation gives the class
-    out = out.reshape(X.shape)
-    figure(1)
-    ioff()  # interactive graphics off
-    clf()  # clear the plot
-    hold(True)  # overplot on
-    for c in [0, 1, 2]:
-        here, _ = where(tstdata['class'] == c)
-        plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
-    if out.max() != out.min():  # safety check against flat field
-        contourf(X, Y, out)  # plot the contour
-    ion()  # interactive graphics on
-    draw()  # update the plot
-
-ioff()
-show()
diff --git a/pybrain_experiments/test.py b/pybrain_experiments/test.py
deleted file mode 100644
index f7b0a01..0000000
--- a/pybrain_experiments/test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from pybrain.structure import FeedForwardNetwork
-from pybrain.structure import LinearLayer, SigmoidLayer
-from pybrain.structure import FullConnection
-n = FeedForwardNetwork()
-
-inLayer = LinearLayer(2)
-hiddenLayer = SigmoidLayer(3)
-outLayer = LinearLayer(1)
-
-n.addInputModule(inLayer)
-n.addModule(hiddenLayer)
-n.addOutputModule(outLayer)
-
-in_to_hidden = FullConnection(inLayer, hiddenLayer)
-hidden_to_out = FullConnection(hiddenLayer, outLayer)
-
-
-n.addConnection(in_to_hidden)
-n.addConnection(hidden_to_out)
-
-
-# everything is wired together now
-# this makes it usable
-
-n.sortModules()
-
-
-if __name__ == "__main__":
-    # Again, this might look different on your machine -
-    # the weights of the connections have already been initialized randomly.
-    print n.activate([1, 2])
-    # look at the hidden weights
-    print in_to_hidden.params
-    print hidden_to_out.params
-    print n.params  # weights here too
diff --git a/pybrain_experiments/test_recurrent.py b/pybrain_experiments/test_recurrent.py
deleted file mode 100644
index 692898a..0000000
--- a/pybrain_experiments/test_recurrent.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from pybrain.structure import RecurrentNetwork
-n = RecurrentNetwork()
-
-n.addInputModule(LinearLayer(2, name='in'))
-n.addModule(SigmoidLayer(3, name='hidden'))
-n.addOutputModule(LinearLayer(1, name='out'))
-n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
-n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))
-n.addRecurrentConnection(FullConnection(n['hidden'], n['hidden'], name='c3'))
-
-
-n.sortModules()
-n.activate((2, 2))
-array([-0.1959887])
-n.activate((2, 2))
-array([-0.19623716])
-n.activate((2, 2))
-array([-0.19675801])
-n.reset() #clears history
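The deleted test_recurrent.py was a pasted interpreter session rather than a runnable script: the imports for LinearLayer, SigmoidLayer, and FullConnection are missing, and the bare array([...]) lines are interpreter output, not code. A self-contained version of the same experiment (exact outputs vary with the random weight initialization):

    from pybrain.structure import (
        RecurrentNetwork, LinearLayer, SigmoidLayer, FullConnection
    )

    n = RecurrentNetwork()
    n.addInputModule(LinearLayer(2, name='in'))
    n.addModule(SigmoidLayer(3, name='hidden'))
    n.addOutputModule(LinearLayer(1, name='out'))
    n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))
    # The recurrent connection feeds the hidden layer back into itself,
    # so repeated activations on the same input give drifting outputs.
    n.addRecurrentConnection(FullConnection(n['hidden'], n['hidden'], name='c3'))

    n.sortModules()
    print(n.activate((2, 2)))  # e.g. [-0.1959887]
    print(n.activate((2, 2)))  # slightly different: history has accumulated
    n.reset()  # clears the history
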
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py
new file mode 100644
index 0000000..f6d8ae6
--- /dev/null
+++ b/ricky/binaryclassifier.py
@@ -0,0 +1,34 @@
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.structure import SoftmaxLayer
+from pybrain.datasets import SupervisedDataSet
+from pybrain.supervised.trainers import BackpropTrainer
+
+
+class BinaryClassifier(object):
+    def __init__(self):
+        self._default_hidden_layers = 3
+        pass
+
+    def _train(self, dataset):
+        """
+        pybrain.tools.shortcuts.buildNetwork(*layers, **options)
+        Build arbitrarily deep networks.
+
+        layers should be a list or tuple of integers that
+        indicate how many neurons the layers should have.
+        bias and outputbias are flags to indicate whether
+        the network should have the corresponding biases;
+        both default to True.
+        """
+        net = buildNetwork(
+            dataset.params_length,
+            self._default_hidden_layers,
+            1  # a binary classifier only needs one output unit
+        )
+        ds = SupervisedDataSet(dataset)
+        trainer = BackpropTrainer(net, ds)
+        trainer.trainUntilConvergence()
+        net.activate(params.as_serialized)
+
+    def classify(self, dataset):
+        return False
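As committed, _train has two loose ends: SupervisedDataSet is constructed from a dataset object although its constructor takes input and target dimensions, and the final activate call references an undefined params (the trained network is also never kept). A minimal sketch of the train-then-activate flow it appears to be aiming for, with hypothetical sample data:

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.datasets import SupervisedDataSet
    from pybrain.supervised.trainers import BackpropTrainer

    # Hypothetical normalized samples: four 0/1 features, one binary target.
    samples = [
        ([0, 1, 0, 1], 1),
        ([1, 0, 1, 0], 0),
        ([0, 1, 1, 1], 1),
        ([1, 0, 0, 0], 0),
    ]

    indim = len(samples[0][0])
    ds = SupervisedDataSet(indim, 1)  # dimensions, not a dataset object
    for features, target in samples:
        ds.addSample(features, target)

    net = buildNetwork(indim, 3, 1)  # one output unit suffices for binary labels
    trainer = BackpropTrainer(net, ds)
    # trainEpochs is used here; trainUntilConvergence also works but holds
    # out part of the (tiny) dataset as a validation split.
    trainer.trainEpochs(50)

    # classify() could threshold the trained network's output at 0.5.
    print(net.activate(samples[0][0]))
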
diff --git a/ricky/dataset.py b/ricky/dataset.py
new file mode 100644
index 0000000..4f8a422
--- /dev/null
+++ b/ricky/dataset.py
@@ -0,0 +1,45 @@
+import ricky.params
+from ricky.utils import data_from_image
+from pybrain.datasets import SupervisedDataSet
+
+
+# While subclassing SupervisedDataSet works, we should try to detect the
+# length of params and build a new dataset for each type of params set...
+# that way, an instance of SupervisedDataSet could be accessed through the
+# params instance itself: a simplified one-to-one mapping.
+
+# This limits us to one classifier per params instance, but that is
+# arguably a good thing: the params class itself can carry a method that
+# randomizes params and then evaluates the result.
+
+# Multiple inheritance might achieve this; keeping all dataset-related
+# code in a separate class keeps it better organized.
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+
+class DataSet(SupervisedDataSet):
+
+    @staticmethod
+    def _file_into_list(filepath):
+        with open(filepath, "r") as f:
+            return f.read().split("\n")
+
+    def _load_url_list(self, url_list, liked=False):
+        target = 0
+        if liked:
+            target = 1
+        data_list = [data_from_image(image) for image in url_list if image]
+        for data in data_list:
+            for params_class in ricky.params.Params.__subclasses__():
+                if data['module'] == params_class.__name__:
+                    params_instance = params_class()
+                    params_instance.from_dict(data['params'])
+                    self.addSample(
+                        params_instance.as_normalized(),
+                        target
+                    )
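A sketch of how this DataSet might be fed from the renamed liked-images list. The input and target dimensions are hypothetical: the SupervisedDataSet base class needs both up front, and the input size must match the length of as_normalized():

    from ricky.dataset import DataSet

    ds = DataSet(24, 1)  # 24 is a stand-in for the normalized params length

    liked = DataSet._file_into_list("share/image_url_sets/remote/IMAGES_LIKED")
    ds._load_url_list(liked, liked=True)  # targets set to 1
    # a disliked list would be loaded the same way with liked=False
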
diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/ricky/dataset/__init__.py
+++ /dev/null
diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py
index 3bf5c7e..a3bbf65 100644
--- a/ricky/param/__init__.py
+++ b/ricky/param/__init__.py
@@ -74,4 +74,6 @@ class Param(object):
         pass
 
     def as_normalized(self):
+        if self.value:
+            return 1
         return 0
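This change makes as_normalized collapse whatever a Param holds into a single 0/1 feature, which is what DataSet.addSample ultimately consumes. A toy illustration of the truthiness rule (a standalone stand-in class, not the ricky constructor):

    class Param(object):
        def __init__(self, value=None):
            self.value = value

        def as_normalized(self):
            if self.value:
                return 1
            return 0

    print(Param("rgb-9,252,50").as_normalized())  # 1
    print(Param().as_normalized())                # 0: falsy values normalize to 0
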
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 80da6c8..da4562f 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -24,6 +24,8 @@ class Params(object):
"""string representation"""
return pprint.pformat(self.as_dict())
+ def __len__(self):
+ return len(self._params)
def _load_probabilities_json(self, probabilities_file=None):
if probabilities_file:
filepath = probabilities_file
diff --git a/IMAGES_I_LIKE b/share/image_url_sets/remote/IMAGES_LIKED
index b6015c1..b6015c1 100644
--- a/IMAGES_I_LIKE
+++ b/share/image_url_sets/remote/IMAGES_LIKED
diff --git a/share/install/requirements.txt b/share/install/requirements.txt
new file mode 100644
index 0000000..0b2327e
--- /dev/null
+++ b/share/install/requirements.txt
@@ -0,0 +1 @@
+pybrain