author     pepperpepperpepper <pepper@scannerjammer.com>   2015-12-08 21:37:41 -0800
committer  pepperpepperpepper <pepper@scannerjammer.com>   2015-12-08 21:37:41 -0800
commit     0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch)
tree       60df92a77a6d298aed851315ffad80d4d1e937ef
parent     518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff)
made progress in binaryclassifier rewrite, restructured file tree
-rwxr-xr-x  example.py                                                               29
-rw-r--r--  pybrain_experiments/classification_test.py                              112
-rw-r--r--  pybrain_experiments/test.py                                              35
-rw-r--r--  pybrain_experiments/test_recurrent.py                                    19
-rw-r--r--  ricky/binaryclassifier.py                                                34
-rw-r--r--  ricky/dataset.py                                                         45
-rw-r--r--  ricky/dataset/__init__.py                                                 0
-rw-r--r--  ricky/param/__init__.py                                                   2
-rw-r--r--  ricky/params/__init__.py                                                  2
-rw-r--r--  share/image_url_sets/remote/IMAGES_LIKED (renamed from IMAGES_I_LIKE)     0
-rw-r--r--  share/install/requirements.txt                                            1
11 files changed, 99 insertions, 180 deletions
@@ -4,17 +4,18 @@ import ricky.utils as utils
 params = ricky.params.PbGradient()
 params.randomize()
-print params.execute()
-print params
-data = utils.data_from_url(
-    "/im/cache/PbGradientrgb-234,155,194-"
-    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
-)
-print data
-for params_class in ricky.params.Params.__subclasses__():
-    if data['module'] == params_class.__name__:
-        params_instance = params_class()
-        print type(params_instance)
-        params_instance.from_dict(data['params'])
-        print params_instance.execute()
-        print params_instance.as_normalized()
+print params.as_serialized()
+#print params.execute()
+#print params
+#data = utils.data_from_url(
+#    "/im/cache/PbGradientrgb-234,155,194-"
+#    "-rgb-9,252,50-_1449620530_RICHARD_GIOVANNI.jpg"
+#)
+#print data
+#for params_class in ricky.params.Params.__subclasses__():
+#    if data['module'] == params_class.__name__:
+#        params_instance = params_class()
+#        print type(params_instance)
+#        params_instance.from_dict(data['params'])
+#        print params_instance.execute()
+#        print params_instance.as_normalized()
 
diff --git a/pybrain_experiments/classification_test.py b/pybrain_experiments/classification_test.py
deleted file mode 100644
index ac5f272..0000000
--- a/pybrain_experiments/classification_test.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from pybrain.datasets import ClassificationDataSet
-from pybrain.utilities import percentError
-from pybrain.tools.shortcuts import buildNetwork
-from pybrain.supervised.trainers import BackpropTrainer
-from pybrain.structure.modules import SoftmaxLayer
-
-from pylab import ion, ioff, figure, draw, contourf, clf, show, hold, plot
-from scipy import diag, arange, meshgrid, where
-from numpy.random import multivariate_normal
-
-
-# To have a nice dataset for visualization, we produce a set of points in
-# 2D belonging to three different classes. You could also read in your data
-# from a file, e.g. using pylab.load().
-
-means = [(-1,0),(2,4),(3,1)]
-cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
-alldata = ClassificationDataSet(2, 1, nb_classes=3)
-for n in xrange(400):
-    for klass in range(3):
-        input = multivariate_normal(means[klass],cov[klass])
-        alldata.addSample(input, [klass])
-
-
-# Randomly split the dataset into 75% training and 25% test data sets.
-# Of course, we could also have created two different datasets to begin with.
-
-tstdata, trndata = alldata.splitWithProportion( 0.25 )
-
-
-# For neural network classification, it is highly advisable to encode
-# classes with one output neuron per class. Note that this operation duplicates
-# the original targets and stores them in an (integer) field named 'class'.
-trndata._convertToOneOfMany( )
-tstdata._convertToOneOfMany( )
-
-
-print "Number of training patterns: ", len(trndata)
-print "Input and output dimensions: ", trndata.indim, trndata.outdim
-print "First sample (input, target, class):"
-print trndata['input'][0], trndata['target'][0], trndata['class'][0]
-
-
-
-
-
-# Now build a feed-forward network with 5 hidden units. We use the shortcut
-# buildNetwork() for this. The input and output layer size must match the
-# dataset's input and target dimension. You could add additional hidden
-# layers by inserting more numbers giving the desired layer sizes.
-#
-# The output layer uses a softmax function because we are doing classification.
-# There are more options to explore here, e.g. try changing the hidden layer
-# transfer function to linear instead of (the default) sigmoid.
-#
-# See also Description buildNetwork() for more info on options, and the Network
-# tutorial Building Networks with Modules and Connections for info on how to
-# build your own non-standard networks.
-fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
-
-
-# Set up a trainer that basically takes the network and training dataset
-# as input. For a list of trainers, see trainers. We are using a
-# BackpropTrainer for this.
-
-trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1,
-                           verbose=True, weightdecay=0.01)
-
-
-# Now generate a square grid of data points and put it into a dataset,
-# which we can then classify to obtain a nice contour field for visualization.
-# Therefore the target values for this data set can be ignored.
-
-ticks = arange(-3.,6.,0.2)
-X, Y = meshgrid(ticks, ticks)
-# need column vectors in dataset, not arrays
-griddata = ClassificationDataSet(2,1, nb_classes=3)
-for i in xrange(X.size):
-    griddata.addSample([X.ravel()[i],Y.ravel()[i]], [0])
-griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
-
-
-for i in range(20):
-# Train the network for some epochs. Usually you would
-# set something like 5 here, but for visualization purposes we
-# do this one epoch at a time.
-    trainer.trainEpochs( 1 )
-    trnresult = percentError( trainer.testOnClassData(),
-                              trndata['class'] )
-    tstresult = percentError( trainer.testOnClassData(
-        dataset=tstdata ), tstdata['class'] )
-
-    print "epoch: %4d" % trainer.totalepochs, \
-          " train error: %5.2f%%" % trnresult, \
-          " test error: %5.2f%%" % tstresult
-    out = fnn.activateOnDataset(griddata)
-    out = out.argmax(axis=1)  # the highest output activation gives the class
-    out = out.reshape(X.shape)
-    figure(1)
-    ioff()  # interactive graphics off
-    clf()   # clear the plot
-    hold(True)  # overplot on
-    for c in [0,1,2]:
-        here, _ = where(tstdata['class']==c)
-        plot(tstdata['input'][here,0],tstdata['input'][here,1],'o')
-    if out.max()!=out.min():  # safety check against flat field
-        contourf(X, Y, out)   # plot the contour
-    ion()   # interactive graphics on
-    draw()  # update the plot
-
-ioff()
-show()
diff --git a/pybrain_experiments/test.py b/pybrain_experiments/test.py
deleted file mode 100644
index f7b0a01..0000000
--- a/pybrain_experiments/test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from pybrain.structure import FeedForwardNetwork
-from pybrain.structure import LinearLayer, SigmoidLayer
-from pybrain.structure import FullConnection
-n = FeedForwardNetwork()
-
-inLayer = LinearLayer(2)
-hiddenLayer = SigmoidLayer(3)
-outLayer = LinearLayer(1)
-
-n.addInputModule(inLayer)
-n.addModule(hiddenLayer)
-n.addOutputModule(outLayer)
-
-in_to_hidden = FullConnection(inLayer, hiddenLayer)
-hidden_to_out = FullConnection(hiddenLayer, outLayer)
-
-
-n.addConnection(in_to_hidden)
-n.addConnection(hidden_to_out)
-
-
-# everything is wired together now
-# this makes it usable
-
-n.sortModules()
-
-
-if __name__ == "__main__":
-    #Again, this might look different on your machine -
-    #the weights of the connections have already been initialized randomly.
-    print n.activate([1, 2])
-    #look at the hidden weights
-    print in_to_hidden.params
-    print hidden_to_out.params
-    print n.params #weights here too
diff --git a/pybrain_experiments/test_recurrent.py b/pybrain_experiments/test_recurrent.py
deleted file mode 100644
index 692898a..0000000
--- a/pybrain_experiments/test_recurrent.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from pybrain.structure import RecurrentNetwork
-n = RecurrentNetwork()
-
-n.addInputModule(LinearLayer(2, name='in'))
-n.addModule(SigmoidLayer(3, name='hidden'))
-n.addOutputModule(LinearLayer(1, name='out'))
-n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
-n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))
-n.addRecurrentConnection(FullConnection(n['hidden'], n['hidden'], name='c3'))
-
-
-n.sortModules()
-n.activate((2, 2))
-array([-0.1959887])
-n.activate((2, 2))
-array([-0.19623716])
-n.activate((2, 2))
-array([-0.19675801])
-n.reset() #clears history
diff --git a/ricky/binaryclassifier.py b/ricky/binaryclassifier.py
new file mode 100644
index 0000000..f6d8ae6
--- /dev/null
+++ b/ricky/binaryclassifier.py
@@ -0,0 +1,34 @@
+from pybrain.tools.shortcuts import buildNetwork
+from pybrain.structure import SoftmaxLayer
+from pybrain.datasets import SupervisedDataSet
+from pybrain.supervised.trainers import BackpropTrainer
+
+
+class BinaryClassifier(object):
+    def __init__(self):
+        self._default_hidden_layers = 3
+        pass
+
+    def _train(self, dataset):
+        """
+        pybrain.tools.shortcuts.buildNetwork(*layers, **options)
+        Build arbitrarily deep networks.
+
+        layers should be a list or tuple of integers, that
+        indicate how many neurons the layers should have.
+        bias and outputbias are flags to indicate whether
+        the network should have the corresponding biases;
+        both default to True.
+        """
+        net = buildNetwork(
+            dataset.params_length,
+            self._default_hidden_layers,
+            1  # a binary classifier only requires one output layer
+        )
+        ds = SupervisedDataSet(dataset)
+        trainer = BackpropTrainer(net, ds)
+        trainer.trainUntilConvergence()
+        net.activate(params.as_serialized)
+
+    def classify(self, dataset):
+        return False
diff --git a/ricky/dataset.py b/ricky/dataset.py
new file mode 100644
index 0000000..4f8a422
--- /dev/null
+++ b/ricky/dataset.py
@@ -0,0 +1,45 @@
+import ricky.params
+from ricky.utils import data_from_image
+from pybrain.datasets import SupervisedDataSet
+
+
+# while subclassing this works, we should try to detect the length of params
+# and build a new data set for each type of params set...
+# therefore, an instance of SupervisedDataSet could actually be
+# accessed through the params instance...simplified one-to-one mapping
+
+# we are limited to only one classifier per params instance as well
+# however this is sort of a good thing, because built into the params
+# class can be a method that randomizes params, and then evaluates
+
+# we might be able to get this done through multiple inheritance
+# keep all dataset related stuff in a separate class to make it better organized
+
+# we need
+# .evaluate
+# .generate_liked_image
+# .train_from_url_list
+# .reset
+
+
+class DataSet(SupervisedDataSet):
+
+    @staticmethod
+    def _file_into_list(self, filepath):
+        f = open(filepath, "r")
+        return f.read().split("\n")
+
+    def _load_url_list(self, url_list, liked=False):
+        target = 0
+        if liked:
+            target = 1
+        data_list = [data_from_image(image) for image in url_list if image]
+        for data in data_list:
+            for params_class in ricky.params.Params.__subclasses__():
+                if data['module'] == params_class.__name__:
+                    params_instance = params_class()
+                    params_instance.from_dict(data['params'])
+                    self.addSample(
+                        params_instance.as_normalized(),
+                        target
+                    )
diff --git a/ricky/dataset/__init__.py b/ricky/dataset/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/ricky/dataset/__init__.py
+++ /dev/null
diff --git a/ricky/param/__init__.py b/ricky/param/__init__.py
index 3bf5c7e..a3bbf65 100644
--- a/ricky/param/__init__.py
+++ b/ricky/param/__init__.py
@@ -74,4 +74,6 @@ class Param(object):
         pass
 
     def as_normalized(self):
+        if self.value:
+            return 1
         return 0
diff --git a/ricky/params/__init__.py b/ricky/params/__init__.py
index 80da6c8..da4562f 100644
--- a/ricky/params/__init__.py
+++ b/ricky/params/__init__.py
@@ -24,6 +24,8 @@ class Params(object):
         """string representation"""
         return pprint.pformat(self.as_dict())
 
+    def __len__(self):
+        return len(self._params)
     def _load_probabilities_json(self, probabilities_file=None):
         if probabilities_file:
             filepath = probabilities_file
diff --git a/IMAGES_I_LIKE b/share/image_url_sets/remote/IMAGES_LIKED
index b6015c1..b6015c1 100644
--- a/IMAGES_I_LIKE
+++ b/share/image_url_sets/remote/IMAGES_LIKED
diff --git a/share/install/requirements.txt b/share/install/requirements.txt
new file mode 100644
index 0000000..0b2327e
--- /dev/null
+++ b/share/install/requirements.txt
@@ -0,0 +1 @@
+pybrain
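
For orientation, a minimal self-contained sketch of the training loop this commit is working toward. It is not part of the commit: n_inputs and the random samples are placeholders standing in for len(params) and params.as_normalized(), and it uses PyBrain's actual SupervisedDataSet constructor, which takes input and target dimensions rather than a dataset object (the SupervisedDataSet(dataset) call in the new _train would need the same treatment before it runs).

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.datasets import SupervisedDataSet
    from pybrain.supervised.trainers import BackpropTrainer
    import random

    n_inputs = 8  # placeholder: in the repo this would come from len(params)

    # One hidden layer of 3 units and a single output unit for the
    # liked / not-liked decision, mirroring _train() above.
    net = buildNetwork(n_inputs, 3, 1)

    # SupervisedDataSet is built from (input dim, target dim); samples are
    # then added one at a time, as DataSet._load_url_list does via addSample().
    ds = SupervisedDataSet(n_inputs, 1)
    for _ in range(100):
        sample = [random.random() for _ in range(n_inputs)]  # stand-in for params.as_normalized()
        target = random.choice([0, 1])                       # 1 = liked, 0 = not liked
        ds.addSample(sample, [target])

    trainer = BackpropTrainer(net, ds)
    trainer.trainUntilConvergence(maxEpochs=20)

    # Score a fresh parameter vector; an activation near 1 means "liked".
    print(net.activate([random.random() for _ in range(n_inputs)]))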