"""Supervised dataset for ricky, populated from lists of image references."""
import ricky.params
from ricky.utils import data_from_image
from pybrain.datasets import SupervisedDataSet


# Design notes
# ------------
# While subclassing SupervisedDataSet works, we should try to detect the
# length of the params and build a new data set for each type of params
# set.  An instance of SupervisedDataSet could therefore be accessed through
# the params instance itself: a simplified one-to-one mapping.
#
# We are limited to only one classifier per params instance as well.
# This is sort of a good thing, because the params class can then carry a
# method that randomizes the params and evaluates them.
#
# We might be able to get this done through multiple inheritance; keeping
# all dataset related code in a separate class makes it better organized.
#
# TODO: still needed
#   .evaluate
#   .generate_liked_image
#   .train_from_url_list
#   .reset


class DataSet(SupervisedDataSet):
    """SupervisedDataSet whose samples are derived from image references.

    Each image reference is decoded with ``data_from_image`` and matched
    against the subclasses of ``ricky.params.Params``; the normalized
    parameters become the sample input and the liked/not-liked flag the
    sample target.
    """

    def _file_into_list(self, filepath):
        """Return the contents of *filepath* split on newlines.

        A file that ends with a newline yields a trailing empty string;
        ``_load_url_list`` skips such empty entries.

        This used to be declared ``@staticmethod`` while still taking
        ``self``, which made ``self._file_into_list(path)`` raise TypeError.
        It is a regular method now, and the file handle is closed
        deterministically instead of being leaked.
        """
        with open(filepath, "r") as f:
            return f.read().split("\n")

    def _load_url_list(self, url_list, liked=False):
        """Add one sample per decodable entry of *url_list*.

        :param url_list: iterable of image references; falsy entries
            (e.g. empty strings) are skipped.
        :param liked: when true the samples get target 1, otherwise 0.

        Each entry is passed to ``data_from_image``, whose result is indexed
        with the keys ``'module'`` and ``'params'``.  Entries whose
        ``'module'`` matches no direct ``Params`` subclass name are ignored.
        """
        target = 1 if liked else 0

        # Decode everything before touching the dataset, so that a failure
        # in data_from_image does not leave a partially loaded sample set.
        data_list = [data_from_image(image) for image in url_list if image]

        # The set of subclasses does not change while loading; look it up once.
        params_classes = ricky.params.Params.__subclasses__()

        for data in data_list:
            for params_class in params_classes:
                if data['module'] != params_class.__name__:
                    continue
                params_instance = params_class()
                params_instance.from_dict(data['params'])
                self.addSample(params_instance.as_normalized(), target)