diff options
| author | pepperpepperpepper <pepper@scannerjammer.com> | 2015-12-08 21:37:41 -0800 |
|---|---|---|
| committer | pepperpepperpepper <pepper@scannerjammer.com> | 2015-12-08 21:37:41 -0800 |
| commit | 0e082b3065d8c3bafbd82cbaf24d6efb85825b05 (patch) | |
| tree | 60df92a77a6d298aed851315ffad80d4d1e937ef /ricky/dataset.py | |
| parent | 518f5b63f5b61308a8d3df64eb9ff715bb3c0e2c (diff) | |
made progress in binaryclassifier rewrite, restructured file tree
Diffstat (limited to 'ricky/dataset.py')
| -rw-r--r-- | ricky/dataset.py | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/ricky/dataset.py b/ricky/dataset.py new file mode 100644 index 0000000..4f8a422 --- /dev/null +++ b/ricky/dataset.py @@ -0,0 +1,45 @@ +import ricky.params +from ricky.utils import data_from_image +from pybrain.datasets import SupervisedDataSet + + +# while subclassing this works, we should try to detect the length of params +# and build a new data set for each type of params set... +# therefore, an instance of SupervisedDataSet could actually be +# accessed through the params instance...simplified one-to-one mapping + +# we are limited to only one classifier per params instance as well +# however this is sort of a good thing, because built into the params +# class can be a method that randomizes params, and then evaluates + +# we might be able to get this done through multiple inheritance +# keep all dataset related stuff in a separate class to make it better organized + +# we need +# .evaluate +# .generate_liked_image +# .train_from_url_list +# .reset + + +class DataSet(SupervisedDataSet): + + @staticmethod + def _file_into_list(self, filepath): + f = open(filepath, "r") + return f.read().split("\n") + + def _load_url_list(self, url_list, liked=False): + target = 0 + if liked: + target = 1 + data_list = [data_from_image(image) for image in url_list if image] + for data in data_list: + for params_class in ricky.params.Params.__subclasses__(): + if data['module'] == params_class.__name__: + params_instance = params_class() + params_instance.from_dict(data['params']) + self.addSample( + params_instance.as_normalized(), + target + ) |
