-rw-r--r--  become_yukarin/config.py           |  2
-rw-r--r--  become_yukarin/dataset/dataset.py  | 54
-rw-r--r--  become_yukarin/model.py            | 20
3 files changed, 61 insertions, 15 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index 424e598..864de4e 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -16,6 +16,7 @@ class DatasetConfig(NamedTuple):
     target_mean_path: Path
     target_var_path: Path
     features: List[str]
+    train_crop_size: int
     seed: int
     num_test: int
@@ -82,6 +83,7 @@ def create_from_json(s: Union[str, Path]):
             target_mean_path=Path(d['dataset']['target_mean_path']).expanduser(),
             target_var_path=Path(d['dataset']['target_var_path']).expanduser(),
             features=d['dataset']['features'],
+            train_crop_size=d['dataset']['train_crop_size'],
             seed=d['dataset']['seed'],
             num_test=d['dataset']['num_test'],
         ),
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 6328a1c..a6550b7 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -1,3 +1,4 @@
+import copy
 import typing
 from abc import ABCMeta, abstractmethod
 from collections import defaultdict
@@ -41,13 +42,16 @@ class DictKeyReplaceProcess(BaseDataProcess):
 
 class ChainProcess(BaseDataProcess):
     def __init__(self, process: typing.Iterable[BaseDataProcess]):
-        self._process = process
+        self._process = list(process)
 
     def __call__(self, data, test):
         for p in self._process:
             data = p(data, test)
         return data
 
+    def append(self, process: BaseDataProcess):
+        self._process.append(process)
+
 
 class SplitProcess(BaseDataProcess):
     def __init__(self, process: typing.Dict[str, typing.Optional[BaseDataProcess]]):
@@ -248,6 +252,24 @@ class ShapeAlignProcess(BaseDataProcess):
         return data
 
 
+class CropProcess(BaseDataProcess):
+    def __init__(self, crop_size: int, time_axis: int = 1):
+        self._crop_size = crop_size
+        self._time_axis = time_axis
+
+    def __call__(self, datas: Dict[str, any], test=True):
+        assert not test
+
+        data, seed = datas['data'], datas['seed']
+        random = numpy.random.RandomState(seed)
+
+        len_time = data.shape[self._time_axis]
+        assert len_time >= self._crop_size
+
+        start = random.randint(len_time - self._crop_size + 1)
+        return numpy.split(data, [start, start + self._crop_size], axis=self._time_axis)[1]
+
+
 class DataProcessDataset(chainer.dataset.DatasetMixin):
     def __init__(self, data: typing.List, data_process: BaseDataProcess):
         self._data = data
@@ -273,7 +295,7 @@ def create(config: DatasetConfig):
     target_var = acoustic_feature_load_process(config.target_var_path, test=True)
 
     # {input_path, target_path}
-    data_process = ChainProcess([
+    data_process_base = ChainProcess([
         SplitProcess(dict(
             input=ChainProcess([
                 LambdaProcess(lambda d, test: d['input_path']),
@@ -300,6 +322,28 @@
         ShapeAlignProcess(),
     ])
 
+    data_process_train = copy.deepcopy(data_process_base)
+    if config.train_crop_size is not None:
+        data_process_train.append(ChainProcess([
+            LambdaProcess(lambda d, test: dict(seed=numpy.random.randint(2 ** 32), **d)),
+            SplitProcess(dict(
+                input=ChainProcess([
+                    LambdaProcess(lambda d, test: dict(data=d['input'], seed=d['seed'])),
+                    CropProcess(crop_size=config.train_crop_size),
+                ]),
+                target=ChainProcess([
+                    LambdaProcess(lambda d, test: dict(data=d['target'], seed=d['seed'])),
+                    CropProcess(crop_size=config.train_crop_size),
+                ]),
+                mask=ChainProcess([
+                    LambdaProcess(lambda d, test: dict(data=d['mask'], seed=d['seed'])),
+                    CropProcess(crop_size=config.train_crop_size),
+                ]),
+            )),
+        ]))
+
+    data_process_test = data_process_base
+
     num_test = config.num_test
     pairs = [
         dict(input_path=input_path, target_path=target_path)
@@ -311,7 +355,7 @@
     train_for_evaluate_paths = train_paths[:num_test]
 
     return {
-        'train': DataProcessDataset(train_paths, data_process),
-        'test': DataProcessDataset(test_paths, data_process),
-        'train_eval': DataProcessDataset(train_for_evaluate_paths, data_process),
+        'train': DataProcessDataset(train_paths, data_process_train),
+        'test': DataProcessDataset(test_paths, data_process_test),
+        'train_eval': DataProcessDataset(train_for_evaluate_paths, data_process_test),
     }
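Note on the dataset change: the crop is train-only and deterministic per example. A LambdaProcess first attaches one freshly drawn seed (numpy.random.randint(2 ** 32)) to the example dict, and each of input, target, and mask is then cropped by its own CropProcess seeded with that same value, so all three arrays are cut at the same time offset. A minimal sketch of that alignment trick outside the pipeline — the (features, time) shapes and the crop_size of 256 below are assumed toy values, not taken from the repository:

    import numpy

    def crop(data: numpy.ndarray, seed: int, crop_size: int, time_axis: int = 1) -> numpy.ndarray:
        # Same logic as CropProcess: the offset is a pure function of the seed.
        random = numpy.random.RandomState(seed)
        len_time = data.shape[time_axis]
        assert len_time >= crop_size
        start = random.randint(len_time - crop_size + 1)
        return numpy.split(data, [start, start + crop_size], axis=time_axis)[1]

    input_ = numpy.random.rand(40, 1000)    # assumed shape: 40 features x 1000 frames
    target = numpy.random.rand(40, 1000)

    seed = numpy.random.randint(2 ** 32)    # one seed per example, as in the diff
    a = crop(input_, seed, crop_size=256)
    b = crop(target, seed, crop_size=256)
    assert a.shape == b.shape == (40, 256)  # identical offset because the seed is shared
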
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 5f46876..9d69378 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -112,7 +112,7 @@ class CBHG(chainer.link.Chain):
             mode='constant',
         )
         self.max_pooling = chainer.functions.MaxPoolingND(1, max_pooling_k, 1, cover_all=False)
-        self.out_size = out_channels
+        self.out_size = out_channels * 2
 
         with self.init_scope():
             self.conv_bank = Conv1DBank(
@@ -128,12 +128,12 @@ class CBHG(chainer.link.Chain):
             self.highways = chainer.link.ChainList(
                 *([ConvHighway(out_channels) for _ in range(highway_layers)])
             )
-            # self.gru = chainer.links.NStepBiGRU(
-            #     n_layers=1,
-            #     in_size=out_channels,
-            #     out_size=out_channels,
-            #     dropout=0.0,
-            # )
+            self.gru = chainer.links.NStepBiGRU(
+                n_layers=1,
+                in_size=out_channels,
+                out_size=out_channels,
+                dropout=0.0,
+            )
 
     def __call__(self, x):
         h = x
@@ -144,9 +144,9 @@ class CBHG(chainer.link.Chain):
         for highway in self.highways:
             h = highway(h)
 
-        # h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
-        # _, h = self.gru(None, h)
-        # h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))
+        h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
+        _, h = self.gru(None, h)
+        h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))
 
         return h
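Note on the model change: re-enabling the NStepBiGRU is what forces self.out_size = out_channels * 2, since a bidirectional GRU concatenates the forward and backward hidden states along the feature axis. The separate/transpose/stack round trip exists because the convolutional stack works on (batch, channels, time) arrays while NStepBiGRU consumes a list of per-sample (time, channels) sequences. A small sketch of that round trip with assumed toy sizes (out_channels=32, batch of 4, 100 frames), not values from the repository:

    import numpy
    import chainer

    out_channels = 32
    gru = chainer.links.NStepBiGRU(
        n_layers=1, in_size=out_channels, out_size=out_channels, dropout=0.0)

    # Assumed toy input in the conv layout: (batch, channels, time).
    h = numpy.zeros((4, out_channels, 100), dtype=numpy.float32)

    # NStepBiGRU wants a list of per-sample (time, channels) sequences.
    hs = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
    _, ys = gru(None, hs)  # hx=None starts from zero hidden states

    # Stack back to (batch, time, 2 * channels), then return to the conv layout.
    y = chainer.functions.transpose(chainer.functions.stack(ys), axes=(0, 2, 1))
    assert y.shape == (4, out_channels * 2, 100)  # forward || backward
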
