diff options
| -rw-r--r-- | become_yukarin/config.py | 20 | ||||
| -rw-r--r-- | become_yukarin/model.py | 176 | ||||
| -rw-r--r-- | requirements.txt | 9 | ||||
| -rw-r--r-- | train.py | 13 |
4 files changed, 183 insertions, 35 deletions
Review note: five added lines in become_yukarin/model.py differ from the patch
as originally published: the two PreNet dropout calls now pass the ratio as the
second argument instead of wrapping (input, ratio) in a tuple; the CBHG highway
stack builds one ConvHighway per layer instead of repeating a single shared
instance; and the misspelled attribute conv_projectoins is conv_projections.
Hunk sizes are unchanged, but the post-image blob id on the model.py index line
no longer matches. The train.py header was missing and is restored without an
index line.

diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index ecfcfab..50694b7 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -19,9 +19,13 @@ class DatasetConfig(NamedTuple):
 
 
 class ModelConfig(NamedTuple):
-    in_size: int
-    num_scale: int
-    base_num_z: int
+    in_channels: int
+    conv_bank_out_channels: int
+    conv_bank_k: int
+    max_pooling_k: int
+    conv_projections_hidden_channels: int
+    highway_layers: int
+    out_channels: int
     out_size: int
 
 
@@ -80,9 +84,13 @@ def create_from_json(s: Union[str, Path]):
             num_test=d['dataset']['num_test'],
         ),
         model=ModelConfig(
-            in_size=d['model']['in_size'],
-            num_scale=d['model']['num_scale'],
-            base_num_z=d['model']['base_num_z'],
+            in_channels=d['model']['in_channels'],
+            conv_bank_out_channels=d['model']['conv_bank_out_channels'],
+            conv_bank_k=d['model']['conv_bank_k'],
+            max_pooling_k=d['model']['max_pooling_k'],
+            conv_projections_hidden_channels=d['model']['conv_projections_hidden_channels'],
+            highway_layers=d['model']['highway_layers'],
+            out_channels=d['model']['out_channels'],
             out_size=d['model']['out_size'],
         ),
         loss=LossConfig(
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index c67f351..38c4235 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -3,49 +3,179 @@ import chainer
 from .config import ModelConfig
 
 
-class DeepConvolution1D(chainer.link.Chain):
-    def __init__(self, in_size: int, num_scale: int, base_num_z: int, **kwargs):
-        super().__init__(**kwargs)
-        self.num_scale = num_scale
-        self.out_size = base_num_z * 2 ** (num_scale - 1)
+class Convolution1D(chainer.links.ConvolutionND):
+    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
+                 nobias=False, initialW=None, initial_bias=None,
+                 cover_all=False):
+        super(Convolution1D, self).__init__(
+            ndim=1,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            ksize=ksize,
+            stride=stride,
+            pad=pad,
+            nobias=nobias,
+            initialW=initialW,
+            initial_bias=initial_bias,
+            cover_all=cover_all,
+        )
 
-        for i in range(num_scale):
-            l = base_num_z * 2 ** i
-            self.add_link('conv{}'.format(i + 1), chainer.links.ConvolutionND(1, in_size, l, 3, 1, 1, nobias=True))
-            self.add_link('bn{}'.format(i + 1), chainer.links.BatchNormalization(l))
-            in_size = l
+
+class ConvHighway(chainer.link.Chain):
+    def __init__(self, in_out_size, nobias=False, activate=chainer.functions.relu,
+                 init_Wh=None, init_Wt=None, init_bh=None, init_bt=-1):
+        super().__init__()
+        self.activate = activate
+
+        with self.init_scope():
+            self.plain = Convolution1D(
+                in_out_size, in_out_size, 1, nobias=nobias,
+                initialW=init_Wh, initial_bias=init_bh)
+            self.transform = Convolution1D(
+                in_out_size, in_out_size, 1, nobias=nobias,
+                initialW=init_Wt, initial_bias=init_bt)
+
+    def __call__(self, x):
+        out_plain = self.activate(self.plain(x))
+        out_transform = chainer.functions.sigmoid(self.transform(x))
+        y = out_plain * out_transform + x * (1 - out_transform)
+        return y
+
+
+class PreNet(chainer.link.Chain):
+    def __init__(self, in_channels: int, hidden_channels: int, out_channels: int):
+        super().__init__()
+        with self.init_scope():
+            self.conv1 = Convolution1D(in_channels, hidden_channels, 1)
+            self.conv2 = Convolution1D(hidden_channels, out_channels, 1)
+
+    def __call__(self, x):
+        h = x
+        h = chainer.functions.dropout(chainer.functions.relu(self.conv1(h)), 0.5)
+        h = chainer.functions.dropout(chainer.functions.relu(self.conv2(h)), 0.5)
+        return h
+
+
+class Conv1DBank(chainer.link.Chain):
+    def __init__(self, in_channels: int, out_channels: int, k: int):
+        super().__init__()
+        self.stacked_channels = out_channels * k
+        self.pads = [
+            chainer.functions.Pad(((0, 0), (0, 0), (i // 2, (i + 1) // 2)), mode='constant')
+            for i in range(k)
+        ]
+
+        with self.init_scope():
+            self.convs = chainer.link.ChainList(
+                *(Convolution1D(in_channels, out_channels, i + 1, nobias=True) for i in range(k))
+            )
+            self.bn = chainer.links.BatchNormalization(out_channels * k)
 
     def __call__(self, x):
         h = x
-        for i in range(self.num_scale):
-            conv = getattr(self, 'conv{}'.format(i + 1))
-            bn = getattr(self, 'bn{}'.format(i + 1))
-            h = chainer.functions.relu(bn(conv(h)))
+        h = chainer.functions.concat([conv(pad(h)) for pad, conv in zip(self.pads, self.convs)])
+        h = chainer.functions.relu(self.bn(h))
+        return h
+
+
+class Conv1DProjections(chainer.link.Chain):
+    def __init__(self, in_channels: int, hidden_channels: int, out_channels: int):
+        super().__init__()
+
+        with self.init_scope():
+            self.conv1 = Convolution1D(in_channels, hidden_channels, 3, pad=1, nobias=True)
+            self.bn1 = chainer.links.BatchNormalization(hidden_channels)
+            self.conv2 = Convolution1D(hidden_channels, out_channels, 3, pad=1, nobias=True)
+            self.bn2 = chainer.links.BatchNormalization(out_channels)
+
+    def __call__(self, x):
+        h = x
+        h = chainer.functions.relu(self.bn1(self.conv1(h)))
+        h = chainer.functions.relu(self.bn2(self.conv2(h)))
+        return h
+
+
+class CBHG(chainer.link.Chain):
+    def __init__(
+            self,
+            in_channels: int,
+            conv_bank_out_channels: int,
+            conv_bank_k: int,
+            max_pooling_k: int,
+            conv_projections_hidden_channels: int,
+            highway_layers: int,
+            out_channels: int,
+    ):
+        super().__init__()
+        self.max_pooling_padding = chainer.functions.Pad(
+            ((0, 0), (0, 0), ((max_pooling_k - 1) // 2, max_pooling_k // 2)),
+            mode='constant',
+        )
+        self.max_pooling = chainer.functions.MaxPoolingND(1, max_pooling_k, 1, cover_all=False)
+        self.out_size = out_channels
+
+        with self.init_scope():
+            self.conv_bank = Conv1DBank(
+                in_channels=in_channels,
+                out_channels=conv_bank_out_channels,
+                k=conv_bank_k,
+            )
+            self.conv_projections = Conv1DProjections(
+                in_channels=self.conv_bank.stacked_channels,
+                hidden_channels=conv_projections_hidden_channels,
+                out_channels=out_channels,
+            )
+            self.highways = chainer.link.ChainList(
+                *(ConvHighway(out_channels) for _ in range(highway_layers))
+            )
+            self.gru = chainer.links.NStepBiGRU(
+                n_layers=1,
+                in_size=out_channels,
+                out_size=out_channels,
+                dropout=0.0,
+            )
+
+    def __call__(self, x):
+        h = x
+        h = self.conv_bank(h)
+        h = self.max_pooling(self.max_pooling_padding(h))
+        h = self.conv_projections(h)
+        h = h + x
+        for highway in self.highways:
+            h = highway(h)
+
+        # h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
+        # _, h = self.gru(None, h)
+        # h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))
         return h
 
 
 class Model(chainer.link.Chain):
-    def __init__(self, convs: DeepConvolution1D, out_size: int):
+    def __init__(self, network, out_size: int):
         super().__init__()
         with self.init_scope():
-            self.convs = convs
-            self.last = chainer.links.ConvolutionND(1, convs.out_size, out_size, 1)
+            self.network = network
+            self.last = Convolution1D(network.out_size, out_size, 1)
 
     def __call__(self, x):
         h = x
-        h = self.convs(h)
+        h = self.network(h)
         h = self.last(h)
         return h
 
 
 def create(config: ModelConfig):
-    convs = DeepConvolution1D(
-        in_size=config.in_size,
-        num_scale=config.num_scale,
-        base_num_z=config.base_num_z,
+    network = CBHG(
+        in_channels=config.in_channels,
+        conv_bank_out_channels=config.conv_bank_out_channels,
+        conv_bank_k=config.conv_bank_k,
+        max_pooling_k=config.max_pooling_k,
+        conv_projections_hidden_channels=config.conv_projections_hidden_channels,
+        highway_layers=config.highway_layers,
+        out_channels=config.out_channels,
     )
     model = Model(
-        convs=convs,
+        network=network,
         out_size=config.out_size,
     )
     return model
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..80fbb0d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+numpy
+cupy
+chainer
+librosa
+pysptk
+pyworld
+fastdtw
+nnmnkwii
+matplotlib
diff --git a/train.py b/train.py
--- a/train.py
+++ b/train.py
@@ -37,30 +37,31 @@ optimizer = optimizers.Adam()
 optimizer.setup(model)
 
 # trainer
-trigger_best = training.triggers.MinValueTrigger('test/main/loss', (config.train.snapshot_iteration, 'iteration'))
+trigger_log = (config.train.log_iteration, 'iteration')
+trigger_snapshot = (config.train.snapshot_iteration, 'iteration')
 
 converter = partial(convert.concat_examples, padding=0)
 updater = training.StandardUpdater(train_iter, optimizer, device=config.train.gpu, converter=converter)
 trainer = training.Trainer(updater, out=config.train.output)
 
 ext = extensions.Evaluator(test_iter, model, converter, device=config.train.gpu)
-trainer.extend(ext, name='test', trigger=(config.train.log_iteration, 'iteration'))
+trainer.extend(ext, name='test', trigger=trigger_log)
 ext = extensions.Evaluator(train_eval_iter, model, converter, device=config.train.gpu)
-trainer.extend(ext, name='train', trigger=(config.train.log_iteration, 'iteration'))
+trainer.extend(ext, name='train', trigger=trigger_log)
 
 trainer.extend(extensions.dump_graph('main/loss', out_name='graph.dot'))
 
 ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
-trainer.extend(ext, trigger=trigger_best)
+trainer.extend(ext, trigger=trigger_snapshot)
 
-trainer.extend(extensions.LogReport(trigger=(config.train.log_iteration, 'iteration'), log_name='log.txt'))
+trainer.extend(extensions.LogReport(trigger=trigger_log, log_name='log.txt'))
 
 if extensions.PlotReport.available():
     trainer.extend(extensions.PlotReport(
         y_keys=['main/loss', 'test/main/loss'],
         x_key='iteration',
         file_name='loss.png',
-        trigger=(config.train.log_iteration, 'iteration'),
+        trigger=trigger_log,
     ))
 
 trainer.run()
