summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  become_yukarin/acoustic_converter.py  |   4
-rw-r--r--  become_yukarin/config/config.py       |  47
-rw-r--r--  become_yukarin/config/old_config.py   |  31
-rw-r--r--  become_yukarin/model/cbhg_model.py    | 292
-rw-r--r--  become_yukarin/model/model.py         | 313
-rw-r--r--  become_yukarin/updater/updater.py     | 116
-rw-r--r--  train.py                              |  14
7 files changed, 476 insertions, 341 deletions
diff --git a/become_yukarin/acoustic_converter.py b/become_yukarin/acoustic_converter.py
index 498bdb1..62eacff 100644
--- a/become_yukarin/acoustic_converter.py
+++ b/become_yukarin/acoustic_converter.py
@@ -73,6 +73,9 @@ class AcousticConverter(object):
input = self._feature_normalize(input, test=True)
input = self._encode_feature(input, test=True)
+ pad = 128 - input.shape[1] % 128
+ input = numpy.pad(input, [(0, 0), (0, pad)], mode='minimum')
+
converter = partial(chainer.dataset.convert.concat_examples, device=self.gpu, padding=0)
inputs = converter([input])
@@ -81,6 +84,7 @@ class AcousticConverter(object):
if self.gpu is not None:
out = chainer.cuda.to_cpu(out)
+ out = out[:, :-pad]
out = self._decode_feature(out, test=True)
out = AcousticFeature(
diff --git a/become_yukarin/config/config.py b/become_yukarin/config/config.py
index ee1d68f..f49b185 100644
--- a/become_yukarin/config/config.py
+++ b/become_yukarin/config/config.py
@@ -27,32 +27,14 @@ class DatasetConfig(NamedTuple):
num_test: int
-class DiscriminatorModelConfig(NamedTuple):
- in_channels: int
- hidden_channels_list: List[int]
-
-
class ModelConfig(NamedTuple):
in_channels: int
- conv_bank_out_channels: int
- conv_bank_k: int
- max_pooling_k: int
- conv_projections_hidden_channels: int
- highway_layers: int
out_channels: int
- out_size: int
- aligner_out_time_length: int
- disable_last_rnn: bool
- enable_aligner: bool
- discriminator: Optional[DiscriminatorModelConfig]
class LossConfig(NamedTuple):
- l1: float
- predictor_fake: float
- discriminator_true: float
- discriminator_fake: float
- discriminator_grad: float
+ mse: float
+ adversarial: float
class TrainConfig(NamedTuple):
@@ -100,14 +82,6 @@ def create_from_json(s: Union[str, Path]):
backward_compatible(d)
- if d['model']['discriminator'] is not None:
- discriminator_model_config = DiscriminatorModelConfig(
- in_channels=d['model']['discriminator']['in_channels'],
- hidden_channels_list=d['model']['discriminator']['hidden_channels_list'],
- )
- else:
- discriminator_model_config = None
-
return Config(
dataset=DatasetConfig(
param=Param(),
@@ -128,24 +102,11 @@ def create_from_json(s: Union[str, Path]):
),
model=ModelConfig(
in_channels=d['model']['in_channels'],
- conv_bank_out_channels=d['model']['conv_bank_out_channels'],
- conv_bank_k=d['model']['conv_bank_k'],
- max_pooling_k=d['model']['max_pooling_k'],
- conv_projections_hidden_channels=d['model']['conv_projections_hidden_channels'],
- highway_layers=d['model']['highway_layers'],
out_channels=d['model']['out_channels'],
- out_size=d['model']['out_size'],
- aligner_out_time_length=d['model']['aligner_out_time_length'],
- disable_last_rnn=d['model']['disable_last_rnn'],
- enable_aligner=d['model']['enable_aligner'],
- discriminator=discriminator_model_config,
),
loss=LossConfig(
- l1=d['loss']['l1'],
- predictor_fake=d['loss']['predictor_fake'],
- discriminator_true=d['loss']['discriminator_true'],
- discriminator_fake=d['loss']['discriminator_fake'],
- discriminator_grad=d['loss']['discriminator_grad'],
+ mse=d['loss']['mse'],
+ adversarial=d['loss']['adversarial'],
),
train=TrainConfig(
batchsize=d['train']['batchsize'],
diff --git a/become_yukarin/config/old_config.py b/become_yukarin/config/old_config.py
new file mode 100644
index 0000000..002e2b2
--- /dev/null
+++ b/become_yukarin/config/old_config.py
@@ -0,0 +1,31 @@
+from typing import List
+from typing import NamedTuple
+from typing import Optional
+
+
+class CBHGDiscriminatorModelConfig(NamedTuple):
+ in_channels: int
+ hidden_channels_list: List[int]
+
+
+class CBHGModelConfig(NamedTuple):
+ in_channels: int
+ conv_bank_out_channels: int
+ conv_bank_k: int
+ max_pooling_k: int
+ conv_projections_hidden_channels: int
+ highway_layers: int
+ out_channels: int
+ out_size: int
+ aligner_out_time_length: int
+ disable_last_rnn: bool
+ enable_aligner: bool
+ discriminator: Optional[CBHGDiscriminatorModelConfig]
+
+
+class CBHGLossConfig(NamedTuple):
+ l1: float
+ predictor_fake: float
+ discriminator_true: float
+ discriminator_fake: float
+ discriminator_grad: float
diff --git a/become_yukarin/model/cbhg_model.py b/become_yukarin/model/cbhg_model.py
new file mode 100644
index 0000000..59c6c71
--- /dev/null
+++ b/become_yukarin/model/cbhg_model.py
@@ -0,0 +1,292 @@
+from functools import partial
+from typing import List
+
+import chainer
+
+from become_yukarin.config.old_config import CBHGDiscriminatorModelConfig
+from become_yukarin.config.old_config import CBHGModelConfig
+
+
+class Convolution1D(chainer.links.ConvolutionND):
+ def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
+ nobias=False, initialW=None, initial_bias=None,
+ cover_all=False):
+ super().__init__(
+ ndim=1,
+ in_channels=in_channels,
+ out_channels=out_channels,
+ ksize=ksize,
+ stride=stride,
+ pad=pad,
+ nobias=nobias,
+ initialW=initialW,
+ initial_bias=initial_bias,
+ cover_all=cover_all,
+ )
+
+
+class LegacyConvolution1D(chainer.links.Convolution2D):
+ def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
+ nobias=False, initialW=None, initial_bias=None, **kwargs):
+ assert ksize is None or isinstance(ksize, int)
+ assert isinstance(stride, int)
+ assert isinstance(pad, int)
+ super().__init__(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ ksize=(ksize, 1),
+ stride=(stride, 1),
+ pad=(pad, 0),
+ nobias=nobias,
+ initialW=initialW,
+ initial_bias=initial_bias,
+ **kwargs,
+ )
+
+ def __call__(self, x):
+ assert x.shape[-1] == 1
+ return super().__call__(x)
+
+
+class ConvHighway(chainer.link.Chain):
+ def __init__(self, in_out_size, nobias=False, activate=chainer.functions.relu,
+ init_Wh=None, init_Wt=None, init_bh=None, init_bt=-1):
+ super().__init__()
+ self.activate = activate
+
+ with self.init_scope():
+ self.plain = Convolution1D(
+ in_out_size, in_out_size, 1, nobias=nobias,
+ initialW=init_Wh, initial_bias=init_bh)
+ self.transform = Convolution1D(
+ in_out_size, in_out_size, 1, nobias=nobias,
+ initialW=init_Wt, initial_bias=init_bt)
+
+ def __call__(self, x):
+ out_plain = self.activate(self.plain(x))
+ out_transform = chainer.functions.sigmoid(self.transform(x))
+ y = out_plain * out_transform + x * (1 - out_transform)
+ return y
+
+
+class PreNet(chainer.link.Chain):
+ def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
+ super().__init__()
+ with self.init_scope():
+ self.conv1 = Convolution1D(in_channels, hidden_channels, 1)
+ self.conv2 = Convolution1D(hidden_channels, out_channels, 1)
+
+ def __call__(self, x):
+ h = x
+ h = chainer.functions.dropout((chainer.functions.relu(self.conv1(h)), 0.5))
+ h = chainer.functions.dropout((chainer.functions.relu(self.conv2(h)), 0.5))
+ return h
+
+
+class Conv1DBank(chainer.link.Chain):
+ def __init__(self, in_channels: int, out_channels: int, k: int) -> None:
+ super().__init__()
+ self.stacked_channels = out_channels * k
+ self.pads = [
+ partial(chainer.functions.pad, pad_width=((0, 0), (0, 0), (i // 2, (i + 1) // 2)), mode='constant')
+ for i in range(k)
+ ]
+
+ with self.init_scope():
+ self.convs = chainer.link.ChainList(
+ *(Convolution1D(in_channels, out_channels, i + 1, nobias=True) for i in range(k))
+ )
+ self.bn = chainer.links.BatchNormalization(out_channels * k)
+
+ def __call__(self, x):
+ h = x
+ h = chainer.functions.concat([conv(pad(h)) for pad, conv in zip(self.pads, self.convs)])
+ h = chainer.functions.relu(self.bn(h))
+ return h
+
+
+class Conv1DProjections(chainer.link.Chain):
+ def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
+ super().__init__()
+
+ with self.init_scope():
+ self.conv1 = Convolution1D(in_channels, hidden_channels, 3, pad=1, nobias=True)
+ self.bn1 = chainer.links.BatchNormalization(hidden_channels)
+ self.conv2 = Convolution1D(hidden_channels, out_channels, 3, pad=1, nobias=True)
+ self.bn2 = chainer.links.BatchNormalization(out_channels)
+
+ def __call__(self, x):
+ h = x
+ h = chainer.functions.relu(self.bn1(self.conv1(h)))
+ h = chainer.functions.relu(self.bn2(self.conv2(h)))
+ return h
+
+
+class CBHG(chainer.link.Chain):
+ def __init__(
+ self,
+ in_channels: int,
+ conv_bank_out_channels: int,
+ conv_bank_k: int,
+ max_pooling_k: int,
+ conv_projections_hidden_channels: int,
+ highway_layers: int,
+ out_channels: int,
+ disable_last_rnn: bool,
+ ) -> None:
+ super().__init__()
+ self.max_pooling_padding = partial(
+ chainer.functions.pad,
+ pad_width=((0, 0), (0, 0), ((max_pooling_k - 1) // 2, max_pooling_k // 2)),
+ mode='constant',
+ )
+ self.max_pooling = chainer.functions.MaxPoolingND(1, max_pooling_k, 1, cover_all=False)
+ self.out_size = out_channels * (1 if disable_last_rnn else 2)
+
+ with self.init_scope():
+ self.conv_bank = Conv1DBank(
+ in_channels=in_channels,
+ out_channels=conv_bank_out_channels,
+ k=conv_bank_k,
+ )
+ self.conv_projectoins = Conv1DProjections(
+ in_channels=self.conv_bank.stacked_channels,
+ hidden_channels=conv_projections_hidden_channels,
+ out_channels=out_channels,
+ )
+ self.highways = chainer.link.ChainList(
+ *([ConvHighway(out_channels) for _ in range(highway_layers)])
+ )
+ if not disable_last_rnn:
+ self.gru = chainer.links.NStepBiGRU(
+ n_layers=1,
+ in_size=out_channels,
+ out_size=out_channels,
+ dropout=0.0,
+ )
+
+ def __call__(self, x):
+ h = x
+ h = self.conv_bank(h)
+ h = self.max_pooling(self.max_pooling_padding(h))
+ h = self.conv_projectoins(h)
+ h = h + x
+ for highway in self.highways:
+ h = highway(h)
+
+ if hasattr(self, 'gru'):
+ h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
+ _, h = self.gru(None, h)
+ h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))
+ return h
+
+
+class Predictor(chainer.link.Chain):
+ def __init__(self, network, out_size: int) -> None:
+ super().__init__()
+ with self.init_scope():
+ self.network = network
+ self.last = Convolution1D(network.out_size, out_size, 1)
+
+ def __call__(self, x):
+ h = x
+ h = self.network(h)
+ h = self.last(h)
+ return h
+
+
+class Aligner(chainer.link.Chain):
+ def __init__(self, in_size: int, out_time_length: int) -> None:
+ super().__init__()
+ with self.init_scope():
+ self.gru = chainer.links.NStepBiGRU(
+ n_layers=1,
+ in_size=in_size,
+ out_size=in_size // 2,
+ dropout=0.0,
+ )
+ self.last = Convolution1D(in_size // 2 * 2, out_time_length, 1)
+
+ def __call__(self, x):
+ """
+ :param x: (batch, channel, timeA)
+ """
+ h = x
+ h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1))) # h: batch * (timeA, channel)
+ _, h = self.gru(None, h) # h: batch * (timeA, ?)
+ h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1)) # h: (batch, ?, timeA)
+ h = chainer.functions.softmax(self.last(h), axis=1) # h: (batch, timeB, timeA)
+
+ h = chainer.functions.matmul(x, h) # h: (batch, channel, time)
+ return h
+
+
+class Discriminator(chainer.link.Chain):
+ def __init__(self, in_channels: int, hidden_channels_list: List[int]) -> None:
+ super().__init__()
+ with self.init_scope():
+ self.convs = chainer.link.ChainList(*(
+ LegacyConvolution1D(i_c, o_c, ksize=2, stride=2)
+ for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
+ ))
+ self.last_conv = LegacyConvolution1D(hidden_channels_list[-1], 1, ksize=1)
+
+ def __call__(self, x):
+ """
+ :param x: (batch, channel, time)
+ """
+ h = x
+ h = chainer.functions.reshape(h, h.shape + (1,))
+ for conv in self.convs.children():
+ h = chainer.functions.relu(conv(h))
+ h = self.last_conv(h)
+ h = chainer.functions.reshape(h, h.shape[:-1])
+ return h
+
+
+def create_predictor(config: CBHGModelConfig):
+ network = CBHG(
+ in_channels=config.in_channels,
+ conv_bank_out_channels=config.conv_bank_out_channels,
+ conv_bank_k=config.conv_bank_k,
+ max_pooling_k=config.max_pooling_k,
+ conv_projections_hidden_channels=config.conv_projections_hidden_channels,
+ highway_layers=config.highway_layers,
+ out_channels=config.out_channels,
+ disable_last_rnn=config.disable_last_rnn,
+ )
+ predictor = Predictor(
+ network=network,
+ out_size=config.out_size,
+ )
+ return predictor
+
+
+def create_aligner(config: CBHGModelConfig):
+ assert config.enable_aligner
+ aligner = Aligner(
+ in_size=config.in_channels,
+ out_time_length=config.aligner_out_time_length,
+ )
+ return aligner
+
+
+def create_discriminator(config: CBHGDiscriminatorModelConfig):
+ discriminator = Discriminator(
+ in_channels=config.in_channels,
+ hidden_channels_list=config.hidden_channels_list,
+ )
+ return discriminator
+
+
+def create(config: CBHGModelConfig):
+ predictor = create_predictor(config)
+ if config.enable_aligner:
+ aligner = create_aligner(config)
+ else:
+ aligner = None
+ if config.discriminator is not None:
+ discriminator = create_discriminator(config.discriminator)
+ else:
+ discriminator = None
+ return predictor, aligner, discriminator
diff --git a/become_yukarin/model/model.py b/become_yukarin/model/model.py
index 71fb805..56870d9 100644
--- a/become_yukarin/model/model.py
+++ b/become_yukarin/model/model.py
@@ -1,16 +1,14 @@
-from functools import partial
-from typing import List
-
import chainer
+import chainer.functions as F
+import chainer.links as L
-from become_yukarin.config.config import DiscriminatorModelConfig
from become_yukarin.config.config import ModelConfig
class Convolution1D(chainer.links.ConvolutionND):
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
nobias=False, initialW=None, initial_bias=None,
- cover_all=False):
+ cover_all=False) -> None:
super().__init__(
ndim=1,
in_channels=in_channels,
@@ -25,268 +23,135 @@ class Convolution1D(chainer.links.ConvolutionND):
)
-class LegacyConvolution1D(chainer.links.Convolution2D):
- def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
- nobias=False, initialW=None, initial_bias=None, **kwargs):
- assert ksize is None or isinstance(ksize, int)
- assert isinstance(stride, int)
- assert isinstance(pad, int)
+class Deconvolution1D(chainer.links.DeconvolutionND):
+ def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
+ nobias=False, outsize=None,
+ initialW=None, initial_bias=None) -> None:
super().__init__(
+ ndim=1,
in_channels=in_channels,
out_channels=out_channels,
- ksize=(ksize, 1),
- stride=(stride, 1),
- pad=(pad, 0),
+ ksize=ksize,
+ stride=stride,
+ pad=pad,
nobias=nobias,
+ outsize=outsize,
initialW=initialW,
initial_bias=initial_bias,
- **kwargs,
)
- def __call__(self, x):
- assert x.shape[-1] == 1
- return super().__call__(x)
-
-
-class ConvHighway(chainer.link.Chain):
- def __init__(self, in_out_size, nobias=False, activate=chainer.functions.relu,
- init_Wh=None, init_Wt=None, init_bh=None, init_bt=-1):
- super().__init__()
- self.activate = activate
-
- with self.init_scope():
- self.plain = Convolution1D(
- in_out_size, in_out_size, 1, nobias=nobias,
- initialW=init_Wh, initial_bias=init_bh)
- self.transform = Convolution1D(
- in_out_size, in_out_size, 1, nobias=nobias,
- initialW=init_Wt, initial_bias=init_bt)
-
- def __call__(self, x):
- out_plain = self.activate(self.plain(x))
- out_transform = chainer.functions.sigmoid(self.transform(x))
- y = out_plain * out_transform + x * (1 - out_transform)
- return y
-
-
-class PreNet(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
- super().__init__()
- with self.init_scope():
- self.conv1 = Convolution1D(in_channels, hidden_channels, 1)
- self.conv2 = Convolution1D(hidden_channels, out_channels, 1)
-
- def __call__(self, x):
- h = x
- h = chainer.functions.dropout((chainer.functions.relu(self.conv1(h)), 0.5))
- h = chainer.functions.dropout((chainer.functions.relu(self.conv2(h)), 0.5))
- return h
-
-class Conv1DBank(chainer.link.Chain):
- def __init__(self, in_channels: int, out_channels: int, k: int) -> None:
+class CBR(chainer.Chain):
+ def __init__(self, ch0, ch1, bn=True, sample='down', activation=F.relu, dropout=False) -> None:
super().__init__()
- self.stacked_channels = out_channels * k
- self.pads = [
- partial(chainer.functions.pad, pad_width=((0, 0), (0, 0), (i // 2, (i + 1) // 2)), mode='constant')
- for i in range(k)
- ]
+ self.bn = bn
+ self.activation = activation
+ self.dropout = dropout
+ w = chainer.initializers.Normal(0.02)
with self.init_scope():
- self.convs = chainer.link.ChainList(
- *(Convolution1D(in_channels, out_channels, i + 1, nobias=True) for i in range(k))
- )
- self.bn = chainer.links.BatchNormalization(out_channels * k)
+ if sample == 'down':
+ self.c = Convolution1D(ch0, ch1, 4, 2, 1, initialW=w)
+ else:
+ self.c = Deconvolution1D(ch0, ch1, 4, 2, 1, initialW=w)
+ if bn:
+ self.batchnorm = L.BatchNormalization(ch1)
def __call__(self, x):
- h = x
- h = chainer.functions.concat([conv(pad(h)) for pad, conv in zip(self.pads, self.convs)])
- h = chainer.functions.relu(self.bn(h))
+ h = self.c(x)
+ if self.bn:
+ h = self.batchnorm(h)
+ if self.dropout:
+ h = F.dropout(h)
+ if self.activation is not None:
+ h = self.activation(h)
return h
-class Conv1DProjections(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
+class Encoder(chainer.Chain):
+ def __init__(self, in_ch) -> None:
super().__init__()
-
+ w = chainer.initializers.Normal(0.02)
with self.init_scope():
- self.conv1 = Convolution1D(in_channels, hidden_channels, 3, pad=1, nobias=True)
- self.bn1 = chainer.links.BatchNormalization(hidden_channels)
- self.conv2 = Convolution1D(hidden_channels, out_channels, 3, pad=1, nobias=True)
- self.bn2 = chainer.links.BatchNormalization(out_channels)
+ self.c0 = Convolution1D(in_ch, 64, 3, 1, 1, initialW=w)
+ self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c4 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c5 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c6 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
def __call__(self, x):
- h = x
- h = chainer.functions.relu(self.bn1(self.conv1(h)))
- h = chainer.functions.relu(self.bn2(self.conv2(h)))
- return h
+ hs = [F.leaky_relu(self.c0(x))]
+ for i in range(1, 8):
+ hs.append(self['c%d' % i](hs[i - 1]))
+ return hs
-class CBHG(chainer.link.Chain):
- def __init__(
- self,
- in_channels: int,
- conv_bank_out_channels: int,
- conv_bank_k: int,
- max_pooling_k: int,
- conv_projections_hidden_channels: int,
- highway_layers: int,
- out_channels: int,
- disable_last_rnn: bool,
- ) -> None:
+class Decoder(chainer.Chain):
+ def __init__(self, out_ch) -> None:
super().__init__()
- self.max_pooling_padding = partial(
- chainer.functions.pad,
- pad_width=((0, 0), (0, 0), ((max_pooling_k - 1) // 2, max_pooling_k // 2)),
- mode='constant',
- )
- self.max_pooling = chainer.functions.MaxPoolingND(1, max_pooling_k, 1, cover_all=False)
- self.out_size = out_channels * (1 if disable_last_rnn else 2)
-
+ w = chainer.initializers.Normal(0.02)
with self.init_scope():
- self.conv_bank = Conv1DBank(
- in_channels=in_channels,
- out_channels=conv_bank_out_channels,
- k=conv_bank_k,
- )
- self.conv_projectoins = Conv1DProjections(
- in_channels=self.conv_bank.stacked_channels,
- hidden_channels=conv_projections_hidden_channels,
- out_channels=out_channels,
- )
- self.highways = chainer.link.ChainList(
- *([ConvHighway(out_channels) for _ in range(highway_layers)])
- )
- if not disable_last_rnn:
- self.gru = chainer.links.NStepBiGRU(
- n_layers=1,
- in_size=out_channels,
- out_size=out_channels,
- dropout=0.0,
- )
+ self.c0 = CBR(512, 512, bn=True, sample='up', activation=F.relu, dropout=True)
+ self.c1 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True)
+ self.c2 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True)
+ self.c3 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=False)
+ self.c4 = CBR(1024, 256, bn=True, sample='up', activation=F.relu, dropout=False)
+ self.c5 = CBR(512, 128, bn=True, sample='up', activation=F.relu, dropout=False)
+ self.c6 = CBR(256, 64, bn=True, sample='up', activation=F.relu, dropout=False)
+ self.c7 = Convolution1D(128, out_ch, 3, 1, 1, initialW=w)
- def __call__(self, x):
- h = x
- h = self.conv_bank(h)
- h = self.max_pooling(self.max_pooling_padding(h))
- h = self.conv_projectoins(h)
- h = h + x
- for highway in self.highways:
- h = highway(h)
-
- if hasattr(self, 'gru'):
- h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))
- _, h = self.gru(None, h)
- h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))
+ def __call__(self, hs):
+ h = self.c0(hs[-1])
+ for i in range(1, 8):
+ h = F.concat([h, hs[-i - 1]])
+ if i < 7:
+ h = self['c%d' % i](h)
+ else:
+ h = self.c7(h)
return h
-class Predictor(chainer.link.Chain):
- def __init__(self, network, out_size: int) -> None:
+class Predictor(chainer.Chain):
+ def __init__(self, in_ch, out_ch) -> None:
super().__init__()
with self.init_scope():
- self.network = network
- self.last = Convolution1D(network.out_size, out_size, 1)
+ self.encoder = Encoder(in_ch)
+ self.decoder = Decoder(out_ch)
def __call__(self, x):
- h = x
- h = self.network(h)
- h = self.last(h)
- return h
+ return self.decoder(self.encoder(x))
-class Aligner(chainer.link.Chain):
- def __init__(self, in_size: int, out_time_length: int) -> None:
+class Discriminator(chainer.Chain):
+ def __init__(self, in_ch, out_ch) -> None:
super().__init__()
+ w = chainer.initializers.Normal(0.02)
with self.init_scope():
- self.gru = chainer.links.NStepBiGRU(
- n_layers=1,
- in_size=in_size,
- out_size=in_size // 2,
- dropout=0.0,
- )
- self.last = Convolution1D(in_size // 2 * 2, out_time_length, 1)
+ self.c0_0 = CBR(in_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c0_1 = CBR(out_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
+ self.c4 = Convolution1D(512, 1, 3, 1, 1, initialW=w)
- def __call__(self, x):
- """
- :param x: (batch, channel, timeA)
- """
- h = x
- h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1))) # h: batch * (timeA, channel)
- _, h = self.gru(None, h) # h: batch * (timeA, ?)
- h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1)) # h: (batch, ?, timeA)
- h = chainer.functions.softmax(self.last(h), axis=1) # h: (batch, timeB, timeA)
-
- h = chainer.functions.matmul(x, h) # h: (batch, channel, time)
- return h
-
-
-class Discriminator(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels_list: List[int]) -> None:
- super().__init__()
- with self.init_scope():
- self.convs = chainer.link.ChainList(*(
- LegacyConvolution1D(i_c, o_c, ksize=2, stride=2)
- for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
- ))
- self.last_conv = LegacyConvolution1D(hidden_channels_list[-1], 1, ksize=1)
-
- def __call__(self, x):
- """
- :param x: (batch, channel, time)
- """
- h = x
- h = chainer.functions.reshape(h, h.shape + (1,))
- for conv in self.convs.children():
- h = chainer.functions.relu(conv(h))
- h = self.last_conv(h)
- h = chainer.functions.reshape(h, h.shape[:-1])
+ def __call__(self, x_0, x_1):
+ h = F.concat([self.c0_0(x_0), self.c0_1(x_1)])
+ h = self.c1(h)
+ h = self.c2(h)
+ h = self.c3(h)
+ h = self.c4(h)
+ # h = F.average_pooling_2d(h, h.data.shape[2], 1, 0)
return h
def create_predictor(config: ModelConfig):
- network = CBHG(
- in_channels=config.in_channels,
- conv_bank_out_channels=config.conv_bank_out_channels,
- conv_bank_k=config.conv_bank_k,
- max_pooling_k=config.max_pooling_k,
- conv_projections_hidden_channels=config.conv_projections_hidden_channels,
- highway_layers=config.highway_layers,
- out_channels=config.out_channels,
- disable_last_rnn=config.disable_last_rnn,
- )
- predictor = Predictor(
- network=network,
- out_size=config.out_size,
- )
- return predictor
-
-
-def create_aligner(config: ModelConfig):
- assert config.enable_aligner
- aligner = Aligner(
- in_size=config.in_channels,
- out_time_length=config.aligner_out_time_length,
- )
- return aligner
-
-
-def create_discriminator(config: DiscriminatorModelConfig):
- discriminator = Discriminator(
- in_channels=config.in_channels,
- hidden_channels_list=config.hidden_channels_list,
- )
- return discriminator
+ return Predictor(in_ch=config.in_channels, out_ch=config.out_channels)
def create(config: ModelConfig):
predictor = create_predictor(config)
- if config.enable_aligner:
- aligner = create_aligner(config)
- else:
- aligner = None
- if config.discriminator is not None:
- discriminator = create_discriminator(config.discriminator)
- else:
- discriminator = None
- return predictor, aligner, discriminator
+ discriminator = Discriminator(in_ch=config.in_channels, out_ch=config.out_channels)
+ return predictor, discriminator
diff --git a/become_yukarin/updater/updater.py b/become_yukarin/updater/updater.py
index 8dcb215..eb51068 100644
--- a/become_yukarin/updater/updater.py
+++ b/become_yukarin/updater/updater.py
@@ -1,9 +1,7 @@
import chainer
-import numpy
-from chainer import reporter
+import chainer.functions as F
from become_yukarin.config.config import LossConfig
-from become_yukarin.model.model import Aligner
from become_yukarin.model.model import Discriminator
from become_yukarin.model.model import Predictor
@@ -13,91 +11,77 @@ class Updater(chainer.training.StandardUpdater):
self,
loss_config: LossConfig,
predictor: Predictor,
- aligner: Aligner = None,
- discriminator: Discriminator = None,
+ discriminator: Discriminator,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.loss_config = loss_config
self.predictor = predictor
- self.aligner = aligner
self.discriminator = discriminator
- def forward(self, input, target, mask):
- xp = self.predictor.xp
+ def _loss_predictor(self, predictor, output, target, d_fake):
+ b, _, t = d_fake.data.shape
- input = chainer.as_variable(input)
- target = chainer.as_variable(target)
- mask = chainer.as_variable(mask)
+ loss_mse = (F.mean_absolute_error(output, target))
+ chainer.report({'mse': loss_mse}, predictor)
- if self.aligner is not None:
- input = self.aligner(input)
- y = self.predictor(input)
+ loss_adv = F.sum(F.softplus(-d_fake)) / (b * t)
+ chainer.report({'adversarial': loss_adv}, predictor)
- loss_l1 = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
- loss_l1 = loss_l1 / chainer.functions.sum(mask)
- reporter.report({'l1': loss_l1}, self.predictor)
+ loss = self.loss_config.mse * loss_mse + self.loss_config.adversarial * loss_adv
+ chainer.report({'loss': loss}, predictor)
+ return loss
- if self.discriminator is not None:
- pair_fake = chainer.functions.concat([y * mask, input])
- pair_true = chainer.functions.concat([target * mask, input])
+ def _loss_discriminator(self, discriminator, d_real, d_fake):
+ b, _, t = d_real.data.shape
- # DRAGAN
- if chainer.config.train: # grad is not available on test
- std = xp.std(pair_true.data, axis=0, keepdims=True)
- rand = xp.random.uniform(0, 1, pair_true.shape).astype(xp.float32)
- perturb = chainer.Variable(pair_true.data + 0.5 * rand * std)
- grad, = chainer.grad([self.discriminator(perturb)], [perturb], enable_double_backprop=True)
- grad = chainer.functions.sqrt(chainer.functions.batch_l2_norm_squared(grad))
- loss_grad = chainer.functions.mean_squared_error(grad, xp.ones_like(grad.data, numpy.float32))
- reporter.report({'grad': loss_grad}, self.discriminator)
+ loss_real = F.sum(F.softplus(-d_real)) / (b * t)
+ chainer.report({'real': loss_real}, discriminator)
- if xp.any(xp.isnan(loss_grad.data)):
- import code
- code.interact(local=locals())
+ loss_fake = F.sum(F.softplus(d_fake)) / (b * t)
+ chainer.report({'fake': loss_fake}, discriminator)
- # GAN
- d_fake = self.discriminator(pair_fake)
- d_true = self.discriminator(pair_true)
- loss_dis_f = chainer.functions.average(chainer.functions.softplus(d_fake))
- loss_dis_t = chainer.functions.average(chainer.functions.softplus(-d_true))
- loss_gen_f = chainer.functions.average(chainer.functions.softplus(-d_fake))
- reporter.report({'fake': loss_dis_f}, self.discriminator)
- reporter.report({'true': loss_dis_t}, self.discriminator)
+ loss = loss_real + loss_fake
+ chainer.report({'loss': loss}, discriminator)
- tp = (d_true.data > 0.5).sum()
- fp = (d_fake.data > 0.5).sum()
- fn = (d_true.data <= 0.5).sum()
- tn = (d_fake.data <= 0.5).sum()
- accuracy = (tp + tn) / (tp + fp + fn + tn)
- precision = tp / (tp + fp)
- recall = tp / (tp + fn)
- reporter.report({'accuracy': accuracy}, self.discriminator)
- reporter.report({'precision': precision}, self.discriminator)
- reporter.report({'recall': recall}, self.discriminator)
+ tp = (d_real.data > 0.5).sum()
+ fp = (d_fake.data > 0.5).sum()
+ fn = (d_real.data <= 0.5).sum()
+ tn = (d_fake.data <= 0.5).sum()
+ accuracy = (tp + tn) / (tp + fp + fn + tn)
+ precision = tp / (tp + fp)
+ recall = tp / (tp + fn)
+ chainer.report({'accuracy': accuracy}, self.discriminator)
+ chainer.report({'precision': precision}, self.discriminator)
+ chainer.report({'recall': recall}, self.discriminator)
+ return loss
- loss = {'predictor': loss_l1 * self.loss_config.l1}
+ def forward(self, input, target, mask):
+ input = chainer.as_variable(input)
+ target = chainer.as_variable(target)
+ mask = chainer.as_variable(mask)
- if self.aligner is not None:
- loss['aligner'] = loss_l1 * self.loss_config.l1
- reporter.report({'loss': loss['aligner']}, self.aligner)
+ output = self.predictor(input)
+ output = output * mask
+ target = target * mask
- if self.discriminator is not None:
- loss['discriminator'] = \
- loss_dis_f * self.loss_config.discriminator_fake + \
- loss_dis_t * self.loss_config.discriminator_true
- if chainer.config.train: # grad is not available on test
- loss['discriminator'] += loss_grad * self.loss_config.discriminator_grad
- reporter.report({'loss': loss['discriminator']}, self.discriminator)
- loss['predictor'] += loss_gen_f * self.loss_config.predictor_fake
+ d_fake = self.discriminator(input, output)
+ d_real = self.discriminator(input, target)
- reporter.report({'loss': loss['predictor']}, self.predictor)
+ loss = {
+ 'predictor': self._loss_predictor(self.predictor, output, target, d_fake),
+ 'discriminator': self._loss_discriminator(self.discriminator, d_real, d_fake),
+ }
return loss
def update_core(self):
+ opt_predictor = self.get_optimizer('predictor')
+ opt_discriminator = self.get_optimizer('discriminator')
+
batch = self.get_iterator('main').next()
- loss = self.forward(**self.converter(batch, self.device))
+ batch = self.converter(batch, self.device)
+ loss = self.forward(**batch)
- for k, opt in self.get_all_optimizers().items():
- opt.update(loss.get, k)
+ opt_predictor.update(loss.get, 'predictor')
+ opt_discriminator.update(loss.get, 'discriminator')
diff --git a/train.py b/train.py
index 26490ce..c01915a 100644
--- a/train.py
+++ b/train.py
@@ -27,12 +27,11 @@ config.save_as_json((arguments.output / 'config.json').absolute())
# model
if config.train.gpu >= 0:
cuda.get_device_from_id(config.train.gpu).use()
-predictor, aligner, discriminator = create(config.model)
-models = {'predictor': predictor}
-if aligner is not None:
- models['aligner'] = aligner
-if discriminator is not None:
- models['discriminator'] = discriminator
+predictor, discriminator = create(config.model)
+models = {
+ 'predictor': predictor,
+ 'discriminator': discriminator,
+}
# dataset
dataset = create_dataset(config.dataset)
@@ -43,7 +42,7 @@ train_eval_iter = MultiprocessIterator(dataset['train_eval'], config.train.batch
# optimizer
def create_optimizer(model):
- optimizer = optimizers.Adam()
+ optimizer = optimizers.Adam(alpha=0.0002, beta1=0.5, beta2=0.999)
optimizer.setup(model)
return optimizer
@@ -55,7 +54,6 @@ converter = partial(convert.concat_examples, padding=0)
updater = Updater(
loss_config=config.loss,
predictor=predictor,
- aligner=aligner,
discriminator=discriminator,
device=config.train.gpu,
iterator=train_iter,