Diffstat (limited to 'become_yukarin')
-rw-r--r--   become_yukarin/config.py            |  42
-rw-r--r--   become_yukarin/dataset/dataset.py   |   2
-rw-r--r--   become_yukarin/loss.py              |  32
-rw-r--r--   become_yukarin/model.py             |  44
-rw-r--r--   become_yukarin/updater.py           |  77
5 files changed, 157 insertions(+), 40 deletions(-)
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index 0efbf04..80212b6 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -23,6 +23,12 @@ class DatasetConfig(NamedTuple):
num_test: int
+class DiscriminatorModelConfig(NamedTuple):
+ in_channels: int
+ hidden_channels_list: List[int]
+ last_channels: int
+
+
class ModelConfig(NamedTuple):
in_channels: int
conv_bank_out_channels: int
@@ -35,10 +41,14 @@ class ModelConfig(NamedTuple):
aligner_out_time_length: int
disable_last_rnn: bool
enable_aligner: bool
+ discriminator: DiscriminatorModelConfig
class LossConfig(NamedTuple):
l1: float
+ predictor_fake: float
+ discriminator_true: float
+ discriminator_fake: float
class TrainConfig(NamedTuple):
@@ -48,11 +58,17 @@ class TrainConfig(NamedTuple):
snapshot_iteration: int
+class ProjectConfig(NamedTuple):
+ name: str
+ tags: List[str]
+
+
class Config(NamedTuple):
dataset: DatasetConfig
model: ModelConfig
loss: LossConfig
train: TrainConfig
+ project: ProjectConfig
def save_as_json(self, path):
d = _namedtuple_to_dict(self)
@@ -78,15 +94,21 @@ def create_from_json(s: Union[str, Path]):
except TypeError:
d = json.load(open(s))
+ discriminator_model_config = DiscriminatorModelConfig(
+ in_channels=d['model']['discriminator']['in_channels'],
+ hidden_channels_list=d['model']['discriminator']['hidden_channels_list'],
+ last_channels=d['model']['discriminator']['last_channels'],
+ )
+
return Config(
dataset=DatasetConfig(
param=Param(),
- input_glob=Path(d['dataset']['input_glob']).expanduser(),
- target_glob=Path(d['dataset']['target_glob']).expanduser(),
- input_mean_path=Path(d['dataset']['input_mean_path']).expanduser(),
- input_var_path=Path(d['dataset']['input_var_path']).expanduser(),
- target_mean_path=Path(d['dataset']['target_mean_path']).expanduser(),
- target_var_path=Path(d['dataset']['target_var_path']).expanduser(),
+ input_glob=Path(d['dataset']['input_glob']),
+ target_glob=Path(d['dataset']['target_glob']),
+ input_mean_path=Path(d['dataset']['input_mean_path']),
+ input_var_path=Path(d['dataset']['input_var_path']),
+ target_mean_path=Path(d['dataset']['target_mean_path']),
+ target_var_path=Path(d['dataset']['target_var_path']),
features=d['dataset']['features'],
train_crop_size=d['dataset']['train_crop_size'],
global_noise=d['dataset']['global_noise'],
@@ -106,9 +128,13 @@ def create_from_json(s: Union[str, Path]):
aligner_out_time_length=d['model']['aligner_out_time_length'],
disable_last_rnn=d['model']['disable_last_rnn'],
enable_aligner=d['model']['enable_aligner'],
+ discriminator=discriminator_model_config,
),
loss=LossConfig(
l1=d['loss']['l1'],
+ predictor_fake=d['loss']['predictor_fake'],
+ discriminator_true=d['loss']['discriminator_true'],
+ discriminator_fake=d['loss']['discriminator_fake'],
),
train=TrainConfig(
batchsize=d['train']['batchsize'],
@@ -116,4 +142,8 @@ def create_from_json(s: Union[str, Path]):
log_iteration=d['train']['log_iteration'],
snapshot_iteration=d['train']['snapshot_iteration'],
),
+ project=ProjectConfig(
+ name=d['project']['name'],
+ tags=d['project']['tags'],
+ )
)
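
Note: create_from_json now also expects the corresponding keys under 'model.discriminator', 'loss' and 'project' in the JSON file. A minimal sketch of the configuration objects added above; all values, names and tags are placeholders, not repository defaults, only the field names come from this diff:

# Placeholder values only; field names are taken from the classes added in config.py.
from become_yukarin.config import DiscriminatorModelConfig, LossConfig, ProjectConfig

discriminator_config = DiscriminatorModelConfig(
    in_channels=64,                   # placeholder
    hidden_channels_list=[128, 256],  # placeholder
    last_channels=256,                # placeholder
)

loss_config = LossConfig(
    l1=1.0,                  # weight of the masked L1 term
    predictor_fake=1.0,      # weight of the generator-side adversarial term
    discriminator_true=1.0,  # weight of the discriminator loss on real pairs
    discriminator_fake=1.0,  # weight of the discriminator loss on fake pairs
)

project_config = ProjectConfig(name='example-project', tags=['gan'])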
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index dae2324..b049ed8 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -112,7 +112,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
self._validate = validate
def __call__(self, path: Path, test=None):
- d = numpy.load(path).item() # type: dict
+ d = numpy.load(path.expanduser()).item() # type: dict
feature = AcousticFeature(
f0=d['f0'],
spectrogram=d['spectrogram'],
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
deleted file mode 100644
index b2b03fc..0000000
--- a/become_yukarin/loss.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import chainer
-from chainer import reporter
-
-from .config import LossConfig
-from .model import Aligner
-from .model import Predictor
-
-
-class Loss(chainer.link.Chain):
- def __init__(self, config: LossConfig, predictor: Predictor, aligner: Aligner = None):
- super().__init__()
- self.config = config
-
- with self.init_scope():
- self.predictor = predictor
- self.aligner = aligner
-
- def __call__(self, input, target, mask):
- input = chainer.as_variable(input)
- target = chainer.as_variable(target)
- mask = chainer.as_variable(mask)
-
- h = input
- if self.aligner is not None:
- h = self.aligner(h)
- y = self.predictor(h)
-
- loss = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
- loss = loss / chainer.functions.sum(mask)
- reporter.report({'loss': loss}, self)
-
- return loss * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index c475685..8a727ae 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,5 +1,8 @@
+from typing import List
+
import chainer
+from .config import DiscriminatorModelConfig
from .config import ModelConfig
@@ -193,6 +196,35 @@ class Aligner(chainer.link.Chain):
return h
+class Discriminator(chainer.link.Chain):
+ def __init__(self, in_channels: int, hidden_channels_list: List[int], last_channels: int):
+ super().__init__()
+ with self.init_scope():
+ self.convs = chainer.link.ChainList(*(
+ Convolution1D(i_c, o_c, ksize=2, stride=2, nobias=True)
+ for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
+ ))
+ self.lstm_cell = chainer.links.StatelessLSTM(hidden_channels_list[-1], last_channels)
+ self.last_linear = chainer.links.Linear(last_channels, 1)
+
+ def __call__(self, x):
+ """
+ :param x: (batch, channel, time)
+ """
+ h = x
+ for conv in self.convs.children():
+ h = chainer.functions.relu(conv(h))
+
+ hs = chainer.functions.separate(h, axis=2)
+ c_next = h_next = None
+ for h in reversed(hs):
+ c_next, h_next = self.lstm_cell(c_next, h_next, h)
+ h = h_next
+
+ h = self.last_linear(h)
+ return h
+
+
def create_predictor(config: ModelConfig):
network = CBHG(
in_channels=config.in_channels,
@@ -220,10 +252,20 @@ def create_aligner(config: ModelConfig):
return aligner
+def create_discriminator(config: DiscriminatorModelConfig):
+ discriminator = Discriminator(
+ in_channels=config.in_channels,
+ hidden_channels_list=config.hidden_channels_list,
+ last_channels=config.last_channels,
+ )
+ return discriminator
+
+
def create(config: ModelConfig):
predictor = create_predictor(config)
if config.enable_aligner:
aligner = create_aligner(config)
else:
aligner = None
- return predictor, aligner
+ discriminator = create_discriminator(config.discriminator)
+ return predictor, aligner, discriminator
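
As a shape sanity check for the new Discriminator, a rough, untested sketch; channel sizes and sequence length are placeholders, and it relies on the Convolution1D wrapper already defined in model.py:

import numpy

from become_yukarin.config import DiscriminatorModelConfig
from become_yukarin.model import create_discriminator

# Placeholder sizes; each stride-2 convolution halves the time axis,
# then an LSTM run backwards over the remaining steps feeds a single linear output.
config = DiscriminatorModelConfig(in_channels=10, hidden_channels_list=[32, 64], last_channels=64)
discriminator = create_discriminator(config)

x = numpy.zeros((4, 10, 128), dtype=numpy.float32)  # (batch, channel, time)
y = discriminator(x)
print(y.shape)  # expected: (4, 1), one realness score per example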
diff --git a/become_yukarin/updater.py b/become_yukarin/updater.py
new file mode 100644
index 0000000..927601f
--- /dev/null
+++ b/become_yukarin/updater.py
@@ -0,0 +1,77 @@
+import chainer
+import numpy
+from chainer import reporter
+
+from .config import LossConfig
+from .config import ModelConfig
+from .model import Aligner
+from .model import Discriminator
+from .model import Predictor
+
+
+class Updater(chainer.training.StandardUpdater):
+ def __init__(
+ self,
+ loss_config: LossConfig,
+ model_config: ModelConfig,
+ predictor: Predictor,
+ aligner: Aligner = None,
+ discriminator: Discriminator = None,
+ *args,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.loss_config = loss_config
+ self.model_config = model_config
+ self.predictor = predictor
+ self.aligner = aligner
+ self.discriminator = discriminator
+
+ def forward(self, input, target, mask):
+ xp = self.predictor.xp
+
+ input = chainer.as_variable(input)
+ target = chainer.as_variable(target)
+ mask = chainer.as_variable(mask)
+
+ if self.aligner is not None:
+ input = self.aligner(input)
+ y = self.predictor(input)
+
+ loss_l1 = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
+ loss_l1 = loss_l1 / chainer.functions.sum(mask)
+ reporter.report({'l1': loss_l1}, self.predictor)
+
+ if self.discriminator is not None:
+ pair_fake = chainer.functions.concat([y * mask, input])
+ pair_true = chainer.functions.concat([target * mask, input])
+ d_fake = self.discriminator(pair_fake)
+ d_true = self.discriminator(pair_true)
+ loss_dis_f = chainer.functions.mean_squared_error(d_fake, xp.zeros_like(d_fake.data, numpy.float32))
+ loss_dis_t = chainer.functions.mean_squared_error(d_true, xp.ones_like(d_true.data, numpy.float32))
+ loss_gen_f = chainer.functions.mean_squared_error(d_fake, xp.ones_like(d_fake.data, numpy.float32))
+ reporter.report({'fake': loss_dis_f}, self.discriminator)
+ reporter.report({'true': loss_dis_t}, self.discriminator)
+
+ loss = {'predictor': loss_l1 * self.loss_config.l1}
+
+ if self.aligner is not None:
+ loss['aligner'] = loss_l1 * self.loss_config.l1
+ reporter.report({'loss': loss['aligner']}, self.aligner)
+
+ if self.discriminator is not None:
+ loss['discriminator'] = \
+ loss_dis_f * self.loss_config.discriminator_fake + \
+ loss_dis_t * self.loss_config.discriminator_true
+ reporter.report({'loss': loss['discriminator']}, self.discriminator)
+ loss['predictor'] += loss_gen_f * self.loss_config.predictor_fake
+
+ reporter.report({'loss': loss['predictor']}, self.predictor)
+ return loss
+
+ def update_core(self):
+ batch = self.get_iterator('main').next()
+ loss = self.forward(**self.converter(batch, self.device))
+
+ for k, opt in self.get_all_optimizers().items():
+ opt.update(loss.get, k)
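
A hedged sketch of how this Updater might be wired into a Chainer training loop. The optimizer keys must match the loss dictionary returned by forward() ('predictor', plus 'aligner' and 'discriminator' when those models are given); the JSON path, dataset and hyperparameters below are assumptions, not values from the repository:

import chainer

from become_yukarin.config import create_from_json
from become_yukarin.model import create
from become_yukarin.updater import Updater

config = create_from_json('config.json')  # hypothetical path
predictor, aligner, discriminator = create(config.model)

def make_optimizer(model):
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    return optimizer

# One optimizer per loss key produced by Updater.forward().
optimizers = {'predictor': make_optimizer(predictor)}
if aligner is not None:
    optimizers['aligner'] = make_optimizer(aligner)
if discriminator is not None:
    optimizers['discriminator'] = make_optimizer(discriminator)

# `train_dataset` is assumed to yield dict examples with keys
# 'input', 'target' and 'mask', so the default converter matches forward()'s arguments.
train_iterator = chainer.iterators.MultiprocessIterator(train_dataset, config.train.batchsize)

updater = Updater(
    loss_config=config.loss,
    model_config=config.model,
    predictor=predictor,
    aligner=aligner,
    discriminator=discriminator,
    iterator=train_iterator,
    optimizer=optimizers,
    device=-1,  # CPU; use a GPU id in practice
)
trainer = chainer.training.Trainer(updater, stop_trigger=(1, 'epoch'))
trainer.run()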