-rw-r--r--  become_yukarin/config.py            42
-rw-r--r--  become_yukarin/dataset/dataset.py    2
-rw-r--r--  become_yukarin/loss.py              32
-rw-r--r--  become_yukarin/model.py             44
-rw-r--r--  become_yukarin/updater.py           77
-rw-r--r--  train.py                            51
6 files changed, 197 insertions(+), 51 deletions(-)
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index 0efbf04..80212b6 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -23,6 +23,12 @@ class DatasetConfig(NamedTuple):
num_test: int
+class DiscriminatorModelConfig(NamedTuple):
+ in_channels: int
+ hidden_channels_list: List[int]
+ last_channels: int
+
+
class ModelConfig(NamedTuple):
in_channels: int
conv_bank_out_channels: int
@@ -35,10 +41,14 @@ class ModelConfig(NamedTuple):
aligner_out_time_length: int
disable_last_rnn: bool
enable_aligner: bool
+ discriminator: DiscriminatorModelConfig
class LossConfig(NamedTuple):
l1: float
+ predictor_fake: float
+ discriminator_true: float
+ discriminator_fake: float
class TrainConfig(NamedTuple):
@@ -48,11 +58,17 @@ class TrainConfig(NamedTuple):
snapshot_iteration: int
+class ProjectConfig(NamedTuple):
+ name: str
+ tags: List[str]
+
+
class Config(NamedTuple):
dataset: DatasetConfig
model: ModelConfig
loss: LossConfig
train: TrainConfig
+ project: ProjectConfig
def save_as_json(self, path):
d = _namedtuple_to_dict(self)
@@ -78,15 +94,21 @@ def create_from_json(s: Union[str, Path]):
except TypeError:
d = json.load(open(s))
+ discriminator_model_config = DiscriminatorModelConfig(
+ in_channels=d['model']['discriminator']['in_channels'],
+ hidden_channels_list=d['model']['discriminator']['hidden_channels_list'],
+ last_channels=d['model']['discriminator']['last_channels'],
+ )
+
return Config(
dataset=DatasetConfig(
param=Param(),
- input_glob=Path(d['dataset']['input_glob']).expanduser(),
- target_glob=Path(d['dataset']['target_glob']).expanduser(),
- input_mean_path=Path(d['dataset']['input_mean_path']).expanduser(),
- input_var_path=Path(d['dataset']['input_var_path']).expanduser(),
- target_mean_path=Path(d['dataset']['target_mean_path']).expanduser(),
- target_var_path=Path(d['dataset']['target_var_path']).expanduser(),
+ input_glob=Path(d['dataset']['input_glob']),
+ target_glob=Path(d['dataset']['target_glob']),
+ input_mean_path=Path(d['dataset']['input_mean_path']),
+ input_var_path=Path(d['dataset']['input_var_path']),
+ target_mean_path=Path(d['dataset']['target_mean_path']),
+ target_var_path=Path(d['dataset']['target_var_path']),
features=d['dataset']['features'],
train_crop_size=d['dataset']['train_crop_size'],
global_noise=d['dataset']['global_noise'],
@@ -106,9 +128,13 @@ def create_from_json(s: Union[str, Path]):
aligner_out_time_length=d['model']['aligner_out_time_length'],
disable_last_rnn=d['model']['disable_last_rnn'],
enable_aligner=d['model']['enable_aligner'],
+ discriminator=discriminator_model_config,
),
loss=LossConfig(
l1=d['loss']['l1'],
+ predictor_fake=d['loss']['predictor_fake'],
+ discriminator_true=d['loss']['discriminator_true'],
+ discriminator_fake=d['loss']['discriminator_fake'],
),
train=TrainConfig(
batchsize=d['train']['batchsize'],
@@ -116,4 +142,8 @@ def create_from_json(s: Union[str, Path]):
log_iteration=d['train']['log_iteration'],
snapshot_iteration=d['train']['snapshot_iteration'],
),
+ project=ProjectConfig(
+ name=d['project']['name'],
+ tags=d['project']['tags'],
+ )
)
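For reference, create_from_json now expects three new groups of keys in the JSON file. A minimal sketch of the dict that json.load must produce is shown below; every value is a placeholder chosen for illustration, not a project default, and the existing dataset/model/train keys are unchanged.

d = {
    # ... existing 'dataset' and 'train' sections unchanged ...
    'model': {
        # ... existing predictor keys unchanged ...
        'discriminator': {
            'in_channels': 128,               # placeholder value
            'hidden_channels_list': [256, 256, 256],
            'last_channels': 256,
        },
    },
    'loss': {
        'l1': 1.0,
        'predictor_fake': 1.0,
        'discriminator_true': 1.0,
        'discriminator_fake': 1.0,
    },
    'project': {
        'name': 'example-experiment',         # placeholder value
        'tags': ['gan'],
    },
}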
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index dae2324..b049ed8 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -112,7 +112,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
self._validate = validate
def __call__(self, path: Path, test=None):
- d = numpy.load(path).item() # type: dict
+ d = numpy.load(path.expanduser()).item() # type: dict
feature = AcousticFeature(
f0=d['f0'],
spectrogram=d['spectrogram'],
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
deleted file mode 100644
index b2b03fc..0000000
--- a/become_yukarin/loss.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import chainer
-from chainer import reporter
-
-from .config import LossConfig
-from .model import Aligner
-from .model import Predictor
-
-
-class Loss(chainer.link.Chain):
- def __init__(self, config: LossConfig, predictor: Predictor, aligner: Aligner = None):
- super().__init__()
- self.config = config
-
- with self.init_scope():
- self.predictor = predictor
- self.aligner = aligner
-
- def __call__(self, input, target, mask):
- input = chainer.as_variable(input)
- target = chainer.as_variable(target)
- mask = chainer.as_variable(mask)
-
- h = input
- if self.aligner is not None:
- h = self.aligner(h)
- y = self.predictor(h)
-
- loss = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
- loss = loss / chainer.functions.sum(mask)
- reporter.report({'loss': loss}, self)
-
- return loss * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index c475685..8a727ae 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,5 +1,8 @@
+from typing import List
+
import chainer
+from .config import DiscriminatorModelConfig
from .config import ModelConfig
@@ -193,6 +196,35 @@ class Aligner(chainer.link.Chain):
return h
+class Discriminator(chainer.link.Chain):
+ def __init__(self, in_channels: int, hidden_channels_list: List[int], last_channels: int):
+ super().__init__()
+ with self.init_scope():
+ self.convs = chainer.link.ChainList(*(
+ Convolution1D(i_c, o_c, ksize=2, stride=2, nobias=True)
+ for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
+ ))
+ self.lstm_cell = chainer.links.StatelessLSTM(hidden_channels_list[-1], last_channels)
+ self.last_linear = chainer.links.Linear(last_channels, 1)
+
+ def __call__(self, x):
+ """
+ :param x: (batch, channel, time)
+ """
+ h = x
+ for conv in self.convs.children():
+ h = chainer.functions.relu(conv(h))
+
+ hs = chainer.functions.separate(h, axis=2)
+ c_next = h_next = None
+ for h in reversed(hs):
+ c_next, h_next = self.lstm_cell(c_next, h_next, h)
+ h = h_next
+
+ h = self.last_linear(h)
+ return h
+
+
def create_predictor(config: ModelConfig):
network = CBHG(
in_channels=config.in_channels,
@@ -220,10 +252,20 @@ def create_aligner(config: ModelConfig):
return aligner
+def create_discriminator(config: DiscriminatorModelConfig):
+ discriminator = Discriminator(
+ in_channels=config.in_channels,
+ hidden_channels_list=config.hidden_channels_list,
+ last_channels=config.last_channels,
+ )
+ return discriminator
+
+
def create(config: ModelConfig):
predictor = create_predictor(config)
if config.enable_aligner:
aligner = create_aligner(config)
else:
aligner = None
- return predictor, aligner
+ discriminator = create_discriminator(config.discriminator)
+ return predictor, aligner, discriminator
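A minimal shape check for the new Discriminator, assuming illustrative channel sizes that are not the project's defaults: the input follows the (batch, channel, time) convention documented in __call__, the strided convolutions shrink the time axis, the stateless LSTM consumes the remaining time steps in reverse, and the final linear layer yields one real/fake score per example.

import numpy
from become_yukarin.model import Discriminator

# Assumed sizes, for illustration only.
discriminator = Discriminator(in_channels=8, hidden_channels_list=[16, 32], last_channels=64)

x = numpy.zeros((2, 8, 40), dtype=numpy.float32)  # (batch, channel, time)
score = discriminator(x)
print(score.shape)  # (2, 1): one score per example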
diff --git a/become_yukarin/updater.py b/become_yukarin/updater.py
new file mode 100644
index 0000000..927601f
--- /dev/null
+++ b/become_yukarin/updater.py
@@ -0,0 +1,77 @@
+import chainer
+import numpy
+from chainer import reporter
+
+from .config import LossConfig
+from .config import ModelConfig
+from .model import Aligner
+from .model import Discriminator
+from .model import Predictor
+
+
+class Updater(chainer.training.StandardUpdater):
+ def __init__(
+ self,
+ loss_config: LossConfig,
+ model_config: ModelConfig,
+ predictor: Predictor,
+ aligner: Aligner = None,
+ discriminator: Discriminator = None,
+ *args,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.loss_config = loss_config
+ self.model_config = model_config
+ self.predictor = predictor
+ self.aligner = aligner
+ self.discriminator = discriminator
+
+ def forward(self, input, target, mask):
+ xp = self.predictor.xp
+
+ input = chainer.as_variable(input)
+ target = chainer.as_variable(target)
+ mask = chainer.as_variable(mask)
+
+ if self.aligner is not None:
+ input = self.aligner(input)
+ y = self.predictor(input)
+
+ loss_l1 = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
+ loss_l1 = loss_l1 / chainer.functions.sum(mask)
+ reporter.report({'l1': loss_l1}, self.predictor)
+
+ if self.discriminator is not None:
+ pair_fake = chainer.functions.concat([y * mask, input])
+ pair_true = chainer.functions.concat([target * mask, input])
+ d_fake = self.discriminator(pair_fake)
+ d_true = self.discriminator(pair_true)
+ loss_dis_f = chainer.functions.mean_squared_error(d_fake, xp.zeros_like(d_fake.data, numpy.float32))
+ loss_dis_t = chainer.functions.mean_squared_error(d_true, xp.ones_like(d_true.data, numpy.float32))
+ loss_gen_f = chainer.functions.mean_squared_error(d_fake, xp.ones_like(d_fake.data, numpy.float32))
+ reporter.report({'fake': loss_dis_f}, self.discriminator)
+ reporter.report({'true': loss_dis_t}, self.discriminator)
+
+ loss = {'predictor': loss_l1 * self.loss_config.l1}
+
+ if self.aligner is not None:
+ loss['aligner'] = loss_l1 * self.loss_config.l1
+ reporter.report({'loss': loss['aligner']}, self.aligner)
+
+ if self.discriminator is not None:
+ loss['discriminator'] = \
+ loss_dis_f * self.loss_config.discriminator_fake + \
+ loss_dis_t * self.loss_config.discriminator_true
+ reporter.report({'loss': loss['discriminator']}, self.discriminator)
+ loss['predictor'] += loss_gen_f * self.loss_config.predictor_fake
+
+ reporter.report({'loss': loss['predictor']}, self.predictor)
+ return loss
+
+ def update_core(self):
+ batch = self.get_iterator('main').next()
+ loss = self.forward(**self.converter(batch, self.device))
+
+ for k, opt in self.get_all_optimizers().items():
+ opt.update(loss.get, k)
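The updater returns a dict of losses keyed by the same names as the optimizers created in train.py ('predictor', 'aligner', 'discriminator'), and update_core hands each optimizer its own entry via loss.get. The adversarial terms follow a least-squares GAN: the discriminator is pushed toward 1 on true pairs and toward 0 on generated pairs, while the predictor receives an extra term pushing the discriminator's output on generated pairs toward 1. A self-contained sketch of that criterion, with made-up discriminator outputs standing in for d_fake and d_true:

import numpy
import chainer
import chainer.functions as F

# Made-up discriminator outputs, for illustration only.
d_fake = chainer.Variable(numpy.array([[0.3], [0.7]], dtype=numpy.float32))
d_true = chainer.Variable(numpy.array([[0.9], [0.6]], dtype=numpy.float32))

loss_dis_fake = F.mean_squared_error(d_fake, numpy.zeros_like(d_fake.data))  # push D(fake) toward 0
loss_dis_true = F.mean_squared_error(d_true, numpy.ones_like(d_true.data))   # push D(true) toward 1
loss_gen_fake = F.mean_squared_error(d_fake, numpy.ones_like(d_fake.data))   # predictor wants D(fake) near 1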
diff --git a/train.py b/train.py
index a3bea0f..f3d4885 100644
--- a/train.py
+++ b/train.py
@@ -11,7 +11,7 @@ from chainer.training import extensions
from become_yukarin.config import create_from_json
from become_yukarin.dataset import create as create_dataset
-from become_yukarin.loss import Loss
+from become_yukarin.updater import Updater
from become_yukarin.model import create
parser = argparse.ArgumentParser()
@@ -26,8 +26,12 @@ config.save_as_json((arguments.output / 'config.json').absolute())
# model
if config.train.gpu >= 0:
cuda.get_device_from_id(config.train.gpu).use()
-predictor, aligner = create(config.model)
-model = Loss(config.loss, predictor=predictor, aligner=aligner)
+predictor, aligner, discriminator = create(config.model)
+models = {'predictor': predictor}
+if aligner is not None:
+ models['aligner'] = aligner
+if discriminator is not None:
+ models['discriminator'] = discriminator
# dataset
dataset = create_dataset(config.dataset)
@@ -35,24 +39,42 @@ train_iter = MultiprocessIterator(dataset['train'], config.train.batchsize)
test_iter = MultiprocessIterator(dataset['test'], config.train.batchsize, repeat=False, shuffle=False)
train_eval_iter = MultiprocessIterator(dataset['train_eval'], config.train.batchsize, repeat=False, shuffle=False)
+
# optimizer
-optimizer = optimizers.Adam()
-optimizer.setup(model)
+def create_optimizer(model):
+ optimizer = optimizers.Adam()
+ optimizer.setup(model)
+ return optimizer
+
+
+opts = {key: create_optimizer(model) for key, model in models.items()}
+
+# updater
+converter = partial(convert.concat_examples, padding=0)
+updater = Updater(
+ loss_config=config.loss,
+ model_config=config.model,
+ predictor=predictor,
+ aligner=aligner,
+ discriminator=discriminator,
+ device=config.train.gpu,
+ iterator=train_iter,
+ optimizer=opts,
+ converter=converter,
+)
# trainer
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')
-converter = partial(convert.concat_examples, padding=0)
-updater = training.StandardUpdater(train_iter, optimizer, device=config.train.gpu, converter=converter)
trainer = training.Trainer(updater, out=arguments.output)
-ext = extensions.Evaluator(test_iter, model, converter, device=config.train.gpu)
+ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
-ext = extensions.Evaluator(train_eval_iter, model, converter, device=config.train.gpu)
+ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)
-trainer.extend(extensions.dump_graph('main/loss', out_name='graph.dot'))
+trainer.extend(extensions.dump_graph('predictor/loss', out_name='graph.dot'))
ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)
@@ -61,7 +83,14 @@ trainer.extend(extensions.LogReport(trigger=trigger_log, log_name='log.txt'))
if extensions.PlotReport.available():
trainer.extend(extensions.PlotReport(
- y_keys=['main/loss', 'test/main/loss', 'train/main/loss'],
+ y_keys=[
+ 'predictor/loss',
+ 'predictor/l1',
+ 'test/predictor/loss',
+ 'train/predictor/loss',
+ 'discriminator/fake',
+ 'discriminator/true',
+ ],
x_key='iteration',
file_name='loss.png',
trigger=trigger_log,