| -rw-r--r-- | become_yukarin/config.py | 42 |
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 2 |
| -rw-r--r-- | become_yukarin/loss.py | 32 |
| -rw-r--r-- | become_yukarin/model.py | 44 |
| -rw-r--r-- | become_yukarin/updater.py | 77 |
| -rw-r--r-- | train.py | 51 |
6 files changed, 197 insertions, 51 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index 0efbf04..80212b6 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -23,6 +23,12 @@ class DatasetConfig(NamedTuple):
     num_test: int
 
 
+class DiscriminatorModelConfig(NamedTuple):
+    in_channels: int
+    hidden_channels_list: List[int]
+    last_channels: int
+
+
 class ModelConfig(NamedTuple):
     in_channels: int
     conv_bank_out_channels: int
@@ -35,10 +41,14 @@ class ModelConfig(NamedTuple):
     aligner_out_time_length: int
     disable_last_rnn: bool
     enable_aligner: bool
+    discriminator: DiscriminatorModelConfig
 
 
 class LossConfig(NamedTuple):
     l1: float
+    predictor_fake: float
+    discriminator_true: float
+    discriminator_fake: float
 
 
 class TrainConfig(NamedTuple):
@@ -48,11 +58,17 @@ class TrainConfig(NamedTuple):
     snapshot_iteration: int
 
 
+class ProjectConfig(NamedTuple):
+    name: str
+    tags: List[str]
+
+
 class Config(NamedTuple):
     dataset: DatasetConfig
     model: ModelConfig
     loss: LossConfig
     train: TrainConfig
+    project: ProjectConfig
 
     def save_as_json(self, path):
         d = _namedtuple_to_dict(self)
@@ -78,15 +94,21 @@ def create_from_json(s: Union[str, Path]):
     except TypeError:
         d = json.load(open(s))
 
+    discriminator_model_config = DiscriminatorModelConfig(
+        in_channels=d['model']['discriminator']['in_channels'],
+        hidden_channels_list=d['model']['discriminator']['hidden_channels_list'],
+        last_channels=d['model']['discriminator']['last_channels'],
+    )
+
     return Config(
         dataset=DatasetConfig(
             param=Param(),
-            input_glob=Path(d['dataset']['input_glob']).expanduser(),
-            target_glob=Path(d['dataset']['target_glob']).expanduser(),
-            input_mean_path=Path(d['dataset']['input_mean_path']).expanduser(),
-            input_var_path=Path(d['dataset']['input_var_path']).expanduser(),
-            target_mean_path=Path(d['dataset']['target_mean_path']).expanduser(),
-            target_var_path=Path(d['dataset']['target_var_path']).expanduser(),
+            input_glob=Path(d['dataset']['input_glob']),
+            target_glob=Path(d['dataset']['target_glob']),
+            input_mean_path=Path(d['dataset']['input_mean_path']),
+            input_var_path=Path(d['dataset']['input_var_path']),
+            target_mean_path=Path(d['dataset']['target_mean_path']),
+            target_var_path=Path(d['dataset']['target_var_path']),
             features=d['dataset']['features'],
             train_crop_size=d['dataset']['train_crop_size'],
             global_noise=d['dataset']['global_noise'],
@@ -106,9 +128,13 @@ def create_from_json(s: Union[str, Path]):
             aligner_out_time_length=d['model']['aligner_out_time_length'],
             disable_last_rnn=d['model']['disable_last_rnn'],
             enable_aligner=d['model']['enable_aligner'],
+            discriminator=discriminator_model_config,
         ),
         loss=LossConfig(
             l1=d['loss']['l1'],
+            predictor_fake=d['loss']['predictor_fake'],
+            discriminator_true=d['loss']['discriminator_true'],
+            discriminator_fake=d['loss']['discriminator_fake'],
         ),
         train=TrainConfig(
             batchsize=d['train']['batchsize'],
@@ -116,4 +142,8 @@ def create_from_json(s: Union[str, Path]):
             log_iteration=d['train']['log_iteration'],
             snapshot_iteration=d['train']['snapshot_iteration'],
         ),
+        project=ProjectConfig(
+            name=d['project']['name'],
+            tags=d['project']['tags'],
+        )
     )
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index dae2324..b049ed8 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -112,7 +112,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
         self._validate = validate
 
     def __call__(self, path: Path, test=None):
-        d = numpy.load(path).item()  # type: dict
+        d = numpy.load(path.expanduser()).item()  # type: dict
         feature = AcousticFeature(
             f0=d['f0'],
             spectrogram=d['spectrogram'],
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
deleted file mode 100644
index b2b03fc..0000000
--- a/become_yukarin/loss.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import chainer
-from chainer import reporter
-
-from .config import LossConfig
-from .model import Aligner
-from .model import Predictor
-
-
-class Loss(chainer.link.Chain):
-    def __init__(self, config: LossConfig, predictor: Predictor, aligner: Aligner = None):
-        super().__init__()
-        self.config = config
-
-        with self.init_scope():
-            self.predictor = predictor
-            self.aligner = aligner
-
-    def __call__(self, input, target, mask):
-        input = chainer.as_variable(input)
-        target = chainer.as_variable(target)
-        mask = chainer.as_variable(mask)
-
-        h = input
-        if self.aligner is not None:
-            h = self.aligner(h)
-        y = self.predictor(h)
-
-        loss = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
-        loss = loss / chainer.functions.sum(mask)
-        reporter.report({'loss': loss}, self)
-
-        return loss * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index c475685..8a727ae 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,5 +1,8 @@
+from typing import List
+
 import chainer
 
+from .config import DiscriminatorModelConfig
 from .config import ModelConfig
 
 
@@ -193,6 +196,35 @@ class Aligner(chainer.link.Chain):
         return h
 
 
+class Discriminator(chainer.link.Chain):
+    def __init__(self, in_channels: int, hidden_channels_list: List[int], last_channels: int):
+        super().__init__()
+        with self.init_scope():
+            self.convs = chainer.link.ChainList(*(
+                Convolution1D(i_c, o_c, ksize=2, stride=2, nobias=True)
+                for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
+            ))
+            self.lstm_cell = chainer.links.StatelessLSTM(hidden_channels_list[-1], last_channels)
+            self.last_linear = chainer.links.Linear(last_channels, 1)
+
+    def __call__(self, x):
+        """
+        :param x: (batch, channel, time)
+        """
+        h = x
+        for conv in self.convs.children():
+            h = chainer.functions.relu(conv(h))
+
+        hs = chainer.functions.separate(h, axis=2)
+        c_next = h_next = None
+        for h in reversed(hs):
+            c_next, h_next = self.lstm_cell(c_next, h_next, h)
+        h = h_next
+
+        h = self.last_linear(h)
+        return h
+
+
 def create_predictor(config: ModelConfig):
     network = CBHG(
         in_channels=config.in_channels,
@@ -220,10 +252,20 @@ def create_aligner(config: ModelConfig):
     return aligner
 
 
+def create_discriminator(config: DiscriminatorModelConfig):
+    discriminator = Discriminator(
+        in_channels=config.in_channels,
+        hidden_channels_list=config.hidden_channels_list,
+        last_channels=config.last_channels,
+    )
+    return discriminator
+
+
 def create(config: ModelConfig):
     predictor = create_predictor(config)
     if config.enable_aligner:
         aligner = create_aligner(config)
     else:
         aligner = None
-    return predictor, aligner
+    discriminator = create_discriminator(config.discriminator)
+    return predictor, aligner, discriminator
diff --git a/become_yukarin/updater.py b/become_yukarin/updater.py
new file mode 100644
index 0000000..927601f
--- /dev/null
+++ b/become_yukarin/updater.py
@@ -0,0 +1,77 @@
+import chainer
+import numpy
+from chainer import reporter
+
+from .config import LossConfig
+from .config import ModelConfig
+from .model import Aligner
+from .model import Discriminator
+from .model import Predictor
+
+
+class Updater(chainer.training.StandardUpdater):
+    def __init__(
+            self,
+            loss_config: LossConfig,
+            model_config: ModelConfig,
+            predictor: Predictor,
+            aligner: Aligner = None,
+            discriminator: Discriminator = None,
+            *args,
+            **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.loss_config = loss_config
+        self.model_config = model_config
+        self.predictor = predictor
+        self.aligner = aligner
+        self.discriminator = discriminator
+
+    def forward(self, input, target, mask):
+        xp = self.predictor.xp
+
+        input = chainer.as_variable(input)
+        target = chainer.as_variable(target)
+        mask = chainer.as_variable(mask)
+
+        if self.aligner is not None:
+            input = self.aligner(input)
+        y = self.predictor(input)
+
+        loss_l1 = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
+        loss_l1 = loss_l1 / chainer.functions.sum(mask)
+        reporter.report({'l1': loss_l1}, self.predictor)
+
+        if self.discriminator is not None:
+            pair_fake = chainer.functions.concat([y * mask, input])
+            pair_true = chainer.functions.concat([target * mask, input])
+            d_fake = self.discriminator(pair_fake)
+            d_true = self.discriminator(pair_true)
+            loss_dis_f = chainer.functions.mean_squared_error(d_fake, xp.zeros_like(d_fake.data, numpy.float32))
+            loss_dis_t = chainer.functions.mean_squared_error(d_true, xp.ones_like(d_true.data, numpy.float32))
+            loss_gen_f = chainer.functions.mean_squared_error(d_fake, xp.ones_like(d_fake.data, numpy.float32))
+            reporter.report({'fake': loss_dis_f}, self.discriminator)
+            reporter.report({'true': loss_dis_t}, self.discriminator)
+
+        loss = {'predictor': loss_l1 * self.loss_config.l1}
+
+        if self.aligner is not None:
+            loss['aligner'] = loss_l1 * self.loss_config.l1
+            reporter.report({'loss': loss['aligner']}, self.aligner)
+
+        if self.discriminator is not None:
+            loss['discriminator'] = \
+                loss_dis_f * self.loss_config.discriminator_fake + \
+                loss_dis_t * self.loss_config.discriminator_true
+            reporter.report({'loss': loss['discriminator']}, self.discriminator)
+            loss['predictor'] += loss_gen_f * self.loss_config.predictor_fake
+
+        reporter.report({'loss': loss['predictor']}, self.predictor)
+        return loss
+
+    def update_core(self):
+        batch = self.get_iterator('main').next()
+        loss = self.forward(**self.converter(batch, self.device))
+
+        for k, opt in self.get_all_optimizers().items():
+            opt.update(loss.get, k)
diff --git a/train.py b/train.py
--- a/train.py
+++ b/train.py
@@ -11,7 +11,7 @@ from chainer.training import extensions
 
 from become_yukarin.config import create_from_json
 from become_yukarin.dataset import create as create_dataset
-from become_yukarin.loss import Loss
+from become_yukarin.updater import Updater
 from become_yukarin.model import create
 
 parser = argparse.ArgumentParser()
@@ -26,8 +26,12 @@ config.save_as_json((arguments.output / 'config.json').absolute())
 # model
 if config.train.gpu >= 0:
     cuda.get_device_from_id(config.train.gpu).use()
-predictor, aligner = create(config.model)
-model = Loss(config.loss, predictor=predictor, aligner=aligner)
+predictor, aligner, discriminator = create(config.model)
+models = {'predictor': predictor}
+if aligner is not None:
+    models['aligner'] = aligner
+if discriminator is not None:
+    models['discriminator'] = discriminator
 
 # dataset
 dataset = create_dataset(config.dataset)
@@ -35,24 +39,42 @@ train_iter = MultiprocessIterator(dataset['train'], config.train.batchsize)
 test_iter = MultiprocessIterator(dataset['test'], config.train.batchsize, repeat=False, shuffle=False)
 train_eval_iter = MultiprocessIterator(dataset['train_eval'], config.train.batchsize, repeat=False, shuffle=False)
 
+
 # optimizer
-optimizer = optimizers.Adam()
-optimizer.setup(model)
+def create_optimizer(model):
+    optimizer = optimizers.Adam()
+    optimizer.setup(model)
+    return optimizer
+
+
+opts = {key: create_optimizer(model) for key, model in models.items()}
+
+# updater
+converter = partial(convert.concat_examples, padding=0)
+updater = Updater(
+    loss_config=config.loss,
+    model_config=config.model,
+    predictor=predictor,
+    aligner=aligner,
+    discriminator=discriminator,
+    device=config.train.gpu,
+    iterator=train_iter,
+    optimizer=opts,
+    converter=converter,
+)
 
 # trainer
 trigger_log = (config.train.log_iteration, 'iteration')
 trigger_snapshot = (config.train.snapshot_iteration, 'iteration')
 
-converter = partial(convert.concat_examples, padding=0)
-updater = training.StandardUpdater(train_iter, optimizer, device=config.train.gpu, converter=converter)
 trainer = training.Trainer(updater, out=arguments.output)
 
-ext = extensions.Evaluator(test_iter, model, converter, device=config.train.gpu)
+ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
 trainer.extend(ext, name='test', trigger=trigger_log)
-ext = extensions.Evaluator(train_eval_iter, model, converter, device=config.train.gpu)
+ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
 trainer.extend(ext, name='train', trigger=trigger_log)
 
-trainer.extend(extensions.dump_graph('main/loss', out_name='graph.dot'))
+trainer.extend(extensions.dump_graph('predictor/loss', out_name='graph.dot'))
 
 ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
 trainer.extend(ext, trigger=trigger_snapshot)
@@ -61,7 +83,14 @@ trainer.extend(extensions.LogReport(trigger=trigger_log, log_name='log.txt'))
 
 if extensions.PlotReport.available():
     trainer.extend(extensions.PlotReport(
-        y_keys=['main/loss', 'test/main/loss', 'train/main/loss'],
+        y_keys=[
+            'predictor/loss',
+            'predictor/l1',
+            'test/predictor/loss',
+            'train/predictor/loss',
+            'discriminator/fake',
+            'discriminator/true',
+        ],
         x_key='iteration',
        file_name='loss.png',
        trigger=trigger_log,
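For reference, the reworked create_from_json above expects three new groups of keys in the config JSON. The sketch below shows only the added sections; the key names come straight from the diff, while the concrete values are illustrative placeholders and not values taken from this commit (the existing dataset/model/train keys are unchanged, with "discriminator" nested under "model"):

    {
        "model": {
            "discriminator": {
                "in_channels": 129,
                "hidden_channels_list": [256, 256, 256],
                "last_channels": 256
            }
        },
        "loss": {
            "l1": 1.0,
            "predictor_fake": 1.0,
            "discriminator_true": 1.0,
            "discriminator_fake": 1.0
        },
        "project": {
            "name": "example-project",
            "tags": ["gan"]
        }
    }

Note also that Updater.forward returns a dict of losses keyed 'predictor', 'aligner', and 'discriminator', and update_core calls opt.update(loss.get, k) for every registered optimizer, so the opts dict built in train.py must use exactly those keys; models that are None are simply left out of both dicts.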
