-rw-r--r--  become_yukarin/config.py  |  2
-rw-r--r--  become_yukarin/model.py   | 46
-rw-r--r--  become_yukarin/updater.py | 24
-rw-r--r--  scripts/launch.py         | 99
-rw-r--r--  train.py                  |  1
5 files changed, 156 insertions, 16 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index 80212b6..a65a72c 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -49,6 +49,7 @@ class LossConfig(NamedTuple):
     predictor_fake: float
     discriminator_true: float
     discriminator_fake: float
+    discriminator_grad: float
 
 
 class TrainConfig(NamedTuple):
@@ -135,6 +136,7 @@ def create_from_json(s: Union[str, Path]):
             predictor_fake=d['loss']['predictor_fake'],
             discriminator_true=d['loss']['discriminator_true'],
             discriminator_fake=d['loss']['discriminator_fake'],
+            discriminator_grad=d['loss']['discriminator_grad'],
         ),
         train=TrainConfig(
             batchsize=d['train']['batchsize'],
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 8a727ae..d4fa369 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,3 +1,4 @@
+from functools import partial
 from typing import List
 
 import chainer
@@ -10,7 +11,7 @@ class Convolution1D(chainer.links.ConvolutionND):
     def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                  nobias=False, initialW=None, initial_bias=None, cover_all=False):
-        super(Convolution1D, self).__init__(
+        super().__init__(
             ndim=1,
             in_channels=in_channels,
             out_channels=out_channels,
@@ -24,6 +25,29 @@ class Convolution1D(chainer.links.ConvolutionND):
         )
 
 
+class LegacyConvolution1D(chainer.links.Convolution2D):
+    def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
+                 nobias=False, initialW=None, initial_bias=None, **kwargs):
+        assert ksize is None or isinstance(ksize, int)
+        assert isinstance(stride, int)
+        assert isinstance(pad, int)
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            ksize=(ksize, 1),
+            stride=(stride, 1),
+            pad=(pad, 0),
+            nobias=nobias,
+            initialW=initialW,
+            initial_bias=initial_bias,
+            **kwargs,
+        )
+
+    def __call__(self, x):
+        assert x.shape[-1] == 1
+        return super().__call__(x)
+
+
 class ConvHighway(chainer.link.Chain):
     def __init__(self, in_out_size, nobias=False, activate=chainer.functions.relu,
                  init_Wh=None, init_Wt=None, init_bh=None, init_bt=-1):
@@ -64,7 +88,7 @@ class Conv1DBank(chainer.link.Chain):
         super().__init__()
         self.stacked_channels = out_channels * k
         self.pads = [
-            chainer.functions.Pad(((0, 0), (0, 0), (i // 2, (i + 1) // 2)), mode='constant')
+            partial(chainer.functions.pad, pad_width=((0, 0), (0, 0), (i // 2, (i + 1) // 2)), mode='constant')
             for i in range(k)
         ]
@@ -111,8 +135,9 @@ class CBHG(chainer.link.Chain):
             disable_last_rnn: bool,
     ):
         super().__init__()
-        self.max_pooling_padding = chainer.functions.Pad(
-            ((0, 0), (0, 0), ((max_pooling_k - 1) // 2, max_pooling_k // 2)),
+        self.max_pooling_padding = partial(
+            chainer.functions.pad,
+            pad_width=((0, 0), (0, 0), ((max_pooling_k - 1) // 2, max_pooling_k // 2)),
             mode='constant',
         )
         self.max_pooling = chainer.functions.MaxPoolingND(1, max_pooling_k, 1, cover_all=False)
@@ -201,25 +226,20 @@ class Discriminator(chainer.link.Chain):
         super().__init__()
         with self.init_scope():
             self.convs = chainer.link.ChainList(*(
-                Convolution1D(i_c, o_c, ksize=2, stride=2, nobias=True)
+                LegacyConvolution1D(i_c, o_c, ksize=2, stride=2)
                 for i_c, o_c in zip([in_channels] + hidden_channels_list[:-1], hidden_channels_list)
             ))
-            self.lstm_cell = chainer.links.StatelessLSTM(hidden_channels_list[-1], last_channels)
-            self.last_linear = chainer.links.Linear(last_channels, 1)
+            self.last_linear = chainer.links.Linear(None, 1)
 
     def __call__(self, x):
         """
         :param x: (batch, channel, time)
         """
         h = x
+        h = chainer.functions.reshape(h, h.shape + (1,))
         for conv in self.convs.children():
             h = chainer.functions.relu(conv(h))
-
-        hs = chainer.functions.separate(h, axis=2)
-        c_next = h_next = None
-        for h in reversed(hs):
-            c_next, h_next = self.lstm_cell(c_next, h_next, h)
-        h = h_next
+        h = chainer.functions.reshape(h, h.shape[:-1])
         h = self.last_linear(h)
         return h
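The model.py changes do two things. First, stored `chainer.functions.Pad(...)` instances become `functools.partial` wrappers around `chainer.functions.pad`, which pre-bind `pad_width` and `mode` while staying callable (presumably because the `Pad` function class is no longer directly callable in newer Chainer releases; the patch itself does not say). Second, the Discriminator drops its LSTM head and replaces `Convolution1D` with `LegacyConvolution1D`, a `Convolution2D` that treats time as the height axis and carries a dummy width of 1. A minimal sketch of that shape bookkeeping, using arbitrary channel and length values (not from the patch):

```python
# Sketch of the dummy-width trick LegacyConvolution1D relies on: add a
# trailing axis of size 1, run a 2-D convolution with kernel (ksize, 1),
# then drop the axis again - mirroring the reshapes in Discriminator.__call__.
import numpy
import chainer

conv = chainer.links.Convolution2D(8, 16, ksize=(2, 1), stride=(2, 1), pad=(0, 0))

x = numpy.zeros((4, 8, 32), dtype=numpy.float32)   # (batch, channel, time)
h = chainer.functions.reshape(x, x.shape + (1,))   # -> (4, 8, 32, 1)
h = conv(h)                                        # -> (4, 16, 16, 1), time halved by stride 2
h = chainer.functions.reshape(h, h.shape[:-1])     # -> (4, 16, 16)
assert h.shape == (4, 16, 16)
```

With the time axis eventually flattened into `Linear(None, 1)`, the input channel count of the last layer is inferred at first call, which is why `last_channels` no longer appears.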
diff --git a/become_yukarin/updater.py b/become_yukarin/updater.py
index 927601f..02ea5d3 100644
--- a/become_yukarin/updater.py
+++ b/become_yukarin/updater.py
@@ -45,11 +45,27 @@ class Updater(chainer.training.StandardUpdater):
         if self.discriminator is not None:
             pair_fake = chainer.functions.concat([y * mask, input])
             pair_true = chainer.functions.concat([target * mask, input])
+
+            # DRAGAN
+            if chainer.config.train:  # grad is not available on test
+                std = xp.std(pair_true.data, axis=0, keepdims=True)
+                rand = xp.random.uniform(0, 1, pair_true.shape).astype(xp.float32)
+                perturb = chainer.Variable(pair_true.data + 0.5 * rand * std)
+                grad, = chainer.grad([self.discriminator(perturb)], [perturb], enable_double_backprop=True)
+                grad = chainer.functions.sqrt(chainer.functions.batch_l2_norm_squared(grad))
+                loss_grad = chainer.functions.mean_squared_error(grad, xp.ones_like(grad.data, numpy.float32))
+                reporter.report({'grad': loss_grad}, self.discriminator)
+
+                if xp.any(xp.isnan(loss_grad.data)):
+                    import code
+                    code.interact(local=locals())
+
+            # GAN
             d_fake = self.discriminator(pair_fake)
             d_true = self.discriminator(pair_true)
-            loss_dis_f = chainer.functions.mean_squared_error(d_fake, xp.zeros_like(d_fake.data, numpy.float32))
-            loss_dis_t = chainer.functions.mean_squared_error(d_true, xp.ones_like(d_true.data, numpy.float32))
-            loss_gen_f = chainer.functions.mean_squared_error(d_fake, xp.ones_like(d_fake.data, numpy.float32))
+            loss_dis_f = chainer.functions.average(chainer.functions.softplus(d_fake))
+            loss_dis_t = chainer.functions.average(chainer.functions.softplus(-d_true))
+            loss_gen_f = chainer.functions.average(chainer.functions.softplus(-d_fake))
 
             reporter.report({'fake': loss_dis_f}, self.discriminator)
             reporter.report({'true': loss_dis_t}, self.discriminator)
@@ -63,6 +79,8 @@ class Updater(chainer.training.StandardUpdater):
             loss['discriminator'] = \
                 loss_dis_f * self.loss_config.discriminator_fake + \
                 loss_dis_t * self.loss_config.discriminator_true
+            if chainer.config.train:  # grad is not available on test
+                loss['discriminator'] += loss_grad * self.loss_config.discriminator_grad
             reporter.report({'loss': loss['discriminator']}, self.discriminator)
 
             loss['predictor'] += loss_gen_f * self.loss_config.predictor_fake
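The updater gains a DRAGAN-style gradient penalty: real pairs are perturbed by uniform noise scaled to half their per-batch standard deviation, the discriminator's gradient at those points is taken with double backprop enabled, and its L2 norm is regressed toward 1, weighted by the new `loss.discriminator_grad` coefficient. The adversarial terms also switch from LSGAN-style mean squared errors to softplus losses, i.e. the standard non-saturating sigmoid cross-entropy formulation. A self-contained sketch of the penalty with a toy linear critic (shapes and names here are illustrative, not the repository's classes):

```python
# Standalone DRAGAN-style penalty, assuming a toy critic on (batch, features)
# inputs; the real Updater applies the same steps to concatenated spectrogram
# pairs on CPU or GPU via xp.
import numpy
import chainer

disc = chainer.links.Linear(10, 1)                 # stand-in discriminator
real = numpy.random.randn(8, 10).astype(numpy.float32)

std = real.std(axis=0, keepdims=True)              # per-feature spread of the batch
rand = numpy.random.uniform(0, 1, real.shape).astype(numpy.float32)
perturb = chainer.Variable(real + 0.5 * rand * std)

# Gradient of the critic at the perturbed points; double backprop keeps the
# penalty itself differentiable w.r.t. the critic's parameters.
grad, = chainer.grad([disc(perturb)], [perturb], enable_double_backprop=True)
grad_norm = chainer.functions.sqrt(chainer.functions.batch_l2_norm_squared(grad))
loss_grad = chainer.functions.mean_squared_error(
    grad_norm, numpy.ones_like(grad_norm.data))
loss_grad.backward()                               # flows into disc's parameters
```

Because the penalty needs a gradient graph, both its computation and its contribution to the discriminator loss are guarded by `chainer.config.train`, matching the "grad is not available on test" comments in the diff.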
diff --git a/scripts/launch.py b/scripts/launch.py
new file mode 100644
index 0000000..3186161
--- /dev/null
+++ b/scripts/launch.py
@@ -0,0 +1,99 @@
+"""
+launcher for tasks that differ only in some parameters
+"""
+
+import argparse
+import copy
+import datetime
+import hashlib
+import json
+import subprocess
+import time
+from pathlib import Path
+
+base_command_default = \
+    "screen -d -m -S {project/name}_gpu{train/gpu} ;" + \
+    "screen -S {project/name}_gpu{train/gpu} -X stuff 'python3 {python_file_path} {recipe_path} {output}\n'"
+
+parser = argparse.ArgumentParser()
+parser.add_argument('output_dir', type=Path)
+parser.add_argument('--python_file_path', default='train.py')
+parser.add_argument('--recipe_json_path', default='recipe/recipe.json')
+parser.add_argument('--base_config_json_path', default='recipe/config.json')
+parser.add_argument('--base_command', default=base_command_default)
+args = parser.parse_args()
+
+recipe = json.load(open(args.recipe_json_path, encoding='utf-8'))
+recipe_each = recipe['each']
+recipe_all = recipe['all']
+base_config = json.load(open(args.base_config_json_path, encoding='utf-8'))
+
+
+def put_config_value(config, recipe_key, value):
+    key_tree = recipe_key.split('/')
+    target = config
+    for key in key_tree[:-1]:
+        target = target[key]
+
+    target[key_tree[-1]] = value
+
+
+def _replace_name(dist):
+    _format = {}
+    now = datetime.datetime.now()
+
+    if '{date}' in dist['project']['name']:
+        _format['date'] = now.strftime('%Y%m%d%H%M%S')
+    if '{hash}' in dist['project']['name']:
+        _format['hash'] = hashlib.md5(bytes(str(now), 'utf')).hexdigest()[:6]
+
+    if len(_format) > 0:
+        dist['project']['name'] = dist['project']['name'].format(**_format)
+
+
+num_task = min(len(list(value)) for value in recipe_each.values())
+command_list = []
+
+for i in range(num_task):
+    config = copy.deepcopy(base_config)
+
+    for recipe_key in recipe_all.keys():
+        put_config_value(config, recipe_key, recipe_all[recipe_key])
+
+    for recipe_key in recipe_each.keys():
+        put_config_value(config, recipe_key, recipe_each[recipe_key][i])
+
+    _replace_name(config)
+
+    # add git branch name
+    git_branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode("utf-8").strip()
+    config['project']['tags'].append('git branch name:' + git_branch)
+
+    made_recipe_path = "{}.{}.json".format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'), i)
+    with open(made_recipe_path, 'w', encoding='utf') as f:
+        json.dump(config, f, indent=2, sort_keys=True, ensure_ascii=False)
+
+
+    def make_key_chain(key_chain, value, dist):
+        if not isinstance(value, dict):
+            dist['/'.join(key_chain)] = value
+        else:
+            for key in value.keys():
+                make_key_chain(key_chain + [key], value[key], dist)
+
+
+    dist = {}
+    make_key_chain([], config, dist)
+
+    dist['output'] = args.output_dir / config['project']['name']
+    dist['python_file_path'] = args.python_file_path
+    dist['recipe_path'] = made_recipe_path
+
+    command = args.base_command.format(**dist)
+    command_list += [command]
+
+    print(config['project']['name'])
+
+for command in command_list:
+    time.sleep(1)
+    subprocess.check_output(command, shell=True)
diff --git a/train.py b/train.py
--- a/train.py
+++ b/train.py
@@ -90,6 +90,7 @@ if extensions.PlotReport.available():
             'train/predictor/loss',
             'discriminator/fake',
             'discriminator/true',
+            'discriminator/grad',
         ], x_key='iteration', file_name='loss.png',
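scripts/launch.py expects a recipe JSON with two top-level keys: `all` (values applied to every task) and `each` (lists supplying one value per task), where each key is a slash-separated path into the base config; the number of launched tasks is the length of the shortest list under `each`. A hypothetical recipe, written from the script's logic (the key names and values below are illustrative, only the `all`/`each` structure and slash-path convention come from the script):

```python
# Hypothetical recipe.json for scripts/launch.py. Two tasks are produced here
# because every list under 'each' has two entries; task i takes element i of
# each list, and '{date}' in project/name is expanded by _replace_name.
import json

recipe = {
    'all': {
        'loss/discriminator_grad': 1.0,          # shared by every task
    },
    'each': {
        'train/gpu': [0, 1],                     # one GPU per screen session
        'project/name': ['exp_{date}_a', 'exp_{date}_b'],
    },
}
with open('recipe/recipe.json', 'w', encoding='utf-8') as f:
    json.dump(recipe, f, indent=2)
```

Each task gets its merged config dumped to a timestamped JSON file and launched in a detached `screen` session via the `base_command` template, whose `{project/name}`-style fields are filled from the flattened config produced by `make_key_chain`.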
