| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-15 04:17:10 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-15 04:41:33 +0900 |
| commit | 7bfc3321e356f24f49c790b578917e8db22bd30d (patch) | |
| tree | 65e8070b34d22e8b6211bb41e7dd448eb39dccd1 | |
| parent | 2be3f03adc5695f82c6ab86da780108f786ed014 (diff) | |
Enable super-resolution training
| -rw-r--r-- | become_yukarin/config/sr_config.py | 23 |
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 25 |
| -rw-r--r-- | become_yukarin/model/sr_model.py | 6 |
| -rw-r--r-- | become_yukarin/updater/sr_updater.py | 19 |
| -rw-r--r-- | become_yukarin/voice_changer.py | 3 |
| -rw-r--r-- | scripts/extract_spectrogram_pair.py | 4 |
| -rw-r--r-- | train_sr.py | 1 |
7 files changed, 41 insertions, 40 deletions
```diff
diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py
index 93db424..4c62808 100644
--- a/become_yukarin/config/sr_config.py
+++ b/become_yukarin/config/sr_config.py
@@ -17,17 +17,7 @@ class SRDatasetConfig(NamedTuple):
 
 
 class SRModelConfig(NamedTuple):
-    in_channels: int
-    conv_bank_out_channels: int
-    conv_bank_k: int
-    max_pooling_k: int
-    conv_projections_hidden_channels: int
-    highway_layers: int
-    out_channels: int
-    out_size: int
-    aligner_out_time_length: int
-    disable_last_rnn: bool
-    enable_aligner: bool
+    pass
 
 
 class SRLossConfig(NamedTuple):
@@ -89,17 +79,6 @@ def create_from_json(s: Union[str, Path]):
             num_test=d['dataset']['num_test'],
         ),
         model=SRModelConfig(
-            in_channels=d['model']['in_channels'],
-            conv_bank_out_channels=d['model']['conv_bank_out_channels'],
-            conv_bank_k=d['model']['conv_bank_k'],
-            max_pooling_k=d['model']['max_pooling_k'],
-            conv_projections_hidden_channels=d['model']['conv_projections_hidden_channels'],
-            highway_layers=d['model']['highway_layers'],
-            out_channels=d['model']['out_channels'],
-            out_size=d['model']['out_size'],
-            aligner_out_time_length=d['model']['aligner_out_time_length'],
-            disable_last_rnn=d['model']['disable_last_rnn'],
-            enable_aligner=d['model']['enable_aligner'],
         ),
         loss=SRLossConfig(
             mse=d['loss']['mse'],
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index b0f9807..38cf749 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -313,7 +313,6 @@ class ShapeAlignProcess(BaseDataProcess):
 
 
 class RandomPaddingProcess(BaseDataProcess):
     def __init__(self, min_size: int, time_axis: int = 1):
-        assert time_axis == 1
         self._min_size = min_size
         self._time_axis = time_axis
@@ -328,7 +327,9 @@ class RandomPaddingProcess(BaseDataProcess):
         pre = random.randint(self._min_size - data.shape[self._time_axis] + 1)
         post = self._min_size - pre
-        return numpy.pad(data, ((0, 0), (pre, post)), mode='constant')
+        pad = [(0, 0)] * data.ndim
+        pad[self._time_axis] = (pre, post)
+        return numpy.pad(data, pad, mode='constant')
 
 
 class LastPaddingProcess(BaseDataProcess):
@@ -520,8 +521,8 @@ def create_sr(config: SRDatasetConfig):
     data_process_base = ChainProcess([
         LowHighSpectrogramFeatureLoadProcess(validate=True),
         SplitProcess(dict(
-            input=LambdaProcess(lambda d, test: numpy.log(d.low)),
-            target=LambdaProcess(lambda d, test: numpy.log(d.high)),
+            input=LambdaProcess(lambda d, test: numpy.log(d.low[:, :-1])),
+            target=LambdaProcess(lambda d, test: numpy.log(d.high[:, :-1])),
         )),
     ])
@@ -535,13 +536,13 @@ def create_sr(config: SRDatasetConfig):
     def padding(s):
         return ChainProcess([
             LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomPaddingProcess(min_size=config.train_crop_size),
+            RandomPaddingProcess(min_size=config.train_crop_size, time_axis=0),
         ])
 
     def crop(s):
         return ChainProcess([
             LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomCropProcess(crop_size=config.train_crop_size),
+            RandomCropProcess(crop_size=config.train_crop_size, time_axis=0),
         ])
 
     data_process_train.append(ChainProcess([
@@ -550,6 +551,10 @@ def create_sr(config: SRDatasetConfig):
         add_seed(),
         SplitProcess(dict(input=crop('input'), target=crop('target'))),
     ]))
+    data_process_train.append(LambdaProcess(lambda d, test: {
+        'input': d['input'][numpy.newaxis],
+        'target': d['target'][numpy.newaxis],
+    }))
 
     data_process_test = copy.deepcopy(data_process_base)
     if config.train_crop_size is not None:
@@ -557,14 +562,18 @@ def create_sr(config: SRDatasetConfig):
             input=ChainProcess([
                 LambdaProcess(lambda d, test: d['input']),
                 LastPaddingProcess(min_size=config.train_crop_size),
-                FirstCropProcess(crop_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size, time_axis=0),
             ]),
             target=ChainProcess([
                 LambdaProcess(lambda d, test: d['target']),
                 LastPaddingProcess(min_size=config.train_crop_size),
-                FirstCropProcess(crop_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size, time_axis=0),
             ]),
         )))
+        data_process_test.append(LambdaProcess(lambda d, test: {
+            'input': d['input'][numpy.newaxis],
+            'target': d['target'][numpy.newaxis],
+        }))
 
     input_paths = list(sorted([Path(p) for p in glob.glob(str(config.input_glob))]))
diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py
index 74119a4..64158ca 100644
--- a/become_yukarin/model/sr_model.py
+++ b/become_yukarin/model/sr_model.py
@@ -47,7 +47,6 @@ class Encoder(chainer.Chain):
             self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)
 
     def __call__(self, x):
-        x = F.reshape(x, (len(x), 1) + x.shape[1:])
         hs = [F.leaky_relu(self.c0(x))]
         for i in range(1, 8):
             hs.append(self['c%d' % i](hs[i - 1]))
@@ -103,7 +102,6 @@ class SRDiscriminator(chainer.Chain):
             self.c4 = L.Convolution2D(512, 1, 3, 1, 1, initialW=w)
 
     def __call__(self, x_0, x_1):
-        x_0 = F.reshape(x_0, (len(x_0), 1) + x_0.shape[1:])
         h = F.concat([self.c0_0(x_0), self.c0_1(x_1)])
         h = self.c1(h)
         h = self.c2(h)
@@ -114,6 +112,6 @@ class SRDiscriminator(chainer.Chain):
 
 def create_sr(config: SRModelConfig):
-    predictor = SRPredictor(in_ch=1, out_ch=3)
-    discriminator = SRDiscriminator(in_ch=1, out_ch=3)
+    predictor = SRPredictor(in_ch=1, out_ch=1)
+    discriminator = SRDiscriminator(in_ch=1, out_ch=1)
     return predictor, discriminator
diff --git a/become_yukarin/updater/sr_updater.py b/become_yukarin/updater/sr_updater.py
index a6b1d22..6e2b400 100644
--- a/become_yukarin/updater/sr_updater.py
+++ b/become_yukarin/updater/sr_updater.py
@@ -33,17 +33,28 @@ class SRUpdater(chainer.training.StandardUpdater):
         chainer.report({'loss': loss}, predictor)
         return loss
 
-    def _loss_discriminator(self, discriminator, y_in, y_out):
-        b, _, w, h = y_in.data.shape
+    def _loss_discriminator(self, discriminator, d_real, d_fake):
+        b, _, w, h = d_real.data.shape
 
-        loss_real = F.sum(F.softplus(-y_in)) / (b * w * h)
+        loss_real = F.sum(F.softplus(-d_real)) / (b * w * h)
         chainer.report({'real': loss_real}, discriminator)
 
-        loss_fake = F.sum(F.softplus(y_out)) / (b * w * h)
+        loss_fake = F.sum(F.softplus(d_fake)) / (b * w * h)
         chainer.report({'fake': loss_fake}, discriminator)
 
         loss = loss_real + loss_fake
         chainer.report({'loss': loss}, discriminator)
+
+        tp = (d_real.data > 0.5).sum()
+        fp = (d_fake.data > 0.5).sum()
+        fn = (d_real.data <= 0.5).sum()
+        tn = (d_fake.data <= 0.5).sum()
+        accuracy = (tp + tn) / (tp + fp + fn + tn)
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        chainer.report({'accuracy': accuracy}, self.discriminator)
+        chainer.report({'precision': precision}, self.discriminator)
+        chainer.report({'recall': recall}, self.discriminator)
         return loss
 
     def forward(self, input, target):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index a8a207a..822d8c5 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -118,6 +118,9 @@ class VoiceChanger(object):
         return self.convert_from_feature(input, out_sampling_rate)
 
     def convert_from_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
+        if out_sampling_rate is None:
+            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate
+
         out = self.convert_to_feature(input=input, out_sampling_rate=out_sampling_rate)
         out = pyworld.synthesize(
             f0=out.f0.ravel(),
diff --git a/scripts/extract_spectrogram_pair.py b/scripts/extract_spectrogram_pair.py
index 02108fc..be21459 100644
--- a/scripts/extract_spectrogram_pair.py
+++ b/scripts/extract_spectrogram_pair.py
@@ -10,6 +10,7 @@ from pprint import pprint
 import numpy
 import pysptk
 import pyworld
+from tqdm import tqdm
 
 from become_yukarin.dataset.dataset import AcousticFeatureProcess
 from become_yukarin.dataset.dataset import WaveFileLoadProcess
@@ -68,7 +69,6 @@ def generate_file(path):
         'low': low_spectrogram,
         'high': high_spectrogram,
     })
-    print('saved!', out)
 
 
 def main():
@@ -76,7 +76,7 @@ def main():
     arguments.output_directory.mkdir(exist_ok=True)
 
     pool = multiprocessing.Pool()
-    pool.map(generate_file, paths)
+    list(tqdm(pool.imap(generate_file, paths), total=len(paths)))
 
 
 if __name__ == '__main__':
diff --git a/train_sr.py b/train_sr.py
index c714aa0..96f11e7 100644
--- a/train_sr.py
+++ b/train_sr.py
@@ -78,6 +78,7 @@ ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iterat
 trainer.extend(ext, trigger=trigger_snapshot)
 
 trainer.extend(extensions.LogReport(trigger=trigger_log))
+trainer.extend(extensions.PrintReport(['predictor/loss']))
 
 if extensions.PlotReport.available():
     trainer.extend(extensions.PlotReport(
```
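The dataset changes generalize the random padding to an arbitrary `time_axis` and add the channel axis with `numpy.newaxis` in the pipeline, so the models no longer reshape their inputs. A minimal standalone sketch of that padding pattern, assuming a (time, frequency) spectrogram; the function name and shapes are illustrative, not from the repository:

```python
import numpy


def pad_to_min_size(data, min_size, time_axis=0):
    """Pad only `time_axis` up to `min_size`, splitting the padding randomly."""
    rest = min_size - data.shape[time_axis]
    if rest <= 0:
        return data
    pre = numpy.random.randint(rest + 1)  # 0..rest inclusive
    pad = [(0, 0)] * data.ndim            # leave every other axis untouched
    pad[time_axis] = (pre, rest - pre)
    return numpy.pad(data, pad, mode='constant')


spectrogram = numpy.ones((100, 512))                 # (time, frequency)
padded = pad_to_min_size(spectrogram, min_size=256)
assert padded.shape == (256, 512)
assert padded[numpy.newaxis].shape == (1, 256, 512)  # channel axis for the CNN
```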
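The updater change renames the discriminator inputs to `d_real`/`d_fake` and reports accuracy, precision, and recall next to the softplus GAN losses. A plain-numpy sketch of the same arithmetic, with arrays standing in for chainer Variables; the 0.5 threshold is applied to the raw discriminator outputs exactly as in the diff:

```python
import numpy


def softplus(x):
    return numpy.logaddexp(0.0, x)  # numerically stable log(1 + exp(x))


def discriminator_metrics(d_real, d_fake):
    b, _, w, h = d_real.shape
    loss_real = softplus(-d_real).sum() / (b * w * h)  # real samples should score high
    loss_fake = softplus(d_fake).sum() / (b * w * h)   # fake samples should score low

    tp = (d_real > 0.5).sum()   # real judged real
    fp = (d_fake > 0.5).sum()   # fake judged real
    fn = (d_real <= 0.5).sum()  # real judged fake
    tn = (d_fake <= 0.5).sum()  # fake judged fake
    accuracy = (tp + tn) / (tp + fp + fn + tn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return loss_real + loss_fake, accuracy, precision, recall


loss, acc, prec, rec = discriminator_metrics(
    numpy.random.randn(4, 1, 8, 8), numpy.random.randn(4, 1, 8, 8))
```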
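In `scripts/extract_spectrogram_pair.py`, `pool.map` plus per-file prints is replaced by `pool.imap` wrapped in `tqdm`: `imap` yields results as workers finish, so a single progress bar tracks the whole job instead of interleaved log lines. The pattern in isolation, with a stand-in worker instead of the repository's `generate_file`:

```python
import multiprocessing

from tqdm import tqdm


def work(n):
    return n * n  # stand-in for per-file feature extraction


if __name__ == '__main__':
    items = list(range(100))
    with multiprocessing.Pool() as pool:
        # list(...) drains the iterator so tqdm sees every completed task
        results = list(tqdm(pool.imap(work, items), total=len(items)))
    print(sum(results))
```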
