diff options
| -rw-r--r-- | become_yukarin/model.py | 4 | ||||
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 52 | ||||
| -rw-r--r-- | train.py | 3 |
3 files changed, 41 insertions, 18 deletions
diff --git a/become_yukarin/model.py b/become_yukarin/model.py index 6dfb2c1..3b5102e 100644 --- a/become_yukarin/model.py +++ b/become_yukarin/model.py @@ -187,9 +187,9 @@ class Aligner(chainer.link.Chain): h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1))) # h: batch * (timeA, channel) _, h = self.gru(None, h) # h: batch * (timeA, ?) h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1)) # h: (batch, ?, timeA) - h = chainer.functions.softmax(self.last(h), axis=2) # h: (batch, timeB, timeA) + h = chainer.functions.softmax(self.last(h), axis=1) # h: (batch, timeB, timeA) - h = chainer.functions.matmul(x, h, transb=True) # h: (batch, channel, timeB) + h = chainer.functions.matmul(x, h) # h: (batch, channel, time) return h diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py index f7cbaa1..efde9b1 100644 --- a/scripts/extract_acoustic_feature.py +++ b/scripts/extract_acoustic_feature.py @@ -30,6 +30,7 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db) parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period) parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order) parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha) +parser.add_argument('--disable_alignment', action='store_true') arguments = parser.parse_args() @@ -65,34 +66,53 @@ def generate_feature(path1, path2): f2 = acoustic_feature_process(wave2, test=True) # alignment - aligner = MFCCAligner(f1.mfcc, f2.mfcc) + if not arguments.disable_alignment: + aligner = MFCCAligner(f1.mfcc, f2.mfcc) - f0_1, f0_2 = aligner.align(f1.f0, f2.f0) - spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram) - aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity) - mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc) - voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced) + f0_1, f0_2 = aligner.align(f1.f0, f2.f0) + spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram) + aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity) + mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc) + voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced) + + f1 = AcousticFeature( + f0=f0_1, + spectrogram=spectrogram_1, + aperiodicity=aperiodicity_1, + mfcc=mfcc_1, + voiced=voiced_1, + ) + f2 = AcousticFeature( + f0=f0_2, + spectrogram=spectrogram_2, + aperiodicity=aperiodicity_2, + mfcc=mfcc_2, + voiced=voiced_2, + ) + + f1.validate() + f2.validate() # save acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True) path = Path(arguments.output1_directory, path1.stem + '.npy') feature = AcousticFeature( - f0=f0_1, - spectrogram=spectrogram_1, - aperiodicity=aperiodicity_1, - mfcc=mfcc_1, - voiced=voiced_1, + f0=f1.f0, + spectrogram=f1.spectrogram, + aperiodicity=f1.aperiodicity, + mfcc=f1.mfcc, + voiced=f1.voiced, ) acoustic_feature_save_process({'path': path, 'feature': feature}) print('saved!', path) path = Path(arguments.output2_directory, path2.stem + '.npy') feature = AcousticFeature( - f0=f0_2, - spectrogram=spectrogram_2, - aperiodicity=aperiodicity_2, - mfcc=mfcc_2, - voiced=voiced_2, + f0=f2.f0, + spectrogram=f2.spectrogram, + aperiodicity=f2.aperiodicity, + mfcc=f2.mfcc, + voiced=f2.voiced, ) acoustic_feature_save_process({'path': path, 'feature': feature}) print('saved!', path) @@ -2,6 +2,7 @@ import argparse from functools import partial from pathlib import Path +from chainer import cuda from chainer import optimizers from chainer import training from chainer.dataset import convert @@ -24,6 +25,8 @@ arguments.output.mkdir(exist_ok=True) config.save_as_json((arguments.output / 'config.json').absolute()) # model +if config.train.gpu >= 0: + cuda.get_device_from_id(config.train.gpu).use() predictor = create_predictor(config.model) aligner = create_aligner(config.model) model = Loss(config.loss, predictor=predictor, aligner=aligner) |
