diff options
| author | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2017-11-08 09:46:00 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2017-11-08 09:46:00 +0900 |
| commit | 6b2bae905e59d3b8756c624e38a447786c2b9e9d (patch) | |
| tree | 59104ed514a162810b02f44af31d9d3c4b00be49 /become_yukarin/voice_changer.py | |
| parent | 6119849270c2aed117627d7d2b060f37d1c25de4 (diff) | |
can convert voice
Diffstat (limited to 'become_yukarin/voice_changer.py')
| -rw-r--r-- | become_yukarin/voice_changer.py | 97 |
1 files changed, 97 insertions, 0 deletions
# Source file: become_yukarin/voice_changer.py (added as a new file by this commit).
from functools import partial
from pathlib import Path
from typing import Optional

import chainer
import numpy
import pysptk
import pyworld

from become_yukarin.config import Config
from become_yukarin.data_struct import AcousticFeature
from become_yukarin.data_struct import Wave
from become_yukarin.dataset.dataset import AcousticFeatureDenormalizeProcess
from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
from become_yukarin.dataset.dataset import AcousticFeatureNormalizeProcess
from become_yukarin.dataset.dataset import AcousticFeatureProcess
from become_yukarin.dataset.dataset import EncodeFeatureProcess
from become_yukarin.dataset.dataset import DecodeFeatureProcess
from become_yukarin.dataset.dataset import WaveFileLoadProcess
from become_yukarin.model import create as create_model


class VoiceChanger(object):
    """Convert a voice recording with a trained model and resynthesize it.

    An instance is callable: it loads a wave file, extracts acoustic
    features, feeds the normalized and encoded ``'mfcc'`` feature through the
    model, and synthesizes a waveform from the model output combined with
    the f0 and aperiodicity of the input.
    """

    def __init__(self, config: Config, model_path: Path) -> None:
        """Load the model weights and build the processing steps.

        Args:
            config: Project configuration. ``config.model`` is handed to the
                model factory; ``config.dataset`` supplies the feature
                parameters and the paths of the normalization statistics.
            model_path: Path of the ``.npz`` file loaded into the model.
        """
        self.config = config
        self.model_path = model_path

        self.model = create_model(config.model)
        chainer.serializers.load_npz(str(model_path), self.model)

        dataset_config = config.dataset
        self._param = param = dataset_config.param

        voice_param = param.voice_param
        self._wave_process = WaveFileLoadProcess(
            sample_rate=voice_param.sample_rate,
            top_db=voice_param.top_db,
        )

        feature_param = param.acoustic_feature_param
        self._feature_process = AcousticFeatureProcess(
            frame_period=feature_param.frame_period,
            order=feature_param.order,
            alpha=feature_param.alpha,
        )

        # Input statistics normalize what goes into the model; target
        # statistics denormalize what comes out of it.
        load_feature = AcousticFeatureLoadProcess()
        self._feature_normalize = AcousticFeatureNormalizeProcess(
            mean=load_feature(dataset_config.input_mean_path, test=True),
            var=load_feature(dataset_config.input_var_path, test=True),
        )
        self._feature_denormalize = AcousticFeatureDenormalizeProcess(
            mean=load_feature(dataset_config.target_mean_path, test=True),
            var=load_feature(dataset_config.target_var_path, test=True),
        )

        self._encode_feature = EncodeFeatureProcess(['mfcc'])
        self._decode_feature = DecodeFeatureProcess(['mfcc'])

    def __call__(self, voice_path: Path, out_sampling_rate: Optional[int] = None) -> Wave:
        """Convert the voice stored at ``voice_path``.

        Args:
            voice_path: Path of the wave file to convert.
            out_sampling_rate: Sampling rate passed to the synthesizer and
                stored on the returned wave. Defaults to the sampling rate
                of the loaded input wave.

        Returns:
            A ``Wave`` holding the synthesized signal and
            ``out_sampling_rate``.
        """
        input_wave = self._wave_process(str(voice_path), test=True)
        if out_sampling_rate is None:
            out_sampling_rate = input_wave.sampling_rate

        # Keep the un-normalized feature: its f0 and aperiodicity are reused
        # unchanged for synthesis below.
        input_feature = self._feature_process(input_wave, test=True)
        normalized = self._feature_normalize(input_feature, test=True)
        encoded = self._encode_feature(normalized, test=True)

        # Build a batch containing the single example.
        converter = partial(chainer.dataset.convert.concat_examples, padding=0)
        inputs = converter([encoded])

        # Chainer's default configuration has ``train=True``; run the forward
        # pass in test mode so train/test-dependent layers behave as at
        # inference, and skip graph construction since no backward pass
        # follows.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            out = self.model(inputs).data[0]
        out = self._decode_feature(out, test=True)
        out = self._feature_denormalize(out, test=True)

        # NOTE(review): the FFT size is derived from the input sampling rate,
        # while synthesis below uses out_sampling_rate; confirm the two are
        # meant to differ when a caller overrides out_sampling_rate.
        fftlen = pyworld.get_cheaptrick_fft_size(input_wave.sampling_rate)
        spectrogram = pysptk.mc2sp(
            out.mfcc,
            alpha=self._param.acoustic_feature_param.alpha,
            fftlen=fftlen,
        )

        # The synthesizer is fed float64 arrays.
        feature = AcousticFeature(
            f0=input_feature.f0,
            spectrogram=spectrogram,
            aperiodicity=input_feature.aperiodicity,
            mfcc=out.mfcc,
        ).astype(numpy.float64)
        signal = pyworld.synthesize(
            f0=feature.f0,
            spectrogram=feature.spectrogram,
            aperiodicity=feature.aperiodicity,
            fs=out_sampling_rate,
            frame_period=self._param.acoustic_feature_param.frame_period,
        )

        return Wave(signal, sampling_rate=out_sampling_rate)
