diff options
Diffstat (limited to 'become_yukarin/voice_changer.py')
| -rw-r--r-- | become_yukarin/voice_changer.py | 148 |
1 file changed, 20 insertions, 128 deletions
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index 822d8c5..30fbf28 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -1,135 +1,27 @@
-from functools import partial
-from pathlib import Path
-from typing import Optional
-
-import chainer
 import numpy
-import pysptk
-import pyworld
 
-from become_yukarin.config.config import Config
-from become_yukarin.data_struct import AcousticFeature
-from become_yukarin.data_struct import Wave
-from become_yukarin.dataset.dataset import AcousticFeatureDenormalizeProcess
-from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
-from become_yukarin.dataset.dataset import AcousticFeatureNormalizeProcess
-from become_yukarin.dataset.dataset import AcousticFeatureProcess
-from become_yukarin.dataset.dataset import DecodeFeatureProcess
-from become_yukarin.dataset.dataset import EncodeFeatureProcess
-from become_yukarin.dataset.dataset import WaveFileLoadProcess
-from become_yukarin.model.model import create_predictor
+from .acoustic_converter import AcousticConverter
+from .super_resolution import SuperResolution
 
 
 class VoiceChanger(object):
-    def __init__(self, config: Config, model_path: Path):
-        self.config = config
-        self.model_path = model_path
-
-        self.model = model = create_predictor(config.model)
-        chainer.serializers.load_npz(str(model_path), model)
-
-        self._param = param = config.dataset.param
-        self._wave_process = WaveFileLoadProcess(
-            sample_rate=param.voice_param.sample_rate,
-            top_db=None,
-        )
-        self._feature_process = AcousticFeatureProcess(
-            frame_period=param.acoustic_feature_param.frame_period,
-            order=param.acoustic_feature_param.order,
-            alpha=param.acoustic_feature_param.alpha,
-        )
-
-        self._acoustic_feature_load_process = acoustic_feature_load_process = AcousticFeatureLoadProcess()
-
-        input_mean = acoustic_feature_load_process(config.dataset.input_mean_path, test=True)
-        input_var = acoustic_feature_load_process(config.dataset.input_var_path, test=True)
-        target_mean = acoustic_feature_load_process(config.dataset.target_mean_path, test=True)
-        target_var = acoustic_feature_load_process(config.dataset.target_var_path, test=True)
-        self._feature_normalize = AcousticFeatureNormalizeProcess(
-            mean=input_mean,
-            var=input_var,
-        )
-        self._feature_denormalize = AcousticFeatureDenormalizeProcess(
-            mean=target_mean,
-            var=target_var,
-        )
-
-        feature_sizes = AcousticFeature.get_sizes(
-            sampling_rate=param.voice_param.sample_rate,
-            order=param.acoustic_feature_param.order,
-        )
-        self._encode_feature = EncodeFeatureProcess(config.dataset.features)
-        self._decode_feature = DecodeFeatureProcess(config.dataset.features, feature_sizes)
-
-    def convert_to_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
-        if out_sampling_rate is None:
-            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate
-
-        input_feature = input
-        input = self._feature_normalize(input, test=True)
-        input = self._encode_feature(input, test=True)
-
-        converter = partial(chainer.dataset.convert.concat_examples, padding=0)
-        inputs = converter([input])
-
-        with chainer.using_config('train', False):
-            out = self.model(inputs).data[0]
-
-        out = self._decode_feature(out, test=True)
-        out = AcousticFeature(
-            f0=out.f0,
-            spectrogram=out.spectrogram,
-            aperiodicity=out.aperiodicity,
-            mfcc=out.mfcc,
-            voiced=input_feature.voiced,
-        )
-        out = self._feature_denormalize(out, test=True)
-        out = AcousticFeature(
-            f0=out.f0,
-            spectrogram=out.spectrogram,
-            aperiodicity=input_feature.aperiodicity,
-            mfcc=out.mfcc,
-            voiced=out.voiced,
-        )
-
-        fftlen = pyworld.get_cheaptrick_fft_size(out_sampling_rate)
-        spectrogram = pysptk.mc2sp(
-            out.mfcc,
-            alpha=self._param.acoustic_feature_param.alpha,
-            fftlen=fftlen,
-        )
-
-        out = AcousticFeature(
-            f0=out.f0,
-            spectrogram=spectrogram,
-            aperiodicity=out.aperiodicity,
-            mfcc=out.mfcc,
-            voiced=out.voiced,
-        ).astype(numpy.float64)
-        return out
-
-    def convert_from_audio_path(self, input: Path, out_sampling_rate: Optional[int] = None):
-        input = self._wave_process(str(input), test=True)
-        input = self._feature_process(input, test=True)
-        return self.convert_from_feature(input, out_sampling_rate)
-
-    def convert_from_feature_path(self, input: Path, out_sampling_rate: Optional[int] = None):
-        input = self._acoustic_feature_load_process(input, test=True)
-        return self.convert_from_feature(input, out_sampling_rate)
-
-    def convert_from_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
-        if out_sampling_rate is None:
-            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate
+    def __init__(
+            self,
+            acoustic_converter: AcousticConverter,
+            super_resolution: SuperResolution,
+            output_sampling_rate: int = None,
+    ):
+        if output_sampling_rate is None:
+            output_sampling_rate = super_resolution.config.dataset.param.voice_param.sample_rate
 
-        out = self.convert_to_feature(input=input, out_sampling_rate=out_sampling_rate)
-        out = pyworld.synthesize(
-            f0=out.f0.ravel(),
-            spectrogram=out.spectrogram,
-            aperiodicity=out.aperiodicity,
-            fs=out_sampling_rate,
-            frame_period=self._param.acoustic_feature_param.frame_period,
-        )
-        return Wave(out, sampling_rate=out_sampling_rate)
+        self.acoustic_converter = acoustic_converter
+        self.super_resolution = super_resolution
+        self.output_sampling_rate = output_sampling_rate
 
-    def __call__(self, voice_path: Path, out_sampling_rate: Optional[int] = None):
-        return self.convert_from_audio_path(voice_path, out_sampling_rate)
+    def convert_from_wave_path(self, wave_path: str):
+        w_in = self.acoustic_converter._wave_process(wave_path)
+        f_in = self.acoustic_converter._feature_process(w_in)
+        f_low = self.acoustic_converter.convert_to_feature(f_in)
+        s_high = self.super_resolution.convert(f_low.spectrogram.astype(numpy.float32))
+        wave = self.super_resolution(s_high, acoustic_feature=f_low, sampling_rate=self.output_sampling_rate)
+        return wave
