author    Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>  2017-12-18 12:52:13 +0900
committer Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>  2017-12-18 12:52:13 +0900
commit    658e2570428867a27470c38b071ac81e7756bcf2
tree      34d2738a4f822c8f7865111ad590a5df083b1350
parent    a6add91610d40c62896352414d5af2e40a2bcdbd
can convert from acoustic feature
 become_yukarin/voice_changer.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d1090a2..aeae0e6 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -39,12 +39,12 @@ class VoiceChanger(object):
             alpha=param.acoustic_feature_param.alpha,
         )
-        _acoustic_feature_load_process = AcousticFeatureLoadProcess()
+        self._acoustic_feature_load_process = acoustic_feature_load_process = AcousticFeatureLoadProcess()
-        input_mean = _acoustic_feature_load_process(config.dataset.input_mean_path, test=True)
-        input_var = _acoustic_feature_load_process(config.dataset.input_var_path, test=True)
-        target_mean = _acoustic_feature_load_process(config.dataset.target_mean_path, test=True)
-        target_var = _acoustic_feature_load_process(config.dataset.target_var_path, test=True)
+        input_mean = acoustic_feature_load_process(config.dataset.input_mean_path, test=True)
+        input_var = acoustic_feature_load_process(config.dataset.input_var_path, test=True)
+        target_mean = acoustic_feature_load_process(config.dataset.target_mean_path, test=True)
+        target_var = acoustic_feature_load_process(config.dataset.target_var_path, test=True)
         self._feature_normalize = AcousticFeatureNormalizeProcess(
             mean=input_mean,
             var=input_var,
@@ -61,12 +61,20 @@ class VoiceChanger(object):
         self._encode_feature = EncodeFeatureProcess(config.dataset.features)
         self._decode_feature = DecodeFeatureProcess(config.dataset.features, feature_sizes)
-    def __call__(self, voice_path: Path, out_sampling_rate: Optional[int] = None):
-        input = input_wave = self._wave_process(str(voice_path), test=True)
+    def convert_from_audio_path(self, input: Path, out_sampling_rate: Optional[int] = None):
+        input = self._wave_process(str(input), test=True)
+        input = self._feature_process(input, test=True)
+        return self.convert_from_feature(input, out_sampling_rate)
+
+    def convert_from_feature_path(self, input: Path, out_sampling_rate: Optional[int] = None):
+        input = self._acoustic_feature_load_process(input, test=True)
+        return self.convert_from_feature(input, out_sampling_rate)
+
+    def convert_from_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
         if out_sampling_rate is None:
-            out_sampling_rate = input_wave.sampling_rate
+            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate
-        input = input_feature = self._feature_process(input, test=True)
+        input_feature = input
         input = self._feature_normalize(input, test=True)
         input = self._encode_feature(input, test=True)
@@ -93,7 +101,7 @@ class VoiceChanger(object):
             voiced=out.voiced,
         )
-        fftlen = pyworld.get_cheaptrick_fft_size(input_wave.sampling_rate)
+        fftlen = pyworld.get_cheaptrick_fft_size(out_sampling_rate)
         spectrogram = pysptk.mc2sp(
             out.mfcc,
             alpha=self._param.acoustic_feature_param.alpha,
@@ -116,3 +124,6 @@ class VoiceChanger(object):
         )
         return Wave(out, sampling_rate=out_sampling_rate)
+
+    def __call__(self, voice_path: Path, out_sampling_rate: Optional[int] = None):
+        return self.convert_from_audio_path(voice_path, out_sampling_rate)
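For reference, a minimal usage sketch of the entry points this commit introduces. Only __call__, convert_from_audio_path, convert_from_feature_path and convert_from_feature come from the diff above; the load_voice_changer helper and the file names below are hypothetical stand-ins for whatever construction the surrounding project uses.

from pathlib import Path

from become_yukarin.voice_changer import VoiceChanger


def load_voice_changer() -> VoiceChanger:
    # Hypothetical helper: the VoiceChanger constructor arguments are not
    # part of this diff, so building it from a trained model and config is
    # left to the caller's project setup.
    raise NotImplementedError


voice_changer = load_voice_changer()

# Existing behaviour is preserved: __call__ now simply forwards to
# convert_from_audio_path.
wave = voice_changer(Path('input.wav'))

# Equivalent explicit call: read the wave, extract acoustic features, convert.
wave = voice_changer.convert_from_audio_path(Path('input.wav'))

# New in this commit: start from a saved acoustic feature instead of audio.
# The file is read through the AcousticFeatureLoadProcess instance that the
# constructor now keeps on self._acoustic_feature_load_process.
wave = voice_changer.convert_from_feature_path(Path('input_feature.npy'))

# When out_sampling_rate is omitted, convert_from_feature falls back to
# config.dataset.param.voice_param.sample_rate, since an acoustic feature
# carries no input-wave sampling rate of its own.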