summaryrefslogtreecommitdiff
path: root/tests/test_voice_changer.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_voice_changer.py')
-rw-r--r--tests/test_voice_changer.py38
1 file changed, 29 insertions, 9 deletions
diff --git a/tests/test_voice_changer.py b/tests/test_voice_changer.py
index ceddf9c..66ea003 100644
--- a/tests/test_voice_changer.py
+++ b/tests/test_voice_changer.py
@@ -32,16 +32,16 @@ test_output_path = Path('output.wav')
print('model loading...', flush=True)
-model_path = model_base_path / Path('harvest-innoise03/predictor_1390000.npz')
-config_path = model_base_path / Path('harvest-innoise03/config.json')
+model_path = model_base_path / Path('pp-weakD-innoise01-tarnoise001/predictor_120000.npz')
+config_path = model_base_path / Path('pp-weakD-innoise01-tarnoise001/config.json')
config = create_config(config_path)
-acoustic_converter = AcousticConverter(config, model_path, gpu=0)
+acoustic_converter = AcousticConverter(config, model_path)
print('model 1 loaded!', flush=True)
model_path = model_base_path / Path('sr-noise3/predictor_180000.npz')
config_path = model_base_path / Path('sr-noise3/config.json')
sr_config = create_sr_config(config_path)
-super_resolution = SuperResolution(sr_config, model_path, gpu=0)
+super_resolution = SuperResolution(sr_config, model_path)
print('model 2 loaded!', flush=True)
audio_config = AudioConfig(
@@ -50,6 +50,7 @@ audio_config = AudioConfig(
vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
out_norm=4.5,
)
+frame_period = config.dataset.param.acoustic_feature_param.frame_period
vocoder = RealtimeVocoder(
acoustic_feature_param=config.dataset.param.acoustic_feature_param,
@@ -57,22 +58,24 @@ vocoder = RealtimeVocoder(
buffer_size=audio_config.vocoder_buffer_size,
number_of_pointers=16,
)
-# vocoder.warm_up(audio_config.vocoder_buffer_size / config.dataset.param.voice_param.sample_rate)
voice_changer = VoiceChanger(
super_resolution=super_resolution,
acoustic_converter=acoustic_converter,
- vocoder=vocoder,
)
voice_changer_stream = VoiceChangerStream(
- voice_changer=voice_changer,
sampling_rate=audio_config.rate,
+ frame_period=acoustic_converter._param.acoustic_feature_param.frame_period,
in_dtype=numpy.float32,
)
+voice_changer_stream.voice_changer = voice_changer
+voice_changer_stream.vocoder = vocoder
+
wrapper = VoiceChangerStreamWrapper(
voice_changer_stream=voice_changer_stream,
+ extra_time_pre=1,
extra_time=0.2,
)
@@ -85,9 +88,26 @@ for i in range(0, len(raw_wave), audio_config.chunk):
wrapper.voice_changer_stream.add_wave(start_time=start_time, wave=wave_in)
start_time += len(wave_in.wave) / wave_in.sampling_rate
- wave_out = wrapper.convert_next(time_length=audio_config.chunk / audio_config.rate)
+start_time = 0
+for i in range(len(raw_wave) // audio_config.chunk + 1):
+ feature_in = wrapper.pre_convert_next(time_length=audio_config.chunk / audio_config.rate)
+ wrapper.voice_changer_stream.add_in_feature(start_time=start_time, feature=feature_in, frame_period=frame_period)
+ start_time += audio_config.chunk / audio_config.rate
+ print('pre', i, flush=True)
+
+start_time = 0
+for i in range(len(raw_wave) // audio_config.chunk + 1):
+ feature_out = wrapper.convert_next(time_length=audio_config.chunk / audio_config.rate)
+ wrapper.voice_changer_stream.add_out_feature(start_time=start_time, feature=feature_out, frame_period=frame_period)
+ start_time += audio_config.chunk / audio_config.rate
+ print('cent', i, flush=True)
+
+start_time = 0
+for i in range(len(raw_wave) // audio_config.chunk + 1):
+ wave_out = wrapper.post_convert_next(time_length=audio_config.chunk / audio_config.rate)
wave_out_list.append(wave_out)
- wrapper.remove_previous_wave()
+ start_time += audio_config.chunk / audio_config.rate
+ print('post', i, flush=True)
out_wave = numpy.concatenate([w.wave for w in wave_out_list]).astype(numpy.float32)
librosa.output.write_wav(str(test_output_path), out_wave, sr=audio_config.rate)