diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test_voice_changer.py | 38 |
1 file changed, 29 insertions, 9 deletions
diff --git a/tests/test_voice_changer.py b/tests/test_voice_changer.py index ceddf9c..66ea003 100644 --- a/tests/test_voice_changer.py +++ b/tests/test_voice_changer.py @@ -32,16 +32,16 @@ test_output_path = Path('output.wav') print('model loading...', flush=True) -model_path = model_base_path / Path('harvest-innoise03/predictor_1390000.npz') -config_path = model_base_path / Path('harvest-innoise03/config.json') +model_path = model_base_path / Path('pp-weakD-innoise01-tarnoise001/predictor_120000.npz') +config_path = model_base_path / Path('pp-weakD-innoise01-tarnoise001/config.json') config = create_config(config_path) -acoustic_converter = AcousticConverter(config, model_path, gpu=0) +acoustic_converter = AcousticConverter(config, model_path) print('model 1 loaded!', flush=True) model_path = model_base_path / Path('sr-noise3/predictor_180000.npz') config_path = model_base_path / Path('sr-noise3/config.json') sr_config = create_sr_config(config_path) -super_resolution = SuperResolution(sr_config, model_path, gpu=0) +super_resolution = SuperResolution(sr_config, model_path) print('model 2 loaded!', flush=True) audio_config = AudioConfig( @@ -50,6 +50,7 @@ audio_config = AudioConfig( vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16, out_norm=4.5, ) +frame_period = config.dataset.param.acoustic_feature_param.frame_period vocoder = RealtimeVocoder( acoustic_feature_param=config.dataset.param.acoustic_feature_param, @@ -57,22 +58,24 @@ vocoder = RealtimeVocoder( buffer_size=audio_config.vocoder_buffer_size, number_of_pointers=16, ) -# vocoder.warm_up(audio_config.vocoder_buffer_size / config.dataset.param.voice_param.sample_rate) voice_changer = VoiceChanger( super_resolution=super_resolution, acoustic_converter=acoustic_converter, - vocoder=vocoder, ) voice_changer_stream = VoiceChangerStream( - voice_changer=voice_changer, sampling_rate=audio_config.rate, + frame_period=acoustic_converter._param.acoustic_feature_param.frame_period, 
in_dtype=numpy.float32, ) +voice_changer_stream.voice_changer = voice_changer +voice_changer_stream.vocoder = vocoder + wrapper = VoiceChangerStreamWrapper( voice_changer_stream=voice_changer_stream, + extra_time_pre=1, extra_time=0.2, ) @@ -85,9 +88,26 @@ for i in range(0, len(raw_wave), audio_config.chunk): wrapper.voice_changer_stream.add_wave(start_time=start_time, wave=wave_in) start_time += len(wave_in.wave) / wave_in.sampling_rate - wave_out = wrapper.convert_next(time_length=audio_config.chunk / audio_config.rate) +start_time = 0 +for i in range(len(raw_wave) // audio_config.chunk + 1): + feature_in = wrapper.pre_convert_next(time_length=audio_config.chunk / audio_config.rate) + wrapper.voice_changer_stream.add_in_feature(start_time=start_time, feature=feature_in, frame_period=frame_period) + start_time += audio_config.chunk / audio_config.rate + print('pre', i, flush=True) + +start_time = 0 +for i in range(len(raw_wave) // audio_config.chunk + 1): + feature_out = wrapper.convert_next(time_length=audio_config.chunk / audio_config.rate) + wrapper.voice_changer_stream.add_out_feature(start_time=start_time, feature=feature_out, frame_period=frame_period) + start_time += audio_config.chunk / audio_config.rate + print('cent', i, flush=True) + +start_time = 0 +for i in range(len(raw_wave) // audio_config.chunk + 1): + wave_out = wrapper.post_convert_next(time_length=audio_config.chunk / audio_config.rate) wave_out_list.append(wave_out) - wrapper.remove_previous_wave() + start_time += audio_config.chunk / audio_config.rate + print('post', i, flush=True) out_wave = numpy.concatenate([w.wave for w in wave_out_list]).astype(numpy.float32) librosa.output.write_wav(str(test_output_path), out_wave, sr=audio_config.rate) |
