diff options
| author | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2018-03-11 02:37:25 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2018-03-11 02:37:25 +0900 |
| commit | f8823b1913c29ce2710f92d51b74cb84b74323b0 (patch) | |
| tree | b6a20f58f30a413369520c6e391729c6a45ba383 | |
| parent | ef2be3c389412e69d33d7da0066a4831500689d2 (diff) | |
無音を無視するように変更 (Changed to ignore silence)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | become_yukarin/config/config.py | 3 |
| -rw-r--r-- | become_yukarin/voice_changer.py | 9 |
| -rw-r--r-- | scripts/realtime_voice_changer.py | 12 |
| -rw-r--r-- | tests/test_voice_changer.py | 1 |
4 files changed, 17 insertions, 8 deletions
diff --git a/become_yukarin/config/config.py b/become_yukarin/config/config.py index f1f24cf..ed128c4 100644 --- a/become_yukarin/config/config.py +++ b/become_yukarin/config/config.py @@ -145,3 +145,6 @@ def backward_compatible(d: Dict): d['model']['generator_extensive_layers'] = 8 d['model']['discriminator_base_channels'] = 32 d['model']['discriminator_extensive_layers'] = 5 + + if 'weak_discriminator' not in d['model']: + d['model']['weak_discriminator'] = False diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py index bed155f..7f7bbe4 100644 --- a/become_yukarin/voice_changer.py +++ b/become_yukarin/voice_changer.py @@ -4,6 +4,7 @@ from typing import NamedTuple import numpy +from become_yukarin.param import Param from .acoustic_converter import AcousticConverter from .data_struct import AcousticFeature from .data_struct import Wave @@ -78,10 +79,12 @@ class VoiceChangerStream(object): self, sampling_rate: int, frame_period: float, + order: int, in_dtype=numpy.float32, ): self.sampling_rate = sampling_rate self.frame_period = frame_period + self.order = order self.in_dtype = in_dtype self.voice_changer: VoiceChanger = None @@ -189,8 +192,7 @@ class VoiceChangerStream(object): return in_feature def convert(self, start_time: float, time_length: float, extra_time: float): - order = self.voice_changer.acoustic_converter.config.dataset.param.acoustic_feature_param.order - sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=order) + sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order) keys = ['f0', 'aperiodicity', 'mfcc', 'voiced'] in_feature = self.fetch( start_time=start_time, @@ -209,8 +211,7 @@ class VoiceChangerStream(object): return out_feature def post_convert(self, start_time: float, time_length: float): - order = self.voice_changer.acoustic_converter.config.dataset.param.acoustic_feature_param.order - sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, 
order=order) + sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order) keys = ['f0', 'aperiodicity', 'spectrogram', 'voiced'] out_feature = self.fetch( start_time=start_time, diff --git a/scripts/realtime_voice_changer.py b/scripts/realtime_voice_changer.py index e96ce4e..bda64dd 100644 --- a/scripts/realtime_voice_changer.py +++ b/scripts/realtime_voice_changer.py @@ -1,11 +1,9 @@ +import librosa import world4py world4py._WORLD_LIBRARY_PATH = 'x64_world.dll' -from functools import partial from pathlib import Path -import signal -import time from typing import NamedTuple from multiprocessing import Queue from multiprocessing import Process @@ -34,6 +32,7 @@ class AudioConfig(NamedTuple): convert_chunk: int vocoder_buffer_size: int out_norm: float + silent_threshold: float def encode_worker( @@ -123,7 +122,10 @@ def decode_worker( wave_fragment = numpy.concatenate([wave_fragment, wave]) if len(wave_fragment) >= audio_config.audio_chunk: wave, wave_fragment = wave_fragment[:audio_config.audio_chunk], wave_fragment[audio_config.audio_chunk:] - queue_output.put(wave) + + power = librosa.core.power_to_db(numpy.abs(librosa.stft(wave)) ** 2).mean() + if power >= audio_config.silent_threshold: + queue_output.put(wave) def main(): @@ -154,11 +156,13 @@ def main(): convert_chunk=config.dataset.param.voice_param.sample_rate, vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16, out_norm=2.5, + silent_threshold=-99.0, ) voice_changer_stream = VoiceChangerStream( sampling_rate=audio_config.rate, frame_period=config.dataset.param.acoustic_feature_param.frame_period, + order=config.dataset.param.acoustic_feature_param.order, in_dtype=numpy.float32, ) diff --git a/tests/test_voice_changer.py b/tests/test_voice_changer.py index 66ea003..9772bbe 100644 --- a/tests/test_voice_changer.py +++ b/tests/test_voice_changer.py @@ -67,6 +67,7 @@ voice_changer = VoiceChanger( voice_changer_stream = VoiceChangerStream( 
sampling_rate=audio_config.rate, frame_period=acoustic_converter._param.acoustic_feature_param.frame_period, + order=acoustic_converter._param.acoustic_feature_param.order, in_dtype=numpy.float32, ) |
