| author | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2018-02-04 09:39:39 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2018-02-04 09:39:39 +0900 |
| commit | 29f0994ea34f3df7dd5eea0b330d429b2e492211 (patch) | |
| tree | 4c6827c3983ad7463f582c619db937e9f7e9f561 /become_yukarin | |
| parent | 48addd22a87f248bb8041bca47e9c209a16175a4 (diff) | |
Real-time conversion is now possible (リアルタイム変換が可能)
Diffstat (limited to 'become_yukarin')
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 15 |
| -rw-r--r-- | become_yukarin/vocoder.py | 6 |
| -rw-r--r-- | become_yukarin/voice_changer.py | 12 |
3 files changed, 25 insertions, 8 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 178844a..7e3acbf 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -87,8 +87,16 @@ class WaveFileLoadProcess(BaseDataProcess):
 
 
 class AcousticFeatureProcess(BaseDataProcess):
-    def __init__(self, frame_period, order, alpha, f0_estimating_method, f0_floor=71, f0_ceil=800,
-                 dtype=numpy.float32) -> None:
+    def __init__(
+            self,
+            frame_period,
+            order,
+            alpha,
+            f0_estimating_method,
+            f0_floor=71,
+            f0_ceil=800,
+            dtype=numpy.float32,
+    ) -> None:
         self._frame_period = frame_period
         self._order = order
         self._alpha = alpha
@@ -110,7 +118,8 @@ class AcousticFeatureProcess(BaseDataProcess):
                 f0_ceil=self._f0_ceil,
             )
         else:
-            _f0, t = pyworld.harvest(
+            from world4py.np import apis
+            _f0, t = apis.harvest(
                 x,
                 fs,
                 frame_period=self._frame_period,
diff --git a/become_yukarin/vocoder.py b/become_yukarin/vocoder.py
index f1a9f03..0674e0f 100644
--- a/become_yukarin/vocoder.py
+++ b/become_yukarin/vocoder.py
@@ -65,7 +65,7 @@ class RealtimeVocoder(Vocoder):
             number_of_pointers,  # number of pointers
             self._synthesizer,
         )
-        self._before_buffer = None  # for holding memory
+        self._before_buffer = []  # for holding memory
 
     def decode(
             self,
@@ -93,7 +93,9 @@ class RealtimeVocoder(Vocoder):
             sampling_rate=self.out_sampling_rate,
         )
 
-        self._before_buffer = (f0_buffer, sp_buffer, ap_buffer)  # for holding memory
+        self._before_buffer.append((f0_buffer, sp_buffer, ap_buffer))  # for holding memory
+        if len(self._before_buffer) > 16:
+            self._before_buffer.pop(0)
         return out_wave
 
     def warm_up(self, time_length: float):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index 05f5a96..5e0eac0 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -83,7 +83,7 @@ class VoiceChangerStream(object):
     def remove_wave(self, end_time: float):
         self._data_stream = list(filter(lambda s: s.end_time > end_time, self._data_stream))
 
-    def convert(self, start_time: float, time_length: float):
+    def convert_to_feature(self, start_time: float, time_length: float):
         end_time = start_time + time_length
         buffer_list = []
         stream = filter(lambda s: not (end_time < s.start_time or s.end_time < start_time), self._data_stream)
@@ -121,12 +121,18 @@ class VoiceChangerStream(object):
             buffer_list.append(pad)
         buffer = numpy.concatenate(buffer_list)
-        print('buffer', len(buffer), flush=True)
         in_wave = Wave(wave=buffer, sampling_rate=self.sampling_rate)
         in_feature = self.vocoder.encode(in_wave)
         out_feature = self.voice_changer.convert_from_acoustic_feature(in_feature)
         return out_feature
 
+    def convert(self, start_time: float, time_length: float):
+        feature = self.convert_to_feature(start_time=start_time, time_length=time_length)
+        out_wave = self.vocoder.decode(
+            acoustic_feature=feature,
+        )
+        return out_wave
+
     def convert_with_extra_time(self, start_time: float, time_length: float, extra_time: float):
         """
         :param extra_time: 音声変換時に余分に使うデータの時間長。ゼロパディングを防ぐ。
         """
@@ -136,7 +142,7 @@ class VoiceChangerStream(object):
         start_time -= extra_time
         time_length += extra_time * 2
 
-        extra_feature = self.convert(start_time=start_time, time_length=time_length)
+        extra_feature = self.convert_to_feature(start_time=start_time, time_length=time_length)
         pad = int(extra_time / (frame_period / 1000))
         feature = AcousticFeature(
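The vocoder.py hunk is a memory keep-alive fix: the f0/sp/ap arrays handed to the native realtime synthesizer must stay referenced from Python until the synthesizer has consumed them, so decode() now appends them to a list and drops the oldest entry once more than 16 are held, rather than keeping only the most recent tuple. A minimal sketch of that pattern, using collections.deque as an equivalent stand-in for the list-plus-pop(0) in the diff (the class and names below are illustrative, not become_yukarin's API):

```python
from collections import deque


class KeepAliveBuffers:
    """Hold references to buffers so they are not garbage-collected
    while native code (e.g. a WORLD synthesizer) may still read them."""

    def __init__(self, max_items: int = 16) -> None:
        # maxlen makes the deque drop its oldest entry automatically,
        # matching the diff's append + pop(0) once len() > 16.
        self._held = deque(maxlen=max_items)

    def hold(self, *buffers) -> None:
        self._held.append(buffers)  # keep the whole tuple alive


# usage: keep f0/sp/ap arrays alive across a synthesis call
keep_alive = KeepAliveBuffers(max_items=16)
f0_buffer, sp_buffer, ap_buffer = [0.0] * 4, [0.0] * 4, [0.0] * 4
keep_alive.hold(f0_buffer, sp_buffer, ap_buffer)
```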

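In voice_changer.py, the old convert() (which actually returned an acoustic feature) becomes convert_to_feature(), a new convert() decodes that feature back to a waveform with the vocoder, and convert_with_extra_time() keeps calling the feature-level method so it can trim pad = int(extra_time / (frame_period / 1000)) frames from each side; for example, with a 5 ms frame period and 0.2 s of extra context, 40 frames are trimmed per edge. Below is a rough, self-contained sketch of how a realtime loop might drive this call pattern; ToyStream, its add_wave feeder, and the fixed chunk sizes are invented for illustration and are not part of this commit:

```python
import numpy


class ToyStream:
    """Stand-in mirroring only the call pattern of VoiceChangerStream."""

    def __init__(self, sampling_rate: int = 24000) -> None:
        self.sampling_rate = sampling_rate
        self._chunks = []

    def add_wave(self, start_time: float, wave) -> None:
        # Assumed feeder method (not shown in this diff).
        self._chunks.append((start_time, wave))

    def remove_wave(self, end_time: float) -> None:
        # Same idea as the diff: forget audio that is no longer needed.
        self._chunks = [(t, w) for t, w in self._chunks if t > end_time]

    def convert(self, start_time: float, time_length: float):
        # The real class converts features and decodes them to a wave;
        # here we just return silence of the right length.
        return numpy.zeros(int(time_length * self.sampling_rate), dtype=numpy.float32)


stream = ToyStream()
buffer_time = 0.5          # seconds converted per loop iteration
start_time = 0.0
for _ in range(4):         # one iteration per captured realtime chunk
    chunk = numpy.zeros(int(buffer_time * stream.sampling_rate), dtype=numpy.float32)
    stream.add_wave(start_time, chunk)                       # feed captured audio
    out_wave = stream.convert(start_time=start_time, time_length=buffer_time)
    stream.remove_wave(end_time=start_time)                  # drop already-converted audio
    start_time += buffer_time                                # advance the window
```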