summaryrefslogtreecommitdiff
path: root/become_yukarin/vocoder.py
diff options
context:
space:
mode:
Diffstat (limited to 'become_yukarin/vocoder.py')
-rw-r--r--become_yukarin/vocoder.py105
1 files changed, 105 insertions, 0 deletions
diff --git a/become_yukarin/vocoder.py b/become_yukarin/vocoder.py
new file mode 100644
index 0000000..a49e8f2
--- /dev/null
+++ b/become_yukarin/vocoder.py
@@ -0,0 +1,105 @@
+import numpy
+import pyworld
+from world4py.native import structures, apidefinitions, utils
+
+from become_yukarin.data_struct import AcousticFeature
+from become_yukarin.data_struct import Wave
+from become_yukarin.dataset.dataset import AcousticFeatureProcess
+from become_yukarin.param import AcousticFeatureParam
+
+
+class Vocoder(object):
+ def __init__(
+ self,
+ acoustic_feature_param: AcousticFeatureParam,
+ out_sampling_rate: int,
+ ):
+ self.acoustic_feature_param = acoustic_feature_param
+ self.out_sampling_rate = out_sampling_rate
+ self._encoder = AcousticFeatureProcess(
+ frame_period=acoustic_feature_param.frame_period,
+ order=acoustic_feature_param.order,
+ alpha=acoustic_feature_param.alpha,
+ )
+
+ def encode(self, wave: Wave):
+ return self._encoder(wave)
+
+ def decode(
+ self,
+ acoustic_feature: AcousticFeature,
+ ):
+ acoustic_feature = acoustic_feature.astype_only_float(numpy.float64)
+ out = pyworld.synthesize(
+ f0=acoustic_feature.f0.ravel(),
+ spectrogram=acoustic_feature.spectrogram,
+ aperiodicity=acoustic_feature.aperiodicity,
+ fs=self.out_sampling_rate,
+ frame_period=self.acoustic_feature_param.frame_period
+ )
+ return Wave(out, sampling_rate=self.out_sampling_rate)
+
+
+class RealtimeVocoder(Vocoder):
+ def __init__(
+ self,
+ acoustic_feature_param: AcousticFeatureParam,
+ out_sampling_rate: int,
+ buffer_size: int,
+ number_of_pointers: int,
+ ):
+ super().__init__(
+ acoustic_feature_param=acoustic_feature_param,
+ out_sampling_rate=out_sampling_rate,
+ )
+
+ self.buffer_size = buffer_size
+
+ self._synthesizer = structures.WorldSynthesizer()
+ apidefinitions._InitializeSynthesizer(
+ self.out_sampling_rate, # sampling rate
+ self.acoustic_feature_param.frame_period, # frame period
+ pyworld.get_cheaptrick_fft_size(out_sampling_rate), # fft size
+ buffer_size, # buffer size
+ number_of_pointers, # number of pointers
+ self._synthesizer,
+ )
+ self._before_buffer = None # for holding memory
+
+ def decode(
+ self,
+ acoustic_feature: AcousticFeature,
+ ):
+ length = len(acoustic_feature.f0)
+ f0_buffer = utils.cast_1d_list_to_1d_pointer(acoustic_feature.f0.flatten().tolist())
+ sp_buffer = utils.cast_2d_list_to_2d_pointer(acoustic_feature.spectrogram.tolist())
+ ap_buffer = utils.cast_2d_list_to_2d_pointer(acoustic_feature.aperiodicity.tolist())
+ apidefinitions._AddParameters(f0_buffer, length, sp_buffer, ap_buffer, self._synthesizer)
+
+ ys = []
+ while apidefinitions._Synthesis2(self._synthesizer) != 0:
+ y = numpy.array([self._synthesizer.buffer[i] for i in range(self.buffer_size)])
+ ys.append(y)
+
+ if len(ys) > 0:
+ out_wave = Wave(
+ wave=numpy.concatenate(ys),
+ sampling_rate=self.out_sampling_rate,
+ )
+ else:
+ out_wave = Wave(
+ wave=numpy.empty(0),
+ sampling_rate=self.out_sampling_rate,
+ )
+
+ self._before_buffer = (f0_buffer, sp_buffer, ap_buffer) # for holding memory
+ return out_wave
+
+ def warm_up(self, time_length: float):
+ y = numpy.zeros(int(time_length * self.out_sampling_rate))
+ w = Wave(wave=y, sampling_rate=self.out_sampling_rate)
+ f = self.encode(w)
+ self.decode(f)
+
+ def __del__(self):
+ apidefinitions._DestroySynthesizer(self._synthesizer)