diff options
| -rw-r--r-- | become_yukarin/data_struct.py | 2 |
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 6 |
| -rw-r--r-- | become_yukarin/voice_changer.py | 1 |
3 files changed, 9 insertions, 0 deletions
Add a `voiced` array to AcousticFeature and carry it through the
AcousticFeature constructions touched below (feature extraction, loading,
normalization, denormalization, decoding, and voice conversion).

Review notes:
- AcousticFeature.astype converts `self.voiced` for the new field; the patch
  as first written copied `self.mfcc` into `voiced`.
- The post-image blob id on the data_struct.py `index` line was recorded
  before that correction.
- NOTE(review): AcousticFeatureProcess derives voiced as `~(f0 == 0)`, while
  VoiceChanger passes `f0 < cheaptrick f0 floor`; these look like opposite
  polarities. Confirm which one is intended.
- Indentation and blank context lines were restored from the hunk headers'
  line counts; verify against the real files before applying.

diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index f0601a5..63043e2 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -13,6 +13,7 @@ class AcousticFeature(NamedTuple):
     spectrogram: numpy.ndarray
     aperiodicity: numpy.ndarray
     mfcc: numpy.ndarray
+    voiced: numpy.ndarray
 
     def astype(self, dtype):
         return AcousticFeature(
@@ -20,4 +21,5 @@ class AcousticFeature(NamedTuple):
             spectrogram=self.spectrogram.astype(dtype),
             aperiodicity=self.aperiodicity.astype(dtype),
             mfcc=self.mfcc.astype(dtype),
+            voiced=self.voiced.astype(dtype),
         )
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 7a6ce08..93619e3 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,11 +88,13 @@ class AcousticFeatureProcess(BaseDataProcess):
         spectrogram = pyworld.cheaptrick(x, f0, t, fs)
         aperiodicity = pyworld.d4c(x, f0, t, fs)
         mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
+        voiced = ~(f0 == 0)  # type: numpy.ndarray
         return AcousticFeature(
             f0=f0.astype(self._dtype),
             spectrogram=spectrogram.astype(self._dtype),
             aperiodicity=aperiodicity.astype(self._dtype),
             mfcc=mfcc.astype(self._dtype),
+            voiced=voiced.astype(self._dtype),
         )
 
 
@@ -107,6 +109,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
             spectrogram=d['spectrogram'],
             aperiodicity=d['aperiodicity'],
             mfcc=d['mfcc'],
+            voiced=d['voiced'],
         )
 
 
@@ -121,6 +124,7 @@ class AcousticFeatureNormalizeProcess(BaseDataProcess):
             spectrogram=(data.spectrogram - self._mean.spectrogram) / numpy.sqrt(self._var.spectrogram),
             aperiodicity=(data.aperiodicity - self._mean.aperiodicity) / numpy.sqrt(self._var.aperiodicity),
             mfcc=(data.mfcc - self._mean.mfcc) / numpy.sqrt(self._var.mfcc),
+            voiced=data.voiced,
         )
 
 
@@ -135,6 +139,7 @@ class AcousticFeatureDenormalizeProcess(BaseDataProcess):
             spectrogram=data.spectrogram * numpy.sqrt(self._var.spectrogram) + self._mean.spectrogram,
             aperiodicity=data.aperiodicity * numpy.sqrt(self._var.aperiodicity) + self._mean.aperiodicity,
             mfcc=data.mfcc * numpy.sqrt(self._var.mfcc) + self._mean.mfcc,
+            voiced=data.voiced,
         )
 
 
@@ -160,6 +165,7 @@ class DecodeFeatureProcess(BaseDataProcess):
             spectrogram=numpy.nan,
             aperiodicity=numpy.nan,
             mfcc=data,
+            voiced=numpy.nan,
         )
 
 
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index 15c532c..e40069c 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,6 +87,7 @@ class VoiceChanger(object):
             spectrogram=spectrogram,
             aperiodicity=input_feature.aperiodicity,
             mfcc=out.mfcc,
+            voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
         ).astype(numpy.float64)
         out = pyworld.synthesize(
             f0=out.f0,
