summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- become_yukarin/data_struct.py 2
-rw-r--r-- become_yukarin/dataset/dataset.py 6
-rw-r--r-- become_yukarin/voice_changer.py 1
3 files changed, 9 insertions, 0 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index f0601a5..63043e2 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -13,6 +13,7 @@ class AcousticFeature(NamedTuple):
spectrogram: numpy.ndarray
aperiodicity: numpy.ndarray
mfcc: numpy.ndarray
+ voiced: numpy.ndarray
def astype(self, dtype):
return AcousticFeature(
@@ -20,4 +21,5 @@ class AcousticFeature(NamedTuple):
spectrogram=self.spectrogram.astype(dtype),
aperiodicity=self.aperiodicity.astype(dtype),
mfcc=self.mfcc.astype(dtype),
+ voiced=self.voiced.astype(dtype),
)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 7a6ce08..93619e3 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,11 +88,13 @@ class AcousticFeatureProcess(BaseDataProcess):
spectrogram = pyworld.cheaptrick(x, f0, t, fs)
aperiodicity = pyworld.d4c(x, f0, t, fs)
mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
+ voiced = ~(f0 == 0) # type: numpy.ndarray
return AcousticFeature(
f0=f0.astype(self._dtype),
spectrogram=spectrogram.astype(self._dtype),
aperiodicity=aperiodicity.astype(self._dtype),
mfcc=mfcc.astype(self._dtype),
+ voiced=voiced.astype(self._dtype),
)
@@ -107,6 +109,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
spectrogram=d['spectrogram'],
aperiodicity=d['aperiodicity'],
mfcc=d['mfcc'],
+ voiced=d['voiced'],
)
@@ -121,6 +124,7 @@ class AcousticFeatureNormalizeProcess(BaseDataProcess):
spectrogram=(data.spectrogram - self._mean.spectrogram) / numpy.sqrt(self._var.spectrogram),
aperiodicity=(data.aperiodicity - self._mean.aperiodicity) / numpy.sqrt(self._var.aperiodicity),
mfcc=(data.mfcc - self._mean.mfcc) / numpy.sqrt(self._var.mfcc),
+ voiced=data.voiced,
)
@@ -135,6 +139,7 @@ class AcousticFeatureDenormalizeProcess(BaseDataProcess):
spectrogram=data.spectrogram * numpy.sqrt(self._var.spectrogram) + self._mean.spectrogram,
aperiodicity=data.aperiodicity * numpy.sqrt(self._var.aperiodicity) + self._mean.aperiodicity,
mfcc=data.mfcc * numpy.sqrt(self._var.mfcc) + self._mean.mfcc,
+ voiced=data.voiced,
)
@@ -160,6 +165,7 @@ class DecodeFeatureProcess(BaseDataProcess):
spectrogram=numpy.nan,
aperiodicity=numpy.nan,
mfcc=data,
+ voiced=numpy.nan,
)
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index 15c532c..e40069c 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,6 +87,7 @@ class VoiceChanger(object):
spectrogram=spectrogram,
aperiodicity=input_feature.aperiodicity,
mfcc=out.mfcc,
+ voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
).astype(numpy.float64)
out = pyworld.synthesize(
f0=out.f0,