add unvoiced and f0

author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-15 00:59:31 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-15 00:59:31 +0900
commit: a4f60ab4cd44d1fc89e83bb662fe430e3824d0dc (patch)
tree: c35277c543045016e44b97cc359c0507c3f5cef5 /become_yukarin
parent: 1ad9c9a59a6ccc9fbb9d27d17c95c23d3cbabcc3 (diff)
3 files changed, 49 insertions, 9 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 7b220f0..786dd7e 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -26,6 +26,21 @@ class AcousticFeature(NamedTuple):
             voiced=self.mfcc.astype(dtype),
         )
 
+    def validate(self):
+        assert self.f0.ndim == 2
+        assert self.spectrogram.ndim == 2
+        assert self.aperiodicity.ndim == 2
+        assert self.mfcc.ndim == 2
+        assert self.voiced.ndim == 2
+
+        len_time = len(self.f0)
+        assert len(self.spectrogram) == len_time
+        assert len(self.aperiodicity) == len_time
+        assert len(self.mfcc) == len_time
+        assert len(self.voiced) == len_time
+
+        assert self.voiced.dtype == numpy.bool
+
     @staticmethod
     def get_sizes(sampling_rate: int, order: int):
         fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 09931b3..dc5bc74 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,30 +88,55 @@ class AcousticFeatureProcess(BaseDataProcess):
         f0 = pyworld.stonemask(x, _f0, t, fs)
         spectrogram = pyworld.cheaptrick(x, f0, t, fs)
         aperiodicity = pyworld.d4c(x, f0, t, fs)
+
         mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
         voiced = ~(f0 == 0)  # type: numpy.ndarray
-        return AcousticFeature(
-            f0=f0.astype(self._dtype),
+
+        feature = AcousticFeature(
+            f0=f0[:, None].astype(self._dtype),
             spectrogram=spectrogram.astype(self._dtype),
             aperiodicity=aperiodicity.astype(self._dtype),
             mfcc=mfcc.astype(self._dtype),
-            voiced=voiced.astype(self._dtype),
+            voiced=voiced[:, None].astype(self._dtype),
         )
+        feature.validate()
+        return feature
 
 
 class AcousticFeatureLoadProcess(BaseDataProcess):
-    def __init__(self):
-        pass
+    def __init__(self, validate=False):
+        self._validate = validate
 
-    def __call__(self, path: Path, test):
+    def __call__(self, path: Path, test=None):
         d = numpy.load(path).item()  # type: dict
-        return AcousticFeature(
+        feature = AcousticFeature(
             f0=d['f0'],
             spectrogram=d['spectrogram'],
             aperiodicity=d['aperiodicity'],
             mfcc=d['mfcc'],
             voiced=d['voiced'],
         )
+        if self._validate:
+            feature.validate()
+        return feature
+
+
+class AcousticFeatureSaveProcess(BaseDataProcess):
+    def __init__(self, validate=False):
+        self._validate = validate
+
+    def __call__(self, data: Dict[str, any], test=None):
+        path = data['path']  # type: Path
+        feature = data['feature']  # type: AcousticFeature
+        if self._validate:
+            feature.validate()
+        numpy.save(path.absolute(), dict(
+            f0=feature.f0,
+            spectrogram=feature.spectrogram,
+            aperiodicity=feature.aperiodicity,
+            mfcc=feature.mfcc,
+            voiced=feature.voiced,
+        ))
 
 
 class AcousticFeatureNormalizeProcess(BaseDataProcess):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d6d39c6..c160a5d 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,11 +87,11 @@ class VoiceChanger(object):
         )
 
         out = AcousticFeature(
-            f0=input_feature.f0,
+            f0=out.f0,
             spectrogram=spectrogram,
             aperiodicity=input_feature.aperiodicity,
             mfcc=out.mfcc,
-            voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
+            voiced=input_feature.voiced,
         ).astype(numpy.float64)
         out = pyworld.synthesize(
             f0=out.f0,
author	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-15 00:59:31 +0900
committer	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-15 00:59:31 +0900
commit	a4f60ab4cd44d1fc89e83bb662fe430e3824d0dc (patch)
tree	c35277c543045016e44b97cc359c0507c3f5cef5 /become_yukarin
parent	1ad9c9a59a6ccc9fbb9d27d17c95c23d3cbabcc3 (diff)