summary refs log tree commit diff
path: root/become_yukarin
diff options
context:
space:
mode:
author Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-15 00:59:31 +0900
committer Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-15 00:59:31 +0900
commit a4f60ab4cd44d1fc89e83bb662fe430e3824d0dc (patch)
tree c35277c543045016e44b97cc359c0507c3f5cef5 /become_yukarin
parent 1ad9c9a59a6ccc9fbb9d27d17c95c23d3cbabcc3 (diff)
add unvoiced and f0
Diffstat (limited to 'become_yukarin')
-rw-r--r-- become_yukarin/data_struct.py 15
-rw-r--r-- become_yukarin/dataset/dataset.py 39
-rw-r--r-- become_yukarin/voice_changer.py 4
3 files changed, 49 insertions, 9 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 7b220f0..786dd7e 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -26,6 +26,21 @@ class AcousticFeature(NamedTuple):
voiced=self.mfcc.astype(dtype),
)
+ def validate(self):
+ assert self.f0.ndim == 2
+ assert self.spectrogram.ndim == 2
+ assert self.aperiodicity.ndim == 2
+ assert self.mfcc.ndim == 2
+ assert self.voiced.ndim == 2
+
+ len_time = len(self.f0)
+ assert len(self.spectrogram) == len_time
+ assert len(self.aperiodicity) == len_time
+ assert len(self.mfcc) == len_time
+ assert len(self.voiced) == len_time
+
+ assert self.voiced.dtype == numpy.bool
+
@staticmethod
def get_sizes(sampling_rate: int, order: int):
fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 09931b3..dc5bc74 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,30 +88,55 @@ class AcousticFeatureProcess(BaseDataProcess):
f0 = pyworld.stonemask(x, _f0, t, fs)
spectrogram = pyworld.cheaptrick(x, f0, t, fs)
aperiodicity = pyworld.d4c(x, f0, t, fs)
+
mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
voiced = ~(f0 == 0) # type: numpy.ndarray
- return AcousticFeature(
- f0=f0.astype(self._dtype),
+
+ feature = AcousticFeature(
+ f0=f0[:, None].astype(self._dtype),
spectrogram=spectrogram.astype(self._dtype),
aperiodicity=aperiodicity.astype(self._dtype),
mfcc=mfcc.astype(self._dtype),
- voiced=voiced.astype(self._dtype),
+ voiced=voiced[:, None].astype(self._dtype),
)
+ feature.validate()
+ return feature
class AcousticFeatureLoadProcess(BaseDataProcess):
- def __init__(self):
- pass
+ def __init__(self, validate=False):
+ self._validate = validate
- def __call__(self, path: Path, test):
+ def __call__(self, path: Path, test=None):
d = numpy.load(path).item() # type: dict
- return AcousticFeature(
+ feature = AcousticFeature(
f0=d['f0'],
spectrogram=d['spectrogram'],
aperiodicity=d['aperiodicity'],
mfcc=d['mfcc'],
voiced=d['voiced'],
)
+ if self._validate:
+ feature.validate()
+ return feature
+
+
+class AcousticFeatureSaveProcess(BaseDataProcess):
+ def __init__(self, validate=False):
+ self._validate = validate
+
+ def __call__(self, data: Dict[str, any], test=None):
+ path = data['path'] # type: Path
+ feature = data['feature'] # type: AcousticFeature
+ if self._validate:
+ feature.validate()
+ numpy.save(path.absolute(), dict(
+ f0=feature.f0,
+ spectrogram=feature.spectrogram,
+ aperiodicity=feature.aperiodicity,
+ mfcc=feature.mfcc,
+ voiced=feature.voiced,
+ ))
class AcousticFeatureNormalizeProcess(BaseDataProcess):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d6d39c6..c160a5d 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,11 +87,11 @@ class VoiceChanger(object):
)
out = AcousticFeature(
- f0=input_feature.f0,
+ f0=out.f0,
spectrogram=spectrogram,
aperiodicity=input_feature.aperiodicity,
mfcc=out.mfcc,
- voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
+ voiced=input_feature.voiced,
).astype(numpy.float64)
out = pyworld.synthesize(
f0=out.f0,