diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2017-11-15 00:59:31 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2017-11-15 00:59:31 +0900 |
| commit | a4f60ab4cd44d1fc89e83bb662fe430e3824d0dc (patch) | |
| tree | c35277c543045016e44b97cc359c0507c3f5cef5 /become_yukarin | |
| parent | 1ad9c9a59a6ccc9fbb9d27d17c95c23d3cbabcc3 (diff) | |
add unvoiced and f0
Diffstat (limited to 'become_yukarin')
| -rw-r--r-- | become_yukarin/data_struct.py | 15 | ||||
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 39 | ||||
| -rw-r--r-- | become_yukarin/voice_changer.py | 4 |
3 files changed, 49 insertions, 9 deletions
```diff
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 7b220f0..786dd7e 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -26,6 +26,21 @@ class AcousticFeature(NamedTuple):
             voiced=self.mfcc.astype(dtype),
         )
 
+    def validate(self):
+        assert self.f0.ndim == 2
+        assert self.spectrogram.ndim == 2
+        assert self.aperiodicity.ndim == 2
+        assert self.mfcc.ndim == 2
+        assert self.voiced.ndim == 2
+
+        len_time = len(self.f0)
+        assert len(self.spectrogram) == len_time
+        assert len(self.aperiodicity) == len_time
+        assert len(self.mfcc) == len_time
+        assert len(self.voiced) == len_time
+
+        assert self.voiced.dtype == numpy.bool
+
     @staticmethod
     def get_sizes(sampling_rate: int, order: int):
         fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 09931b3..dc5bc74 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,30 +88,55 @@ class AcousticFeatureProcess(BaseDataProcess):
         f0 = pyworld.stonemask(x, _f0, t, fs)
         spectrogram = pyworld.cheaptrick(x, f0, t, fs)
         aperiodicity = pyworld.d4c(x, f0, t, fs)
+
         mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
         voiced = ~(f0 == 0) # type: numpy.ndarray
-        return AcousticFeature(
-            f0=f0.astype(self._dtype),
+
+        feature = AcousticFeature(
+            f0=f0[:, None].astype(self._dtype),
             spectrogram=spectrogram.astype(self._dtype),
             aperiodicity=aperiodicity.astype(self._dtype),
             mfcc=mfcc.astype(self._dtype),
-            voiced=voiced.astype(self._dtype),
+            voiced=voiced[:, None].astype(self._dtype),
         )
+        feature.validate()
+        return feature
 
 
 class AcousticFeatureLoadProcess(BaseDataProcess):
-    def __init__(self):
-        pass
+    def __init__(self, validate=False):
+        self._validate = validate
 
-    def __call__(self, path: Path, test):
+    def __call__(self, path: Path, test=None):
         d = numpy.load(path).item() # type: dict
-        return AcousticFeature(
+        feature = AcousticFeature(
             f0=d['f0'],
             spectrogram=d['spectrogram'],
             aperiodicity=d['aperiodicity'],
             mfcc=d['mfcc'],
             voiced=d['voiced'],
         )
+        if self._validate:
+            feature.validate()
+        return feature
+
+
+class AcousticFeatureSaveProcess(BaseDataProcess):
+    def __init__(self, validate=False):
+        self._validate = validate
+
+    def __call__(self, data: Dict[str, any], test=None):
+        path = data['path'] # type: Path
+        feature = data['feature'] # type: AcousticFeature
+        if self._validate:
+            feature.validate()
+        numpy.save(path.absolute(), dict(
+            f0=feature.f0,
+            spectrogram=feature.spectrogram,
+            aperiodicity=feature.aperiodicity,
+            mfcc=feature.mfcc,
+            voiced=feature.voiced,
+        ))
 
 
 class AcousticFeatureNormalizeProcess(BaseDataProcess):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d6d39c6..c160a5d 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,11 +87,11 @@ class VoiceChanger(object):
         )
 
         out = AcousticFeature(
-            f0=input_feature.f0,
+            f0=out.f0,
             spectrogram=spectrogram,
             aperiodicity=input_feature.aperiodicity,
             mfcc=out.mfcc,
-            voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
+            voiced=input_feature.voiced,
         ).astype(numpy.float64)
         out = pyworld.synthesize(
             f0=out.f0,
```
