-rw-r--r--   become_yukarin/data_struct.py         15
-rw-r--r--   become_yukarin/dataset/dataset.py     39
-rw-r--r--   become_yukarin/voice_changer.py        4
-rw-r--r--   scripts/extract_acoustic_feature.py   50
4 files changed, 84 insertions, 24 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 7b220f0..786dd7e 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -26,6 +26,21 @@ class AcousticFeature(NamedTuple):
             voiced=self.mfcc.astype(dtype),
         )
 
+    def validate(self):
+        assert self.f0.ndim == 2
+        assert self.spectrogram.ndim == 2
+        assert self.aperiodicity.ndim == 2
+        assert self.mfcc.ndim == 2
+        assert self.voiced.ndim == 2
+
+        len_time = len(self.f0)
+        assert len(self.spectrogram) == len_time
+        assert len(self.aperiodicity) == len_time
+        assert len(self.mfcc) == len_time
+        assert len(self.voiced) == len_time
+
+        assert self.voiced.dtype == numpy.bool
+
     @staticmethod
     def get_sizes(sampling_rate: int, order: int):
         fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 09931b3..dc5bc74 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,30 +88,55 @@ class AcousticFeatureProcess(BaseDataProcess):
         f0 = pyworld.stonemask(x, _f0, t, fs)
         spectrogram = pyworld.cheaptrick(x, f0, t, fs)
         aperiodicity = pyworld.d4c(x, f0, t, fs)
+
         mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
         voiced = ~(f0 == 0)  # type: numpy.ndarray
-        return AcousticFeature(
-            f0=f0.astype(self._dtype),
+
+        feature = AcousticFeature(
+            f0=f0[:, None].astype(self._dtype),
             spectrogram=spectrogram.astype(self._dtype),
             aperiodicity=aperiodicity.astype(self._dtype),
             mfcc=mfcc.astype(self._dtype),
-            voiced=voiced.astype(self._dtype),
+            voiced=voiced[:, None].astype(self._dtype),
         )
+        feature.validate()
+        return feature
 
 
 class AcousticFeatureLoadProcess(BaseDataProcess):
-    def __init__(self):
-        pass
+    def __init__(self, validate=False):
+        self._validate = validate
 
-    def __call__(self, path: Path, test):
+    def __call__(self, path: Path, test=None):
         d = numpy.load(path).item()  # type: dict
-        return AcousticFeature(
+        feature = AcousticFeature(
             f0=d['f0'],
             spectrogram=d['spectrogram'],
             aperiodicity=d['aperiodicity'],
             mfcc=d['mfcc'],
             voiced=d['voiced'],
         )
+        if self._validate:
+            feature.validate()
+        return feature
+
+
+class AcousticFeatureSaveProcess(BaseDataProcess):
+    def __init__(self, validate=False):
+        self._validate = validate
+
+    def __call__(self, data: Dict[str, any], test=None):
+        path = data['path']  # type: Path
+        feature = data['feature']  # type: AcousticFeature
+        if self._validate:
+            feature.validate()
+        numpy.save(path.absolute(), dict(
+            f0=feature.f0,
+            spectrogram=feature.spectrogram,
+            aperiodicity=feature.aperiodicity,
+            mfcc=feature.mfcc,
+            voiced=feature.voiced,
+        ))
 
 
 class AcousticFeatureNormalizeProcess(BaseDataProcess):
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d6d39c6..c160a5d 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,11 +87,11 @@ class VoiceChanger(object):
         )
 
         out = AcousticFeature(
-            f0=input_feature.f0,
+            f0=out.f0,
             spectrogram=spectrogram,
             aperiodicity=input_feature.aperiodicity,
             mfcc=out.mfcc,
-            voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
+            voiced=input_feature.voiced,
         ).astype(numpy.float64)
         out = pyworld.synthesize(
             f0=out.f0,
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index ccc8d66..f7cbaa1 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -8,7 +8,10 @@ from pathlib import Path
 
 import numpy
 
+from become_yukarin.data_struct import AcousticFeature
+from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
 from become_yukarin.dataset.dataset import AcousticFeatureProcess
+from become_yukarin.dataset.dataset import AcousticFeatureSaveProcess
 from become_yukarin.dataset.dataset import WaveFileLoadProcess
 from become_yukarin.dataset.utility import MFCCAligner
 from become_yukarin.param import AcousticFeatureParam
@@ -52,10 +55,6 @@ def generate_feature(path1, path2):
     wave1 = wave_file_load_process(path1, test=True)
     wave2 = wave_file_load_process(path2, test=True)
 
-    # m = max(len(wave1.wave), len(wave2.wave))
-    # wave1 = Wave(wave=numpy.pad(wave1.wave, (0, m - len(wave1.wave)), mode='mean'), sampling_rate=wave1.sampling_rate)
-    # wave2 = Wave(wave=numpy.pad(wave2.wave, (0, m - len(wave2.wave)), mode='mean'), sampling_rate=wave2.sampling_rate)
-
     # make acoustic feature
     acoustic_feature_process = AcousticFeatureProcess(
         frame_period=arguments.frame_period,
@@ -72,14 +71,30 @@ def generate_feature(path1, path2):
     spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
     aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
     mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
+    voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
 
     # save
+    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
     path = Path(arguments.output1_directory, path1.stem + '.npy')
-    numpy.save(path.absolute(), dict(f0=f0_1, spectrogram=spectrogram_1, aperiodicity=aperiodicity_1, mfcc=mfcc_1))
+    feature = AcousticFeature(
+        f0=f0_1,
+        spectrogram=spectrogram_1,
+        aperiodicity=aperiodicity_1,
+        mfcc=mfcc_1,
+        voiced=voiced_1,
+    )
+    acoustic_feature_save_process({'path': path, 'feature': feature})
     print('saved!', path)
 
     path = Path(arguments.output2_directory, path2.stem + '.npy')
-    numpy.save(path.absolute(), dict(f0=f0_2, spectrogram=spectrogram_2, aperiodicity=aperiodicity_2, mfcc=mfcc_2))
+    feature = AcousticFeature(
+        f0=f0_2,
+        spectrogram=spectrogram_2,
+        aperiodicity=aperiodicity_2,
+        mfcc=mfcc_2,
+        voiced=voiced_2,
+    )
+    acoustic_feature_save_process({'path': path, 'feature': feature})
     print('saved!', path)
 
 
@@ -91,37 +106,42 @@ def generate_mean_var(path_directory: Path):
     if var_mean.exists():
         var_mean.unlink()
 
+    acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True)
+    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)
+
     f0_list = []
     spectrogram_list = []
    aperiodicity_list = []
     mfcc_list = []
     for path in path_directory.glob('*'):
-        d = numpy.load(path).item()  # type: dict
-        f0_list.append(d['f0'].ravel())
-        spectrogram_list.append(d['spectrogram'].ravel())
-        aperiodicity_list.append(d['aperiodicity'].ravel())
-        mfcc_list.append(d['mfcc'].ravel())
+        feature = acoustic_feature_load_process(path)
+        f0_list.append(feature.f0[feature.voiced].ravel())  # remove unvoiced
+        spectrogram_list.append(feature.spectrogram.ravel())
+        aperiodicity_list.append(feature.aperiodicity.ravel())
+        mfcc_list.append(feature.mfcc.ravel())
 
     f0_list = numpy.concatenate(f0_list)
     spectrogram_list = numpy.concatenate(spectrogram_list)
     aperiodicity_list = numpy.concatenate(aperiodicity_list)
     mfcc_list = numpy.concatenate(mfcc_list)
 
-    mean = dict(
+    mean = AcousticFeature(
         f0=numpy.mean(f0_list),
         spectrogram=numpy.mean(spectrogram_list),
         aperiodicity=numpy.mean(aperiodicity_list),
         mfcc=numpy.mean(mfcc_list),
+        voiced=numpy.nan,
     )
-    var = dict(
+    var = AcousticFeature(
         f0=numpy.var(f0_list),
         spectrogram=numpy.var(spectrogram_list),
         aperiodicity=numpy.var(aperiodicity_list),
         mfcc=numpy.var(mfcc_list),
+        voiced=numpy.nan,
     )
 
-    numpy.save(path_mean.absolute(), mean)
-    numpy.save(var_mean.absolute(), var)
+    acoustic_feature_save_process({'path': path_mean, 'feature': mean})
+    acoustic_feature_save_process({'path': var_mean, 'feature': var})
 
 
 def main():
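Usage note: the sketch below shows how the AcousticFeatureSaveProcess / AcousticFeatureLoadProcess pair and the new AcousticFeature.validate() introduced by this commit might be exercised end to end. It is a minimal illustration, not part of the commit: it assumes become_yukarin is importable, a NumPy version where numpy.save / numpy.load round-trip pickled dicts by default (which the existing load process already relies on), and purely illustrative array widths and output path.

from pathlib import Path

import numpy

from become_yukarin.data_struct import AcousticFeature
from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
from become_yukarin.dataset.dataset import AcousticFeatureSaveProcess

# Dummy feature in the layout enforced by validate():
# every array is 2-D, shares the same number of frames on axis 0,
# and voiced is a boolean column vector (numpy.bool, as validate() checks).
len_time = 100
feature = AcousticFeature(
    f0=numpy.zeros((len_time, 1), dtype=numpy.float32),
    spectrogram=numpy.zeros((len_time, 513), dtype=numpy.float32),   # illustrative width
    aperiodicity=numpy.zeros((len_time, 513), dtype=numpy.float32),  # illustrative width
    mfcc=numpy.zeros((len_time, 25), dtype=numpy.float32),           # illustrative width
    voiced=numpy.zeros((len_time, 1), dtype=numpy.bool),
)
feature.validate()  # raises AssertionError if any shape or dtype is off

# Round-trip: the save process takes a dict with 'path' and 'feature',
# the load process returns an AcousticFeature and can re-validate it.
save_process = AcousticFeatureSaveProcess(validate=True)
load_process = AcousticFeatureLoadProcess(validate=True)

path = Path('/tmp/example_feature.npy')  # illustrative output location
save_process({'path': path, 'feature': feature})
restored = load_process(path)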
