diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2017-11-15 00:59:31 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2017-11-15 00:59:31 +0900 |
| commit | a4f60ab4cd44d1fc89e83bb662fe430e3824d0dc (patch) | |
| tree | c35277c543045016e44b97cc359c0507c3f5cef5 /scripts | |
| parent | 1ad9c9a59a6ccc9fbb9d27d17c95c23d3cbabcc3 (diff) | |
add unvoiced and f0
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 50 |
1 files changed, 35 insertions, 15 deletions
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py index ccc8d66..f7cbaa1 100644 --- a/scripts/extract_acoustic_feature.py +++ b/scripts/extract_acoustic_feature.py @@ -8,7 +8,10 @@ from pathlib import Path import numpy +from become_yukarin.data_struct import AcousticFeature +from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess from become_yukarin.dataset.dataset import AcousticFeatureProcess +from become_yukarin.dataset.dataset import AcousticFeatureSaveProcess from become_yukarin.dataset.dataset import WaveFileLoadProcess from become_yukarin.dataset.utility import MFCCAligner from become_yukarin.param import AcousticFeatureParam @@ -52,10 +55,6 @@ def generate_feature(path1, path2): wave1 = wave_file_load_process(path1, test=True) wave2 = wave_file_load_process(path2, test=True) - # m = max(len(wave1.wave), len(wave2.wave)) - # wave1 = Wave(wave=numpy.pad(wave1.wave, (0, m - len(wave1.wave)), mode='mean'), sampling_rate=wave1.sampling_rate) - # wave2 = Wave(wave=numpy.pad(wave2.wave, (0, m - len(wave2.wave)), mode='mean'), sampling_rate=wave2.sampling_rate) - # make acoustic feature acoustic_feature_process = AcousticFeatureProcess( frame_period=arguments.frame_period, @@ -72,14 +71,30 @@ def generate_feature(path1, path2): spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram) aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity) mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc) + voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced) # save + acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True) path = Path(arguments.output1_directory, path1.stem + '.npy') - numpy.save(path.absolute(), dict(f0=f0_1, spectrogram=spectrogram_1, aperiodicity=aperiodicity_1, mfcc=mfcc_1)) + feature = AcousticFeature( + f0=f0_1, + spectrogram=spectrogram_1, + aperiodicity=aperiodicity_1, + mfcc=mfcc_1, + voiced=voiced_1, + ) + acoustic_feature_save_process({'path': path, 'feature': feature}) print('saved!', path) path = Path(arguments.output2_directory, path2.stem + '.npy') - numpy.save(path.absolute(), dict(f0=f0_2, spectrogram=spectrogram_2, aperiodicity=aperiodicity_2, mfcc=mfcc_2)) + feature = AcousticFeature( + f0=f0_2, + spectrogram=spectrogram_2, + aperiodicity=aperiodicity_2, + mfcc=mfcc_2, + voiced=voiced_2, + ) + acoustic_feature_save_process({'path': path, 'feature': feature}) print('saved!', path) @@ -91,37 +106,42 @@ def generate_mean_var(path_directory: Path): if var_mean.exists(): var_mean.unlink() + acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True) + acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False) + f0_list = [] spectrogram_list = [] aperiodicity_list = [] mfcc_list = [] for path in path_directory.glob('*'): - d = numpy.load(path).item() # type: dict - f0_list.append(d['f0'].ravel()) - spectrogram_list.append(d['spectrogram'].ravel()) - aperiodicity_list.append(d['aperiodicity'].ravel()) - mfcc_list.append(d['mfcc'].ravel()) + feature = acoustic_feature_load_process(path) + f0_list.append(feature.f0[feature.voiced].ravel()) # remove unvoiced + spectrogram_list.append(feature.spectrogram.ravel()) + aperiodicity_list.append(feature.aperiodicity.ravel()) + mfcc_list.append(feature.mfcc.ravel()) f0_list = numpy.concatenate(f0_list) spectrogram_list = numpy.concatenate(spectrogram_list) aperiodicity_list = numpy.concatenate(aperiodicity_list) mfcc_list = numpy.concatenate(mfcc_list) - mean = dict( + mean = AcousticFeature( f0=numpy.mean(f0_list), spectrogram=numpy.mean(spectrogram_list), aperiodicity=numpy.mean(aperiodicity_list), mfcc=numpy.mean(mfcc_list), + voiced=numpy.nan, ) - var = dict( + var = AcousticFeature( f0=numpy.var(f0_list), spectrogram=numpy.var(spectrogram_list), aperiodicity=numpy.var(aperiodicity_list), mfcc=numpy.var(mfcc_list), + voiced=numpy.nan, ) - numpy.save(path_mean.absolute(), mean) - numpy.save(var_mean.absolute(), var) + acoustic_feature_save_process({'path': path_mean, 'feature': mean}) + acoustic_feature_save_process({'path': var_mean, 'feature': var}) def main(): |
