-rw-r--r--  become_yukarin/data_struct.py        15
-rw-r--r--  become_yukarin/dataset/dataset.py    39
-rw-r--r--  become_yukarin/voice_changer.py       4
-rw-r--r--  scripts/extract_acoustic_feature.py  50
4 files changed, 84 insertions, 24 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 7b220f0..786dd7e 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -26,6 +26,21 @@ class AcousticFeature(NamedTuple):
mfcc=self.mfcc.astype(dtype),
voiced=self.voiced.astype(dtype),
)
+ def validate(self):
+ assert self.f0.ndim == 2
+ assert self.spectrogram.ndim == 2
+ assert self.aperiodicity.ndim == 2
+ assert self.mfcc.ndim == 2
+ assert self.voiced.ndim == 2
+
+ len_time = len(self.f0)
+ assert len(self.spectrogram) == len_time
+ assert len(self.aperiodicity) == len_time
+ assert len(self.mfcc) == len_time
+ assert len(self.voiced) == len_time
+
+ assert self.voiced.dtype == numpy.bool
+
@staticmethod
def get_sizes(sampling_rate: int, order: int):
fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
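
The new validate() method enforces that every field of AcousticFeature is a 2-D array sharing one time axis, and that the voiced mask is boolean. A minimal usage sketch, not part of the diff; the spectrogram, aperiodicity, and MFCC widths are placeholder values:

import numpy
from become_yukarin.data_struct import AcousticFeature

T = 100  # number of frames, arbitrary for the example
feature = AcousticFeature(
    f0=numpy.zeros((T, 1), dtype=numpy.float64),
    spectrogram=numpy.zeros((T, 513), dtype=numpy.float64),
    aperiodicity=numpy.zeros((T, 513), dtype=numpy.float64),
    mfcc=numpy.zeros((T, 25), dtype=numpy.float64),
    voiced=numpy.zeros((T, 1), dtype=numpy.bool),
)
feature.validate()  # passes: every array is 2-D with T rows, voiced is boolean
# A 1-D f0 of shape (T,) would now fail the ndim check instead of slipping through,
# which is why AcousticFeatureProcess adds the [:, None] below.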
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 09931b3..dc5bc74 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -88,30 +88,55 @@ class AcousticFeatureProcess(BaseDataProcess):
f0 = pyworld.stonemask(x, _f0, t, fs)
spectrogram = pyworld.cheaptrick(x, f0, t, fs)
aperiodicity = pyworld.d4c(x, f0, t, fs)
+
mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
voiced = ~(f0 == 0) # type: numpy.ndarray
- return AcousticFeature(
- f0=f0.astype(self._dtype),
+
+ feature = AcousticFeature(
+ f0=f0[:, None].astype(self._dtype),
spectrogram=spectrogram.astype(self._dtype),
aperiodicity=aperiodicity.astype(self._dtype),
mfcc=mfcc.astype(self._dtype),
- voiced=voiced.astype(self._dtype),
+ voiced=voiced[:, None].astype(self._dtype),
)
+ feature.validate()
+ return feature
class AcousticFeatureLoadProcess(BaseDataProcess):
- def __init__(self):
- pass
+ def __init__(self, validate=False):
+ self._validate = validate
- def __call__(self, path: Path, test):
+ def __call__(self, path: Path, test=None):
d = numpy.load(path).item() # type: dict
- return AcousticFeature(
+ feature = AcousticFeature(
f0=d['f0'],
spectrogram=d['spectrogram'],
aperiodicity=d['aperiodicity'],
mfcc=d['mfcc'],
voiced=d['voiced'],
)
+ if self._validate:
+ feature.validate()
+ return feature
+
+
+class AcousticFeatureSaveProcess(BaseDataProcess):
+ def __init__(self, validate=False):
+ self._validate = validate
+
+ def __call__(self, data: Dict[str, any], test=None):
+ path = data['path'] # type: Path
+ feature = data['feature'] # type: AcousticFeature
+ if self._validate:
+ feature.validate()
+ numpy.save(path.absolute(), dict(
+ f0=feature.f0,
+ spectrogram=feature.spectrogram,
+ aperiodicity=feature.aperiodicity,
+ mfcc=feature.mfcc,
+ voiced=feature.voiced,
+ ))
class AcousticFeatureNormalizeProcess(BaseDataProcess):
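
AcousticFeatureSaveProcess mirrors the load process: each can run validate() around the numpy.save / numpy.load round trip, so malformed features fail early instead of surfacing during training. A small sketch of that round trip, not part of the diff; the path and the feature object are placeholders:

from pathlib import Path
from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
from become_yukarin.dataset.dataset import AcousticFeatureSaveProcess

save = AcousticFeatureSaveProcess(validate=True)  # runs feature.validate() before numpy.save
load = AcousticFeatureLoadProcess(validate=True)  # runs feature.validate() after numpy.load

path = Path('/tmp/example.npy')           # placeholder output path
save({'path': path, 'feature': feature})  # feature built as in the sketch above
restored = load(path)                     # an AcousticFeature, already validated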
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index d6d39c6..c160a5d 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -87,11 +87,11 @@ class VoiceChanger(object):
)
out = AcousticFeature(
- f0=input_feature.f0,
+ f0=out.f0,
spectrogram=spectrogram,
aperiodicity=input_feature.aperiodicity,
mfcc=out.mfcc,
- voiced=input_feature.f0 < pyworld.get_cheaptrick_f0_floor(input_wave.sampling_rate, fftlen),
+ voiced=input_feature.voiced,
).astype(numpy.float64)
out = pyworld.synthesize(
f0=out.f0,
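
The synthesis path now reuses the converted f0 (out.f0) and the voiced mask carried on the input feature, instead of the input f0 and a fresh f0-floor comparison. A hedged sketch of one way such a mask can be applied before synthesis, not necessarily how voice_changer.py does it; out, input_feature, and input_wave stand in for the locals there, and the frame period is a placeholder:

import numpy
import pyworld

f0 = out.f0.copy()
f0[~input_feature.voiced] = 0  # unvoiced frames keep f0 == 0 and synthesize as aperiodic noise

wave = pyworld.synthesize(
    f0=f0.ravel().astype(numpy.float64),  # pyworld expects a 1-D float64 f0 track
    spectrogram=out.spectrogram.astype(numpy.float64),
    aperiodicity=out.aperiodicity.astype(numpy.float64),
    fs=input_wave.sampling_rate,
    frame_period=5.0,  # placeholder; the real value comes from the acoustic feature config
)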
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index ccc8d66..f7cbaa1 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -8,7 +8,10 @@ from pathlib import Path
import numpy
+from become_yukarin.data_struct import AcousticFeature
+from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
from become_yukarin.dataset.dataset import AcousticFeatureProcess
+from become_yukarin.dataset.dataset import AcousticFeatureSaveProcess
from become_yukarin.dataset.dataset import WaveFileLoadProcess
from become_yukarin.dataset.utility import MFCCAligner
from become_yukarin.param import AcousticFeatureParam
@@ -52,10 +55,6 @@ def generate_feature(path1, path2):
wave1 = wave_file_load_process(path1, test=True)
wave2 = wave_file_load_process(path2, test=True)
- # m = max(len(wave1.wave), len(wave2.wave))
- # wave1 = Wave(wave=numpy.pad(wave1.wave, (0, m - len(wave1.wave)), mode='mean'), sampling_rate=wave1.sampling_rate)
- # wave2 = Wave(wave=numpy.pad(wave2.wave, (0, m - len(wave2.wave)), mode='mean'), sampling_rate=wave2.sampling_rate)
-
# make acoustic feature
acoustic_feature_process = AcousticFeatureProcess(
frame_period=arguments.frame_period,
@@ -72,14 +71,30 @@ def generate_feature(path1, path2):
spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
+ voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
# save
+ acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
path = Path(arguments.output1_directory, path1.stem + '.npy')
- numpy.save(path.absolute(), dict(f0=f0_1, spectrogram=spectrogram_1, aperiodicity=aperiodicity_1, mfcc=mfcc_1))
+ feature = AcousticFeature(
+ f0=f0_1,
+ spectrogram=spectrogram_1,
+ aperiodicity=aperiodicity_1,
+ mfcc=mfcc_1,
+ voiced=voiced_1,
+ )
+ acoustic_feature_save_process({'path': path, 'feature': feature})
print('saved!', path)
path = Path(arguments.output2_directory, path2.stem + '.npy')
- numpy.save(path.absolute(), dict(f0=f0_2, spectrogram=spectrogram_2, aperiodicity=aperiodicity_2, mfcc=mfcc_2))
+ feature = AcousticFeature(
+ f0=f0_2,
+ spectrogram=spectrogram_2,
+ aperiodicity=aperiodicity_2,
+ mfcc=mfcc_2,
+ voiced=voiced_2,
+ )
+ acoustic_feature_save_process({'path': path, 'feature': feature})
print('saved!', path)
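
The voiced masks are now passed through the same alignment as f0, spectrogram, aperiodicity, and MFCC, so all five arrays keep identical frame counts after warping. The repo's MFCCAligner is not shown in this hunk; purely as an illustration of the general idea, here is a generic DTW alignment (using fastdtw as an assumed dependency) that reindexes any per-frame array with the warping path computed on the MFCCs:

import numpy
from fastdtw import fastdtw  # assumed dependency; any DTW implementation works
from scipy.spatial.distance import euclidean

def align_pair(mfcc_a, mfcc_b, feat_a, feat_b):
    # path is a list of (i, j) frame-index pairs matching sequence A to sequence B
    _, path = fastdtw(mfcc_a, mfcc_b, dist=euclidean)
    idx_a = numpy.array([i for i, _ in path])
    idx_b = numpy.array([j for _, j in path])
    # both outputs now have len(path) frames, so voiced masks stay in step with the MFCCs
    return feat_a[idx_a], feat_b[idx_b]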
@@ -91,37 +106,42 @@ def generate_mean_var(path_directory: Path):
if var_mean.exists():
var_mean.unlink()
+ acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True)
+ acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)
+
f0_list = []
spectrogram_list = []
aperiodicity_list = []
mfcc_list = []
for path in path_directory.glob('*'):
- d = numpy.load(path).item() # type: dict
- f0_list.append(d['f0'].ravel())
- spectrogram_list.append(d['spectrogram'].ravel())
- aperiodicity_list.append(d['aperiodicity'].ravel())
- mfcc_list.append(d['mfcc'].ravel())
+ feature = acoustic_feature_load_process(path)
+ f0_list.append(feature.f0[feature.voiced].ravel()) # remove unvoiced
+ spectrogram_list.append(feature.spectrogram.ravel())
+ aperiodicity_list.append(feature.aperiodicity.ravel())
+ mfcc_list.append(feature.mfcc.ravel())
f0_list = numpy.concatenate(f0_list)
spectrogram_list = numpy.concatenate(spectrogram_list)
aperiodicity_list = numpy.concatenate(aperiodicity_list)
mfcc_list = numpy.concatenate(mfcc_list)
- mean = dict(
+ mean = AcousticFeature(
f0=numpy.mean(f0_list),
spectrogram=numpy.mean(spectrogram_list),
aperiodicity=numpy.mean(aperiodicity_list),
mfcc=numpy.mean(mfcc_list),
+ voiced=numpy.nan,
)
- var = dict(
+ var = AcousticFeature(
f0=numpy.var(f0_list),
spectrogram=numpy.var(spectrogram_list),
aperiodicity=numpy.var(aperiodicity_list),
mfcc=numpy.var(mfcc_list),
+ voiced=numpy.nan,
)
- numpy.save(path_mean.absolute(), mean)
- numpy.save(var_mean.absolute(), var)
+ acoustic_feature_save_process({'path': path_mean, 'feature': mean})
+ acoustic_feature_save_process({'path': var_mean, 'feature': var})
def main():
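
generate_mean_var now computes the f0 statistics over voiced frames only (feature.f0[feature.voiced]) and stores the results as AcousticFeature instances, with voiced set to numpy.nan since no meaningful statistic exists for the mask. A small worked example of why the mask matters, not part of the diff:

import numpy

f0 = numpy.array([[0.0], [120.0], [0.0], [180.0]])         # (T, 1); zeros mark unvoiced frames
voiced = numpy.array([[False], [True], [False], [True]])   # same shape, boolean mask

numpy.mean(f0)          # 75.0: biased toward zero by the unvoiced frames
numpy.mean(f0[voiced])  # 150.0: statistics over voiced frames only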