diff options
Diffstat (limited to 'become_yukarin/dataset/dataset.py')
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 51 |
1 files changed, 41 insertions, 10 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index 39331c3..7a6ce08 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -61,21 +61,23 @@ class SplitProcess(BaseDataProcess): class WaveFileLoadProcess(BaseDataProcess): - def __init__(self, sample_rate: int, top_db: float): + def __init__(self, sample_rate: int, top_db: float, dtype=numpy.float32): self._sample_rate = sample_rate self._top_db = top_db + self._dtype = dtype def __call__(self, data: str, test): - wave = librosa.core.load(data, sr=self._sample_rate)[0] + wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0] wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db)) return Wave(wave, self._sample_rate) class AcousticFeatureProcess(BaseDataProcess): - def __init__(self, frame_period, order, alpha): + def __init__(self, frame_period, order, alpha, dtype=numpy.float32): self._frame_period = frame_period self._order = order self._alpha = alpha + self._dtype = dtype def __call__(self, data: Wave, test): x = data.wave.astype(numpy.float64) @@ -87,10 +89,10 @@ class AcousticFeatureProcess(BaseDataProcess): aperiodicity = pyworld.d4c(x, f0, t, fs) mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha) return AcousticFeature( - f0=f0, - spectrogram=spectrogram, - aperiodicity=aperiodicity, - mfcc=mfcc, + f0=f0.astype(self._dtype), + spectrogram=spectrogram.astype(self._dtype), + aperiodicity=aperiodicity.astype(self._dtype), + mfcc=mfcc.astype(self._dtype), ) @@ -122,7 +124,21 @@ class AcousticFeatureNormalizeProcess(BaseDataProcess): ) -class ReshapeFeatureProcess(BaseDataProcess): +class AcousticFeatureDenormalizeProcess(BaseDataProcess): + def __init__(self, mean: AcousticFeature, var: AcousticFeature): + self._mean = mean + self._var = var + + def __call__(self, data: AcousticFeature, test): + return AcousticFeature( + f0=data.f0 * numpy.sqrt(self._var.f0) + self._mean.f0, + spectrogram=data.spectrogram * numpy.sqrt(self._var.spectrogram) + self._mean.spectrogram, + aperiodicity=data.aperiodicity * numpy.sqrt(self._var.aperiodicity) + self._mean.aperiodicity, + mfcc=data.mfcc * numpy.sqrt(self._var.mfcc) + self._mean.mfcc, + ) + + +class EncodeFeatureProcess(BaseDataProcess): def __init__(self, targets: List[str]): self._targets = targets @@ -132,6 +148,21 @@ class ReshapeFeatureProcess(BaseDataProcess): return feature +class DecodeFeatureProcess(BaseDataProcess): + def __init__(self, targets: List[str]): + self._targets = targets + + def __call__(self, data: numpy.ndarray, test): + # TODO: implement for other features + data = data.T + return AcousticFeature( + f0=numpy.nan, + spectrogram=numpy.nan, + aperiodicity=numpy.nan, + mfcc=data, + ) + + class ShapeAlignProcess(BaseDataProcess): def __call__(self, data, test): data1, data2 = data['input'], data['target'] @@ -173,13 +204,13 @@ def create(config: DatasetConfig): LambdaProcess(lambda d, test: d['input_path']), acoustic_feature_load_process, AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var), - ReshapeFeatureProcess(['mfcc']), + EncodeFeatureProcess(['mfcc']), ]), target=ChainProcess([ LambdaProcess(lambda d, test: d['target_path']), acoustic_feature_load_process, AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var), - ReshapeFeatureProcess(['mfcc']), + EncodeFeatureProcess(['mfcc']), ]), )), ShapeAlignProcess(), |
