summaryrefslogtreecommitdiff
path: root/become_yukarin/dataset/dataset.py
diff options
context:
space:
mode:
Diffstat (limited to 'become_yukarin/dataset/dataset.py')
-rw-r--r--become_yukarin/dataset/dataset.py51
1 files changed, 41 insertions, 10 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 39331c3..7a6ce08 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -61,21 +61,23 @@ class SplitProcess(BaseDataProcess):
class WaveFileLoadProcess(BaseDataProcess):
- def __init__(self, sample_rate: int, top_db: float):
+ def __init__(self, sample_rate: int, top_db: float, dtype=numpy.float32):
self._sample_rate = sample_rate
self._top_db = top_db
+ self._dtype = dtype
def __call__(self, data: str, test):
- wave = librosa.core.load(data, sr=self._sample_rate)[0]
+ wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
return Wave(wave, self._sample_rate)
class AcousticFeatureProcess(BaseDataProcess):
- def __init__(self, frame_period, order, alpha):
+ def __init__(self, frame_period, order, alpha, dtype=numpy.float32):
self._frame_period = frame_period
self._order = order
self._alpha = alpha
+ self._dtype = dtype
def __call__(self, data: Wave, test):
x = data.wave.astype(numpy.float64)
@@ -87,10 +89,10 @@ class AcousticFeatureProcess(BaseDataProcess):
aperiodicity = pyworld.d4c(x, f0, t, fs)
mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)
return AcousticFeature(
- f0=f0,
- spectrogram=spectrogram,
- aperiodicity=aperiodicity,
- mfcc=mfcc,
+ f0=f0.astype(self._dtype),
+ spectrogram=spectrogram.astype(self._dtype),
+ aperiodicity=aperiodicity.astype(self._dtype),
+ mfcc=mfcc.astype(self._dtype),
)
@@ -122,7 +124,21 @@ class AcousticFeatureNormalizeProcess(BaseDataProcess):
)
-class ReshapeFeatureProcess(BaseDataProcess):
+class AcousticFeatureDenormalizeProcess(BaseDataProcess):
+ def __init__(self, mean: AcousticFeature, var: AcousticFeature):
+ self._mean = mean
+ self._var = var
+
+ def __call__(self, data: AcousticFeature, test):
+ return AcousticFeature(
+ f0=data.f0 * numpy.sqrt(self._var.f0) + self._mean.f0,
+ spectrogram=data.spectrogram * numpy.sqrt(self._var.spectrogram) + self._mean.spectrogram,
+ aperiodicity=data.aperiodicity * numpy.sqrt(self._var.aperiodicity) + self._mean.aperiodicity,
+ mfcc=data.mfcc * numpy.sqrt(self._var.mfcc) + self._mean.mfcc,
+ )
+
+
+class EncodeFeatureProcess(BaseDataProcess):
def __init__(self, targets: List[str]):
self._targets = targets
@@ -132,6 +148,21 @@ class ReshapeFeatureProcess(BaseDataProcess):
return feature
+class DecodeFeatureProcess(BaseDataProcess):
+ def __init__(self, targets: List[str]):
+ self._targets = targets
+
+ def __call__(self, data: numpy.ndarray, test):
+ # TODO: implement for other features
+ data = data.T
+ return AcousticFeature(
+ f0=numpy.nan,
+ spectrogram=numpy.nan,
+ aperiodicity=numpy.nan,
+ mfcc=data,
+ )
+
+
class ShapeAlignProcess(BaseDataProcess):
def __call__(self, data, test):
data1, data2 = data['input'], data['target']
@@ -173,13 +204,13 @@ def create(config: DatasetConfig):
LambdaProcess(lambda d, test: d['input_path']),
acoustic_feature_load_process,
AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var),
- ReshapeFeatureProcess(['mfcc']),
+ EncodeFeatureProcess(['mfcc']),
]),
target=ChainProcess([
LambdaProcess(lambda d, test: d['target_path']),
acoustic_feature_load_process,
AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var),
- ReshapeFeatureProcess(['mfcc']),
+ EncodeFeatureProcess(['mfcc']),
]),
)),
ShapeAlignProcess(),