summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- become_yukarin/dataset/dataset.py 48
-rw-r--r-- become_yukarin/loss.py 9
-rw-r--r-- become_yukarin/model.py 12
3 files changed, 57 insertions, 12 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 633843e..4fded11 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -139,6 +139,35 @@ class AcousticFeatureSaveProcess(BaseDataProcess):
))
+class DistillateUsingFeatureProcess(BaseDataProcess):
+ def __init__(self, targets: List[str]):
+ self._targets = targets
+
+ def __call__(self, feature: AcousticFeature, test=None):
+ d = defaultdict(lambda: numpy.nan, **{t: getattr(feature, t) for t in self._targets})
+ return AcousticFeature(
+ f0=d['f0'],
+ spectrogram=d['spectrogram'],
+ aperiodicity=d['aperiodicity'],
+ mfcc=d['mfcc'],
+ voiced=d['voiced'],
+ )
+
+
+class MakeMaskProcess(BaseDataProcess):
+ def __init__(self):
+ pass
+
+ def __call__(self, feature: AcousticFeature, test=None):
+ return AcousticFeature(
+ f0=feature.voiced,
+ spectrogram=numpy.ones_like(feature.spectrogram, dtype=numpy.bool),
+ aperiodicity=numpy.ones_like(feature.aperiodicity, dtype=numpy.bool),
+ mfcc=numpy.ones_like(feature.mfcc, dtype=numpy.bool),
+ voiced=numpy.ones_like(feature.voiced, dtype=numpy.bool),
+ ).astype(numpy.float32)
+
+
class AcousticFeatureNormalizeProcess(BaseDataProcess):
def __init__(self, mean: AcousticFeature, var: AcousticFeature):
self._mean = mean
@@ -163,6 +192,7 @@ class AcousticFeatureDenormalizeProcess(BaseDataProcess):
def __call__(self, data: AcousticFeature, test):
f0 = data.f0 * numpy.sqrt(self._var.f0) + self._mean.f0
+ print(data.voiced.dtype)
f0[~data.voiced] = 0
return AcousticFeature(
f0=f0,
@@ -210,11 +240,12 @@ class DecodeFeatureProcess(BaseDataProcess):
class ShapeAlignProcess(BaseDataProcess):
def __call__(self, data, test):
- data1, data2 = data['input'], data['target']
- m = max(data1.shape[1], data2.shape[1])
+ data1, data2, data3 = data['input'], data['target'], data['mask']
+ m = max(data1.shape[1], data2.shape[1], data3.shape[1])
data1 = numpy.pad(data1, ((0, 0), (0, m - data1.shape[1])), mode='constant')
data2 = numpy.pad(data2, ((0, 0), (0, m - data2.shape[1])), mode='constant')
- data['input'], data['target'] = data1, data2
+ data3 = numpy.pad(data3, ((0, 0), (0, m - data3.shape[1])), mode='constant')
+ data['input'], data['target'], data['mask'] = data1, data2, data3
return data
@@ -248,16 +279,25 @@ def create(config: DatasetConfig):
input=ChainProcess([
LambdaProcess(lambda d, test: d['input_path']),
acoustic_feature_load_process,
+ DistillateUsingFeatureProcess(config.features + ['voiced']),
AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var),
EncodeFeatureProcess(config.features),
]),
target=ChainProcess([
LambdaProcess(lambda d, test: d['target_path']),
acoustic_feature_load_process,
+ DistillateUsingFeatureProcess(config.features + ['voiced']),
AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var),
- EncodeFeatureProcess(config.features),
+ SplitProcess(dict(
+ feature=EncodeFeatureProcess(config.features),
+ mask=ChainProcess([
+ MakeMaskProcess(),
+ EncodeFeatureProcess(config.features),
+ ])
+ )),
]),
)),
+ LambdaProcess(lambda d, test: dict(input=d['input'], target=d['target']['feature'], mask=d['target']['mask'])),
ShapeAlignProcess(),
])
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
index c088691..3d89908 100644
--- a/become_yukarin/loss.py
+++ b/become_yukarin/loss.py
@@ -14,11 +14,16 @@ class Loss(chainer.link.Chain):
with self.init_scope():
self.predictor = predictor
- def __call__(self, input, target):
+ def __call__(self, input, target, mask):
+ input = chainer.as_variable(input)
+ target = chainer.as_variable(target)
+ mask = chainer.as_variable(mask)
+
h = input
y = self.predictor(h)
- loss = chainer.functions.mean_absolute_error(y, target)
+ loss = chainer.functions.sum(chainer.functions.absolute_error(y, target) * mask)
+ loss = loss / chainer.functions.sum(mask)
reporter.report({'loss': loss}, self)
return loss * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 2585b39..5f46876 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -128,12 +128,12 @@ class CBHG(chainer.link.Chain):
self.highways = chainer.link.ChainList(
*([ConvHighway(out_channels) for _ in range(highway_layers)])
)
- self.gru = chainer.links.NStepBiGRU(
- n_layers=1,
- in_size=out_channels,
- out_size=out_channels,
- dropout=0.0,
- )
+ # self.gru = chainer.links.NStepBiGRU(
+ # n_layers=1,
+ # in_size=out_channels,
+ # out_size=out_channels,
+ # dropout=0.0,
+ # )
def __call__(self, x):
h = x