-rw-r--r--   become_yukarin/config/sr_config.py    23
-rw-r--r--   become_yukarin/dataset/dataset.py     25
-rw-r--r--   become_yukarin/model/sr_model.py       6
-rw-r--r--   become_yukarin/updater/sr_updater.py  19
-rw-r--r--   become_yukarin/voice_changer.py        3
-rw-r--r--   scripts/extract_spectrogram_pair.py    4
-rw-r--r--   train_sr.py                            1
7 files changed, 41 insertions(+), 40 deletions(-)
diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py
index 93db424..4c62808 100644
--- a/become_yukarin/config/sr_config.py
+++ b/become_yukarin/config/sr_config.py
@@ -17,17 +17,7 @@ class SRDatasetConfig(NamedTuple):


class SRModelConfig(NamedTuple):
-    in_channels: int
-    conv_bank_out_channels: int
-    conv_bank_k: int
-    max_pooling_k: int
-    conv_projections_hidden_channels: int
-    highway_layers: int
-    out_channels: int
-    out_size: int
-    aligner_out_time_length: int
-    disable_last_rnn: bool
-    enable_aligner: bool
+    pass


class SRLossConfig(NamedTuple):
@@ -89,17 +79,6 @@ def create_from_json(s: Union[str, Path]):
            num_test=d['dataset']['num_test'],
        ),
        model=SRModelConfig(
-            in_channels=d['model']['in_channels'],
-            conv_bank_out_channels=d['model']['conv_bank_out_channels'],
-            conv_bank_k=d['model']['conv_bank_k'],
-            max_pooling_k=d['model']['max_pooling_k'],
-            conv_projections_hidden_channels=d['model']['conv_projections_hidden_channels'],
-            highway_layers=d['model']['highway_layers'],
-            out_channels=d['model']['out_channels'],
-            out_size=d['model']['out_size'],
-            aligner_out_time_length=d['model']['aligner_out_time_length'],
-            disable_last_rnn=d['model']['disable_last_rnn'],
-            enable_aligner=d['model']['enable_aligner'],
        ),
        loss=SRLossConfig(
            mse=d['loss']['mse'],
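
Note: emptying `SRModelConfig` down to `pass` works because a field-less NamedTuple subclass is still constructible with no arguments, so the `SRModelConfig()` call in `create_from_json` keeps working while what appear to be CBHG-style hyperparameters (conv bank, highway layers, aligner) are retired. A minimal sketch of the idiom, outside the repo:

    from typing import NamedTuple

    class SRModelConfig(NamedTuple):
        pass

    config = SRModelConfig()  # constructs with no arguments
    print(config)             # -> SRModelConfig()
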
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index b0f9807..38cf749 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -313,7 +313,6 @@ class ShapeAlignProcess(BaseDataProcess):


class RandomPaddingProcess(BaseDataProcess):
    def __init__(self, min_size: int, time_axis: int = 1):
-        assert time_axis == 1
        self._min_size = min_size
        self._time_axis = time_axis
@@ -328,7 +327,9 @@ class RandomPaddingProcess(BaseDataProcess):
        pre = random.randint(self._min_size - data.shape[self._time_axis] + 1)
        post = self._min_size - pre
-        return numpy.pad(data, ((0, 0), (pre, post)), mode='constant')
+        pad = [(0, 0)] * data.ndim
+        pad[self._time_axis] = (pre, post)
+        return numpy.pad(data, pad, mode='constant')


class LastPaddingProcess(BaseDataProcess):
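
Note: the rewritten padding builds the per-axis pad spec dynamically instead of hard-coding a 2-D `((0, 0), (pre, post))` tuple, which is what made the old `time_axis == 1` assertion necessary. A standalone sketch of the same technique (`pad_to_min_size` is a hypothetical name, not from the repo):

    import numpy

    def pad_to_min_size(data: numpy.ndarray, min_size: int, time_axis: int, pre: int) -> numpy.ndarray:
        # Zero padding on every axis except the chosen time axis.
        post = min_size - data.shape[time_axis] - pre
        pad = [(0, 0)] * data.ndim
        pad[time_axis] = (pre, post)
        return numpy.pad(data, pad, mode='constant')

    x = numpy.ones((5, 3))
    print(pad_to_min_size(x, min_size=8, time_axis=0, pre=1).shape)  # (8, 3)
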
@@ -520,8 +521,8 @@ def create_sr(config: SRDatasetConfig):
    data_process_base = ChainProcess([
        LowHighSpectrogramFeatureLoadProcess(validate=True),
        SplitProcess(dict(
-            input=LambdaProcess(lambda d, test: numpy.log(d.low)),
-            target=LambdaProcess(lambda d, test: numpy.log(d.high)),
+            input=LambdaProcess(lambda d, test: numpy.log(d.low[:, :-1])),
+            target=LambdaProcess(lambda d, test: numpy.log(d.high[:, :-1])),
        )),
    ])
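
Note: the new `[:, :-1]` slice drops the last frequency bin before taking the log. A WORLD/STFT spectrogram carries `fft_size // 2 + 1` bins, an odd count such as 513; trimming one leaves 512 = 2**9, presumably so the frequency axis survives the encoder's repeated stride-2 downsampling without rounding. A sketch with assumed shapes:

    import numpy

    spec = numpy.abs(numpy.random.randn(128, 513)) + 1e-8  # (time, freq), 513 = 1024 // 2 + 1
    trimmed = numpy.log(spec[:, :-1])                      # (128, 512): halvable eight times
    print(trimmed.shape)
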
@@ -535,13 +536,13 @@ def create_sr(config: SRDatasetConfig):
    def padding(s):
        return ChainProcess([
            LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomPaddingProcess(min_size=config.train_crop_size),
+            RandomPaddingProcess(min_size=config.train_crop_size, time_axis=0),
        ])

    def crop(s):
        return ChainProcess([
            LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomCropProcess(crop_size=config.train_crop_size),
+            RandomCropProcess(crop_size=config.train_crop_size, time_axis=0),
        ])

    data_process_train.append(ChainProcess([
@@ -550,6 +551,10 @@ def create_sr(config: SRDatasetConfig):
        add_seed(),
        SplitProcess(dict(input=crop('input'), target=crop('target'))),
    ]))
+    data_process_train.append(LambdaProcess(lambda d, test: {
+        'input': d['input'][numpy.newaxis],
+        'target': d['target'][numpy.newaxis],
+    }))

    data_process_test = copy.deepcopy(data_process_base)
    if config.train_crop_size is not None:
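
Note: the appended `LambdaProcess` gives every example an explicit channel axis via `numpy.newaxis`, so the dataset now emits `(1, time, freq)` arrays; once the iterator stacks a batch, inputs already sit in the 4-D NCHW layout that Chainer's `Convolution2D` expects (see the matching reshape deletions in `sr_model.py` below). In plain numpy:

    import numpy

    feature = numpy.zeros((128, 512), dtype=numpy.float32)  # (time, freq)
    with_channel = feature[numpy.newaxis]                    # (1, 128, 512)
    batch = numpy.stack([with_channel] * 8)                  # (8, 1, 128, 512): NCHW
    print(with_channel.shape, batch.shape)
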
@@ -557,14 +562,18 @@ def create_sr(config: SRDatasetConfig):
            input=ChainProcess([
                LambdaProcess(lambda d, test: d['input']),
                LastPaddingProcess(min_size=config.train_crop_size),
-                FirstCropProcess(crop_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size, time_axis=0),
            ]),
            target=ChainProcess([
                LambdaProcess(lambda d, test: d['target']),
                LastPaddingProcess(min_size=config.train_crop_size),
-                FirstCropProcess(crop_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size, time_axis=0),
            ]),
        )))
+    data_process_test.append(LambdaProcess(lambda d, test: {
+        'input': d['input'][numpy.newaxis],
+        'target': d['target'][numpy.newaxis],
+    }))

    input_paths = list(sorted([Path(p) for p in glob.glob(str(config.input_glob))]))
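
Note: the test pipeline pads then crops deterministically (`LastPaddingProcess` plus `FirstCropProcess`) where training uses random windows, so evaluation sees the same frames every epoch. `FirstCropProcess` itself is not shown in this diff, but the new `time_axis=0` argument implies an axis-generic crop; a plausible sketch consistent with the call sites (not the repo's actual implementation):

    import numpy

    def first_crop(data: numpy.ndarray, crop_size: int, time_axis: int = 0) -> numpy.ndarray:
        # Take the first crop_size frames along time_axis, leaving other axes intact.
        index = [slice(None)] * data.ndim
        index[time_axis] = slice(0, crop_size)
        return data[tuple(index)]

    x = numpy.arange(40).reshape(10, 4)
    print(first_crop(x, crop_size=6).shape)  # (6, 4)
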
diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py
index 74119a4..64158ca 100644
--- a/become_yukarin/model/sr_model.py
+++ b/become_yukarin/model/sr_model.py
@@ -47,7 +47,6 @@ class Encoder(chainer.Chain):
            self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False)

    def __call__(self, x):
-        x = F.reshape(x, (len(x), 1) + x.shape[1:])
        hs = [F.leaky_relu(self.c0(x))]
        for i in range(1, 8):
            hs.append(self['c%d' % i](hs[i - 1]))
@@ -103,7 +102,6 @@ class SRDiscriminator(chainer.Chain):
            self.c4 = L.Convolution2D(512, 1, 3, 1, 1, initialW=w)

    def __call__(self, x_0, x_1):
-        x_0 = F.reshape(x_0, (len(x_0), 1) + x_0.shape[1:])
        h = F.concat([self.c0_0(x_0), self.c0_1(x_1)])
        h = self.c1(h)
        h = self.c2(h)
@@ -114,6 +112,6 @@ class SRDiscriminator(chainer.Chain):


def create_sr(config: SRModelConfig):
-    predictor = SRPredictor(in_ch=1, out_ch=3)
-    discriminator = SRDiscriminator(in_ch=1, out_ch=3)
+    predictor = SRPredictor(in_ch=1, out_ch=1)
+    discriminator = SRDiscriminator(in_ch=1, out_ch=1)
    return predictor, discriminator
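
Note: both reshape deletions are the model-side half of the dataset change above. The deleted `F.reshape(x, (len(x), 1) + x.shape[1:])` was inserting the channel axis inside the network, redundant now that batches already arrive as `(batch, 1, time, freq)`. The `out_ch=3` to `out_ch=1` change matches: a log-spectrogram is a single channel, not the RGB triple a pix2pix-style image model would use. Shape check in numpy (assumed sizes):

    import numpy

    x_old = numpy.zeros((8, 128, 512), dtype=numpy.float32)        # old pipeline: (batch, time, freq)
    x_reshaped = x_old.reshape((len(x_old), 1) + x_old.shape[1:])  # what F.reshape produced
    x_new = numpy.zeros((8, 1, 128, 512), dtype=numpy.float32)     # new pipeline: channel axis from dataset
    assert x_reshaped.shape == x_new.shape == (8, 1, 128, 512)
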
diff --git a/become_yukarin/updater/sr_updater.py b/become_yukarin/updater/sr_updater.py
index a6b1d22..6e2b400 100644
--- a/become_yukarin/updater/sr_updater.py
+++ b/become_yukarin/updater/sr_updater.py
@@ -33,17 +33,28 @@ class SRUpdater(chainer.training.StandardUpdater):
        chainer.report({'loss': loss}, predictor)
        return loss

-    def _loss_discriminator(self, discriminator, y_in, y_out):
-        b, _, w, h = y_in.data.shape
+    def _loss_discriminator(self, discriminator, d_real, d_fake):
+        b, _, w, h = d_real.data.shape

-        loss_real = F.sum(F.softplus(-y_in)) / (b * w * h)
+        loss_real = F.sum(F.softplus(-d_real)) / (b * w * h)
        chainer.report({'real': loss_real}, discriminator)

-        loss_fake = F.sum(F.softplus(y_out)) / (b * w * h)
+        loss_fake = F.sum(F.softplus(d_fake)) / (b * w * h)
        chainer.report({'fake': loss_fake}, discriminator)

        loss = loss_real + loss_fake
        chainer.report({'loss': loss}, discriminator)
+
+        tp = (d_real.data > 0.5).sum()
+        fp = (d_fake.data > 0.5).sum()
+        fn = (d_real.data <= 0.5).sum()
+        tn = (d_fake.data <= 0.5).sum()
+        accuracy = (tp + tn) / (tp + fp + fn + tn)
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        chainer.report({'accuracy': accuracy}, self.discriminator)
+        chainer.report({'precision': precision}, self.discriminator)
+        chainer.report({'recall': recall}, self.discriminator)
        return loss

    def forward(self, input, target):
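
Note: the renamed arguments make the softplus GAN loss easier to read: `F.softplus(-d_real)` is `-log(sigmoid(d_real))` and `F.softplus(d_fake)` is `-log(1 - sigmoid(d_fake))`, i.e. non-saturating binary cross-entropy on raw logits. The new metrics block thresholds those logits at 0.5, where "positive" means "classified as real"; 0.0 would be the point at which the sigmoid crosses probability 0.5, so 0.5 is a slightly conservative cut. The bookkeeping as a standalone sketch:

    import numpy

    def discriminator_metrics(d_real: numpy.ndarray, d_fake: numpy.ndarray, threshold: float = 0.5):
        # Real patches above the threshold are true positives; fake
        # patches above it are false positives, and so on.
        tp = (d_real > threshold).sum()
        fn = (d_real <= threshold).sum()
        fp = (d_fake > threshold).sum()
        tn = (d_fake <= threshold).sum()
        accuracy = (tp + tn) / (tp + fp + fn + tn)
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        recall = tp / (tp + fn)
        return accuracy, precision, recall

    print(discriminator_metrics(numpy.array([1.2, -0.3]), numpy.array([-0.8, 0.9])))
    # -> (0.5, 0.5, 0.5)
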
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index a8a207a..822d8c5 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -118,6 +118,9 @@ class VoiceChanger(object):
        return self.convert_from_feature(input, out_sampling_rate)

    def convert_from_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
+        if out_sampling_rate is None:
+            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate
+
        out = self.convert_to_feature(input=input, out_sampling_rate=out_sampling_rate)
        out = pyworld.synthesize(
            f0=out.f0.ravel(),
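
Note: the `None` default lets callers omit the output rate and fall back to the sample rate the model was trained with, which matters because `pyworld.synthesize` needs a concrete `fs`. The guard idiom in isolation (hypothetical class, standing in for the config lookup):

    from typing import Optional

    class VoiceChangerSketch:
        def __init__(self, sample_rate: int):
            # Stands in for self.config.dataset.param.voice_param.sample_rate.
            self.sample_rate = sample_rate

        def resolve_rate(self, out_sampling_rate: Optional[int] = None) -> int:
            if out_sampling_rate is None:
                out_sampling_rate = self.sample_rate
            return out_sampling_rate

    changer = VoiceChangerSketch(24000)
    print(changer.resolve_rate())       # 24000 (training-time default)
    print(changer.resolve_rate(16000))  # 16000 (explicit override)
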
diff --git a/scripts/extract_spectrogram_pair.py b/scripts/extract_spectrogram_pair.py
index 02108fc..be21459 100644
--- a/scripts/extract_spectrogram_pair.py
+++ b/scripts/extract_spectrogram_pair.py
@@ -10,6 +10,7 @@ from pprint import pprint
import numpy
import pysptk
import pyworld
+from tqdm import tqdm

from become_yukarin.dataset.dataset import AcousticFeatureProcess
from become_yukarin.dataset.dataset import WaveFileLoadProcess
@@ -68,7 +69,6 @@ def generate_file(path):
        'low': low_spectrogram,
        'high': high_spectrogram,
    })
-    print('saved!', out)


def main():
@@ -76,7 +76,7 @@ def main():
    arguments.output_directory.mkdir(exist_ok=True)

    pool = multiprocessing.Pool()
-    pool.map(generate_file, paths)
+    list(tqdm(pool.imap(generate_file, paths), total=len(paths)))


if __name__ == '__main__':
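
Note: swapping `pool.map` for `pool.imap` wrapped in `tqdm` keeps the multiprocessing but adds a live progress bar (replacing the per-file `print`): `imap` yields results one by one as workers finish, so `tqdm` can tick per file, while `map` would block silently until the whole batch is done; `list(...)` simply drains the iterator. The pattern in a self-contained script:

    import multiprocessing

    from tqdm import tqdm

    def work(n: int) -> int:
        return n * n  # stand-in for generate_file(path)

    if __name__ == '__main__':
        items = list(range(100))
        with multiprocessing.Pool() as pool:
            # total= lets tqdm show a percentage even though imap is lazy.
            results = list(tqdm(pool.imap(work, items), total=len(items)))
        print(len(results))
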
diff --git a/train_sr.py b/train_sr.py
index c714aa0..96f11e7 100644
--- a/train_sr.py
+++ b/train_sr.py
@@ -78,6 +78,7 @@ ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iterat
trainer.extend(ext, trigger=trigger_snapshot)

trainer.extend(extensions.LogReport(trigger=trigger_log))
+trainer.extend(extensions.PrintReport(['predictor/loss']))

if extensions.PlotReport.available():
    trainer.extend(extensions.PlotReport(
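
Note: `PrintReport` only prints keys that `LogReport` has already aggregated, which is why it slots in right after the `LogReport` line; here it surfaces `predictor/loss` on stdout at each reporting interval. A toy but runnable Chainer setup showing the two extensions together (model and data are placeholders, not the repo's):

    import chainer
    import chainer.links as L
    import numpy
    from chainer import training
    from chainer.training import extensions

    # Tiny classifier so the reporter has a 'main/loss' to log.
    model = L.Classifier(L.Linear(4, 2))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    x = numpy.random.randn(64, 4).astype(numpy.float32)
    t = numpy.random.randint(0, 2, size=64).astype(numpy.int32)
    iterator = chainer.iterators.SerialIterator(chainer.datasets.TupleDataset(x, t), batch_size=8)

    updater = training.StandardUpdater(iterator, optimizer)
    trainer = training.Trainer(updater, (3, 'epoch'))
    trainer.extend(extensions.LogReport())                          # aggregates reported values
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))  # prints selected keys
    trainer.run()
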