 become_yukarin/config.py            | 90
 become_yukarin/dataset/__init__.py  |  1
 become_yukarin/dataset/dataset.py   | 29
 become_yukarin/loss.py              | 24
 become_yukarin/model.py             | 43
 scripts/extract_acoustic_feature.py | 10
 train.py                            | 66
 7 files changed, 243 insertions(+), 20 deletions(-)
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index b15dc6f..d00f179 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -1,6 +1,8 @@
+import json
+from pathlib import Path
from typing import NamedTuple
+from typing import Union
-from .data_struct import AcousticFeature
from .param import Param
@@ -8,13 +10,89 @@ class DatasetConfig(NamedTuple):
param: Param
input_glob: str
target_glob: str
- input_mean: AcousticFeature
- input_var: AcousticFeature
- target_mean: AcousticFeature
- target_var: AcousticFeature
+ input_mean_path: Path
+ input_var_path: Path
+ target_mean_path: Path
+ target_var_path: Path
seed: int
num_test: int
+class ModelConfig(NamedTuple):
+ in_size: int
+ num_scale: int
+ base_num_z: int
+ out_size: int
+
+
+class LossConfig(NamedTuple):
+ l1: float
+
+
+class TrainConfig(NamedTuple):
+ batchsize: int
+ gpu: int
+ log_iteration: int
+ snapshot_iteration: int
+ output: Path
+
+
class Config(NamedTuple):
- dataset_config: DatasetConfig
+ dataset: DatasetConfig
+ model: ModelConfig
+ loss: LossConfig
+ train: TrainConfig
+
+ def save_as_json(self, path):
+ d = _namedtuple_to_dict(self)
+ json.dump(d, open(path, 'w'), indent=2, sort_keys=True, default=_default_path)
+
+
+def _default_path(o):
+ if isinstance(o, Path):
+ return str(o)
+ raise TypeError(repr(o) + " is not JSON serializable")
+
+
+def _namedtuple_to_dict(o: NamedTuple):
+ return {
+ k: v if not hasattr(v, '_asdict') else _namedtuple_to_dict(v)
+ for k, v in o._asdict().items()
+ }
+
+
+def create_from_json(s: Union[str, Path]):
+ try:
+ d = json.loads(s)
+ except TypeError:
+ d = json.load(open(s))
+
+ return Config(
+ dataset=DatasetConfig(
+ param=Param(),
+ input_glob=d['dataset']['input_glob'],
+ target_glob=d['dataset']['target_glob'],
+ input_mean_path=Path(d['dataset']['input_mean']),
+ input_var_path=Path(d['dataset']['input_var']),
+ target_mean_path=Path(d['dataset']['target_mean']),
+ target_var_path=Path(d['dataset']['target_var']),
+ seed=d['dataset']['seed'],
+ num_test=d['dataset']['num_test'],
+ ),
+ model=ModelConfig(
+ in_size=d['model']['in_size'],
+ num_scale=d['model']['num_scale'],
+ base_num_z=d['model']['base_num_z'],
+ out_size=d['model']['out_size'],
+ ),
+ loss=LossConfig(
+ l1=d['loss']['l1'],
+ ),
+ train=TrainConfig(
+ batchsize=d['train']['batchsize'],
+ gpu=d['train']['gpu'],
+ log_iteration=d['train']['log_iteration'],
+ snapshot_iteration=d['train']['snapshot_iteration'],
+ output=Path(d['train']['output']),
+ ),
+ )
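For reference, a configuration that create_from_json would accept might look like the sketch below. Every value is a placeholder (the globs, statistics paths, MFCC dimensionality of 24, and training hyperparameters are assumptions, not taken from the repository); only the key names come from the code above.

    from become_yukarin.config import create_from_json

    # create_from_json tries json.loads first, so a JSON string works as well as a file path.
    config = create_from_json('''
    {
      "dataset": {
        "input_glob": "dataset/input/*.npy",
        "target_glob": "dataset/target/*.npy",
        "input_mean": "dataset/input_mean.npy",
        "input_var": "dataset/input_var.npy",
        "target_mean": "dataset/target_mean.npy",
        "target_var": "dataset/target_var.npy",
        "seed": 0,
        "num_test": 10
      },
      "model": {"in_size": 24, "num_scale": 4, "base_num_z": 64, "out_size": 24},
      "loss": {"l1": 1.0},
      "train": {
        "batchsize": 16,
        "gpu": -1,
        "log_iteration": 100,
        "snapshot_iteration": 1000,
        "output": "output"
      }
    }
    ''')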
diff --git a/become_yukarin/dataset/__init__.py b/become_yukarin/dataset/__init__.py
index cdd8cf4..4606e7b 100644
--- a/become_yukarin/dataset/__init__.py
+++ b/become_yukarin/dataset/__init__.py
@@ -1,2 +1,3 @@
from . import dataset
from . import utility
+from .dataset import create
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index f9db53e..39331c3 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -128,10 +128,20 @@ class ReshapeFeatureProcess(BaseDataProcess):
def __call__(self, data: AcousticFeature, test):
feature = numpy.concatenate([getattr(data, t) for t in self._targets])
- feature = feature[numpy.newaxis]
+ feature = feature.T
return feature
+class ShapeAlignProcess(BaseDataProcess):
+ def __call__(self, data, test):
+ data1, data2 = data['input'], data['target']
+ m = max(data1.shape[1], data2.shape[1])
+ data1 = numpy.pad(data1, ((0, 0), (0, m - data1.shape[1])), mode='constant')
+ data2 = numpy.pad(data2, ((0, 0), (0, m - data2.shape[1])), mode='constant')
+ data['input'], data['target'] = data1, data2
+ return data
+
+
class DataProcessDataset(chainer.dataset.DatasetMixin):
def __init__(self, data: typing.List, data_process: BaseDataProcess):
self._data = data
@@ -144,28 +154,35 @@ class DataProcessDataset(chainer.dataset.DatasetMixin):
return self._data_process(data=self._data[i], test=not chainer.config.train)
-def choose(config: DatasetConfig):
+def create(config: DatasetConfig):
import glob
input_paths = list(sorted([Path(p) for p in glob.glob(config.input_glob)]))
target_paths = list(sorted([Path(p) for p in glob.glob(config.target_glob)]))
assert len(input_paths) == len(target_paths)
+ acoustic_feature_load_process = AcousticFeatureLoadProcess()
+ input_mean = acoustic_feature_load_process(config.input_mean_path, test=True)
+ input_var = acoustic_feature_load_process(config.input_var_path, test=True)
+ target_mean = acoustic_feature_load_process(config.target_mean_path, test=True)
+ target_var = acoustic_feature_load_process(config.target_var_path, test=True)
+
# {input_path, target_path}
data_process = ChainProcess([
SplitProcess(dict(
input=ChainProcess([
LambdaProcess(lambda d, test: d['input_path']),
- AcousticFeatureLoadProcess(),
- AcousticFeatureNormalizeProcess(mean=config.input_mean, var=config.input_var),
+ acoustic_feature_load_process,
+ AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var),
ReshapeFeatureProcess(['mfcc']),
]),
target=ChainProcess([
LambdaProcess(lambda d, test: d['target_path']),
- AcousticFeatureLoadProcess(),
- AcousticFeatureNormalizeProcess(mean=config.target_mean, var=config.target_var),
+ acoustic_feature_load_process,
+ AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var),
ReshapeFeatureProcess(['mfcc']),
]),
)),
+ ShapeAlignProcess(),
])
num_test = config.num_test
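The new ShapeAlignProcess zero-pads the shorter of the two (feature, frames) matrices along the time axis so a paired input/target can be batched together. A minimal standalone sketch of the same padding, with assumed sizes (24 channels, 90 vs. 100 frames):

    import numpy

    # Two (channels, frames) matrices of unequal length, as ReshapeFeatureProcess would produce.
    data1 = numpy.random.rand(24, 90).astype(numpy.float32)
    data2 = numpy.random.rand(24, 100).astype(numpy.float32)

    m = max(data1.shape[1], data2.shape[1])
    data1 = numpy.pad(data1, ((0, 0), (0, m - data1.shape[1])), mode='constant')
    data2 = numpy.pad(data2, ((0, 0), (0, m - data2.shape[1])), mode='constant')
    assert data1.shape == data2.shape == (24, 100)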
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
new file mode 100644
index 0000000..c088691
--- /dev/null
+++ b/become_yukarin/loss.py
@@ -0,0 +1,24 @@
+from .config import LossConfig
+from .model import Model
+
+import chainer
+
+from chainer import reporter
+
+
+class Loss(chainer.link.Chain):
+ def __init__(self, config: LossConfig, predictor: Model):
+ super().__init__()
+ self.config = config
+
+ with self.init_scope():
+ self.predictor = predictor
+
+ def __call__(self, input, target):
+ h = input
+ y = self.predictor(h)
+
+ loss = chainer.functions.mean_absolute_error(y, target)
+ reporter.report({'loss': loss}, self)
+
+ return loss * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 087afcd..c67f351 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,24 +1,51 @@
import chainer
+from .config import ModelConfig
-class DeepConvolution(chainer.link.Chain):
- def __init__(self, num_scale: int, base_num_z: int, **kwargs):
+
+class DeepConvolution1D(chainer.link.Chain):
+ def __init__(self, in_size: int, num_scale: int, base_num_z: int, **kwargs):
super().__init__(**kwargs)
self.num_scale = num_scale
+ self.out_size = base_num_z * 2 ** (num_scale - 1)
for i in range(num_scale):
l = base_num_z * 2 ** i
- self.add_link('conv{}'.format(i + 1),
- chainer.links.Convolution2D(None, l, 4, 2, 1, nobias=True))
+ self.add_link('conv{}'.format(i + 1), chainer.links.ConvolutionND(1, in_size, l, 3, 1, 1, nobias=True))
self.add_link('bn{}'.format(i + 1), chainer.links.BatchNormalization(l))
-
- def get_scaled_width(self, base_width):
- return base_width // (2 ** self.num_scale)
+ in_size = l
def __call__(self, x):
h = x
for i in range(self.num_scale):
conv = getattr(self, 'conv{}'.format(i + 1))
bn = getattr(self, 'bn{}'.format(i + 1))
- chainer.functions.relu(bn(conv(h)))
+ h = chainer.functions.relu(bn(conv(h)))
+ return h
+
+
+class Model(chainer.link.Chain):
+ def __init__(self, convs: DeepConvolution1D, out_size: int):
+ super().__init__()
+ with self.init_scope():
+ self.convs = convs
+ self.last = chainer.links.ConvolutionND(1, convs.out_size, out_size, 1)
+
+ def __call__(self, x):
+ h = x
+ h = self.convs(h)
+ h = self.last(h)
return h
+
+
+def create(config: ModelConfig):
+ convs = DeepConvolution1D(
+ in_size=config.in_size,
+ num_scale=config.num_scale,
+ base_num_z=config.base_num_z,
+ )
+ model = Model(
+ convs=convs,
+ out_size=config.out_size,
+ )
+ return model
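Since ConvolutionND(1, ...) takes (batch, channels, frames) input, the stacked convolutions use kernel 3 / stride 1 / pad 1, and the final projection uses a kernel of 1, the network preserves the frame count and only remaps the channel dimension. A quick shape check under assumed sizes (24 in/out channels, 4 scales, base_num_z of 64, 100 frames):

    import numpy
    from become_yukarin.config import ModelConfig
    from become_yukarin.model import create

    model = create(ModelConfig(in_size=24, num_scale=4, base_num_z=64, out_size=24))
    x = numpy.zeros((1, 24, 100), dtype=numpy.float32)  # (batch, channels, frames)
    y = model(x)
    assert y.shape == (1, 24, 100)  # frame count preserved, channels mapped back to out_size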
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index ccc8d66..b280db6 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -73,6 +73,16 @@ def generate_feature(path1, path2):
aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
+ # convert type
+ f0_1 = f0_1.astype(numpy.float32)
+ f0_2 = f0_2.astype(numpy.float32)
+ spectrogram_1 = spectrogram_1.astype(numpy.float32)
+ spectrogram_2 = spectrogram_2.astype(numpy.float32)
+ aperiodicity_1 = aperiodicity_1.astype(numpy.float32)
+ aperiodicity_2 = aperiodicity_2.astype(numpy.float32)
+ mfcc_1 = mfcc_1.astype(numpy.float32)
+ mfcc_2 = mfcc_2.astype(numpy.float32)
+
# save
path = Path(arguments.output1_directory, path1.stem + '.npy')
numpy.save(path.absolute(), dict(f0=f0_1, spectrogram=spectrogram_1, aperiodicity=aperiodicity_1, mfcc=mfcc_1))
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..eb0833d
--- /dev/null
+++ b/train.py
@@ -0,0 +1,66 @@
+import argparse
+from pathlib import Path
+
+from chainer.iterators import MultiprocessIterator
+from chainer import optimizers
+from chainer import training
+from chainer.training import extensions
+from chainer.dataset import convert
+
+from become_yukarin.config import create_from_json
+from become_yukarin.dataset import create as create_dataset
+from become_yukarin.model import create as create_model
+from become_yukarin.loss import Loss
+
+from functools import partial
+
+parser = argparse.ArgumentParser()
+parser.add_argument('config_json_path', type=Path)
+arguments = parser.parse_args()
+
+config = create_from_json(arguments.config_json_path)
+config.train.output.mkdir(exist_ok=True)
+config.save_as_json((config.train.output / 'config.json').absolute())
+
+# model
+predictor = create_model(config.model)
+model = Loss(config.loss, predictor=predictor)
+
+# dataset
+dataset = create_dataset(config.dataset)
+train_iter = MultiprocessIterator(dataset['train'], config.train.batchsize)
+test_iter = MultiprocessIterator(dataset['test'], config.train.batchsize, repeat=False, shuffle=False)
+train_eval_iter = MultiprocessIterator(dataset['train_eval'], config.train.batchsize, repeat=False, shuffle=False)
+
+# optimizer
+optimizer = optimizers.Adam()
+optimizer.setup(model)
+
+# trainer
+trigger_best = training.triggers.MinValueTrigger('test/main/loss', (config.train.snapshot_iteration, 'iteration'))
+
+converter = partial(convert.concat_examples, padding=0)
+updater = training.StandardUpdater(train_iter, optimizer, device=config.train.gpu, converter=converter)
+trainer = training.Trainer(updater, out=config.train.output)
+
+ext = extensions.Evaluator(test_iter, model, converter, device=config.train.gpu)
+trainer.extend(ext, name='test', trigger=(config.train.log_iteration, 'iteration'))
+ext = extensions.Evaluator(train_eval_iter, model, converter, device=config.train.gpu)
+trainer.extend(ext, name='train', trigger=(config.train.log_iteration, 'iteration'))
+
+trainer.extend(extensions.dump_graph('main/loss', out_name='graph.dot'))
+
+ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
+trainer.extend(ext, trigger=trigger_best)
+
+trainer.extend(extensions.LogReport(trigger=(config.train.log_iteration, 'iteration'), log_name='log.txt'))
+
+if extensions.PlotReport.available():
+ trainer.extend(extensions.PlotReport(
+ y_keys=['main/loss', 'test/main/loss'],
+ x_key='iteration',
+ file_name='loss.png',
+ trigger=(config.train.log_iteration, 'iteration'),
+ ))
+
+trainer.run()
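With a configuration like the sketch shown after config.py, training would be launched with (the file name is arbitrary):

    python train.py config.json

The script then copies the resolved configuration to <output>/config.json, writes a log to log.txt every log_iteration iterations, and saves predictor_<iteration>.npz snapshots whenever test/main/loss reaches a new minimum, checked every snapshot_iteration iterations.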