| -rw-r--r-- | become_yukarin/config.py            | 90 |
| -rw-r--r-- | become_yukarin/dataset/__init__.py  |  1 |
| -rw-r--r-- | become_yukarin/dataset/dataset.py   | 29 |
| -rw-r--r-- | become_yukarin/loss.py              | 24 |
| -rw-r--r-- | become_yukarin/model.py             | 43 |
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 10 |
| -rw-r--r-- | train.py                            | 66 |
7 files changed, 243 insertions, 20 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index b15dc6f..d00f179 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -1,6 +1,8 @@
+import json
+from pathlib import Path
 from typing import NamedTuple
+from typing import Union
 
-from .data_struct import AcousticFeature
 from .param import Param
 
 
@@ -8,13 +10,89 @@ class DatasetConfig(NamedTuple):
     param: Param
     input_glob: str
     target_glob: str
-    input_mean: AcousticFeature
-    input_var: AcousticFeature
-    target_mean: AcousticFeature
-    target_var: AcousticFeature
+    input_mean_path: Path
+    input_var_path: Path
+    target_mean_path: Path
+    target_var_path: Path
     seed: int
     num_test: int
 
 
+class ModelConfig(NamedTuple):
+    in_size: int
+    num_scale: int
+    base_num_z: int
+    out_size: int
+
+
+class LossConfig(NamedTuple):
+    l1: float
+
+
+class TrainConfig(NamedTuple):
+    batchsize: int
+    gpu: int
+    log_iteration: int
+    snapshot_iteration: int
+    output: Path
+
+
 class Config(NamedTuple):
-    dataset_config: DatasetConfig
+    dataset: DatasetConfig
+    model: ModelConfig
+    loss: LossConfig
+    train: TrainConfig
+
+    def save_as_json(self, path):
+        d = _namedtuple_to_dict(self)
+        json.dump(d, open(path, 'w'), indent=2, sort_keys=True, default=_default_path)
+
+
+def _default_path(o):
+    if isinstance(o, Path):
+        return str(o)
+    raise TypeError(repr(o) + " is not JSON serializable")
+
+
+def _namedtuple_to_dict(o: NamedTuple):
+    return {
+        k: v if not hasattr(v, '_asdict') else _namedtuple_to_dict(v)
+        for k, v in o._asdict().items()
+    }
+
+
+def create_from_json(s: Union[str, Path]):
+    try:
+        d = json.loads(s)
+    except TypeError:
+        d = json.load(open(s))
+
+    return Config(
+        dataset=DatasetConfig(
+            param=Param(),
+            input_glob=d['dataset']['input_glob'],
+            target_glob=d['dataset']['target_glob'],
+            input_mean_path=Path(d['dataset']['input_mean']),
+            input_var_path=Path(d['dataset']['input_var']),
+            target_mean_path=Path(d['dataset']['target_mean']),
+            target_var_path=Path(d['dataset']['target_var']),
+            seed=d['dataset']['seed'],
+            num_test=d['dataset']['num_test'],
+        ),
+        model=ModelConfig(
+            in_size=d['model']['in_size'],
+            num_scale=d['model']['num_scale'],
+            base_num_z=d['model']['base_num_z'],
+            out_size=d['model']['out_size'],
+        ),
+        loss=LossConfig(
+            l1=d['loss']['l1'],
+        ),
+        train=TrainConfig(
+            batchsize=d['train']['batchsize'],
+            gpu=d['train']['gpu'],
+            log_iteration=d['train']['log_iteration'],
+            snapshot_iteration=d['train']['snapshot_iteration'],
+            output=Path(d['train']['output']),
+        ),
+    )
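For reference, a minimal sketch of a configuration that create_from_json above would accept. The key layout mirrors the dictionary lookups in the diff; every concrete value (globs, sizes, paths) is a made-up placeholder.

    import json
    from become_yukarin.config import create_from_json

    # placeholder values; only the key layout is taken from create_from_json
    example = {
        'dataset': {
            'input_glob': 'dat/input/*.npy', 'target_glob': 'dat/target/*.npy',
            'input_mean': 'dat/input_mean.npy', 'input_var': 'dat/input_var.npy',
            'target_mean': 'dat/target_mean.npy', 'target_var': 'dat/target_var.npy',
            'seed': 0, 'num_test': 10,
        },
        'model': {'in_size': 25, 'num_scale': 4, 'base_num_z': 16, 'out_size': 25},
        'loss': {'l1': 1.0},
        'train': {'batchsize': 16, 'gpu': -1, 'log_iteration': 100,
                  'snapshot_iteration': 1000, 'output': 'output/'},
    }

    # create_from_json tries json.loads first, so a JSON string works as well as a file path
    config = create_from_json(json.dumps(example))
    config.save_as_json('config.json')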
diff --git a/become_yukarin/dataset/__init__.py b/become_yukarin/dataset/__init__.py
index cdd8cf4..4606e7b 100644
--- a/become_yukarin/dataset/__init__.py
+++ b/become_yukarin/dataset/__init__.py
@@ -1,2 +1,3 @@
 from . import dataset
 from . import utility
+from .dataset import create
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index f9db53e..39331c3 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -128,10 +128,20 @@ class ReshapeFeatureProcess(BaseDataProcess):
     def __call__(self, data: AcousticFeature, test):
         feature = numpy.concatenate([getattr(data, t) for t in self._targets])
-        feature = feature[numpy.newaxis]
+        feature = feature.T
         return feature
 
 
+class ShapeAlignProcess(BaseDataProcess):
+    def __call__(self, data, test):
+        data1, data2 = data['input'], data['target']
+        m = max(data1.shape[1], data2.shape[1])
+        data1 = numpy.pad(data1, ((0, 0), (0, m - data1.shape[1])), mode='constant')
+        data2 = numpy.pad(data2, ((0, 0), (0, m - data2.shape[1])), mode='constant')
+        data['input'], data['target'] = data1, data2
+        return data
+
+
 class DataProcessDataset(chainer.dataset.DatasetMixin):
     def __init__(self, data: typing.List, data_process: BaseDataProcess):
         self._data = data
@@ -144,28 +154,35 @@ class DataProcessDataset(chainer.dataset.DatasetMixin):
         return self._data_process(data=self._data[i], test=not chainer.config.train)
 
 
-def choose(config: DatasetConfig):
+def create(config: DatasetConfig):
     import glob
     input_paths = list(sorted([Path(p) for p in glob.glob(config.input_glob)]))
     target_paths = list(sorted([Path(p) for p in glob.glob(config.target_glob)]))
     assert len(input_paths) == len(target_paths)
 
+    acoustic_feature_load_process = AcousticFeatureLoadProcess()
+    input_mean = acoustic_feature_load_process(config.input_mean_path, test=True)
+    input_var = acoustic_feature_load_process(config.input_var_path, test=True)
+    target_mean = acoustic_feature_load_process(config.target_mean_path, test=True)
+    target_var = acoustic_feature_load_process(config.target_var_path, test=True)
+
     # {input_path, target_path}
     data_process = ChainProcess([
         SplitProcess(dict(
             input=ChainProcess([
                 LambdaProcess(lambda d, test: d['input_path']),
-                AcousticFeatureLoadProcess(),
-                AcousticFeatureNormalizeProcess(mean=config.input_mean, var=config.input_var),
+                acoustic_feature_load_process,
+                AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var),
                 ReshapeFeatureProcess(['mfcc']),
             ]),
             target=ChainProcess([
                 LambdaProcess(lambda d, test: d['target_path']),
-                AcousticFeatureLoadProcess(),
-                AcousticFeatureNormalizeProcess(mean=config.target_mean, var=config.target_var),
+                acoustic_feature_load_process,
+                AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var),
                 ReshapeFeatureProcess(['mfcc']),
             ]),
         )),
+        ShapeAlignProcess(),
     ])
 
     num_test = config.num_test
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
new file mode 100644
index 0000000..c088691
--- /dev/null
+++ b/become_yukarin/loss.py
@@ -0,0 +1,24 @@
+from .config import LossConfig
+from .model import Model
+
+import chainer
+
+from chainer import reporter
+
+
+class Loss(chainer.link.Chain):
+    def __init__(self, config: LossConfig, predictor: Model):
+        super().__init__()
+        self.config = config
+
+        with self.init_scope():
+            self.predictor = predictor
+
+    def __call__(self, input, target):
+        h = input
+        y = self.predictor(h)
+
+        loss = chainer.functions.mean_absolute_error(y, target)
+        reporter.report({'loss': loss}, self)
+
+        return loss * self.config.l1
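The new ShapeAlignProcess zero-pads the shorter member of an input/target pair along the time axis so the two matrices line up. A standalone numpy sketch of that behaviour, assuming ReshapeFeatureProcess now yields (feature, time) arrays after the .T change; the shapes are arbitrary:

    import numpy

    data = {'input': numpy.zeros((25, 100), dtype=numpy.float32),    # (feature, time)
            'target': numpy.zeros((25, 130), dtype=numpy.float32)}

    m = max(data['input'].shape[1], data['target'].shape[1])
    data['input'] = numpy.pad(data['input'], ((0, 0), (0, m - data['input'].shape[1])), mode='constant')
    data['target'] = numpy.pad(data['target'], ((0, 0), (0, m - data['target'].shape[1])), mode='constant')
    # both are now (25, 130); length differences across pairs are handled later by
    # concat_examples(padding=0) in train.py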
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 087afcd..c67f351 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,24 +1,51 @@
 import chainer
 
+from .config import ModelConfig
 
-class DeepConvolution(chainer.link.Chain):
-    def __init__(self, num_scale: int, base_num_z: int, **kwargs):
+
+class DeepConvolution1D(chainer.link.Chain):
+    def __init__(self, in_size: int, num_scale: int, base_num_z: int, **kwargs):
         super().__init__(**kwargs)
         self.num_scale = num_scale
+        self.out_size = base_num_z * 2 ** (num_scale - 1)
 
         for i in range(num_scale):
             l = base_num_z * 2 ** i
-            self.add_link('conv{}'.format(i + 1),
-                          chainer.links.Convolution2D(None, l, 4, 2, 1, nobias=True))
+            self.add_link('conv{}'.format(i + 1), chainer.links.ConvolutionND(1, in_size, l, 3, 1, 1, nobias=True))
             self.add_link('bn{}'.format(i + 1), chainer.links.BatchNormalization(l))
-
-    def get_scaled_width(self, base_width):
-        return base_width // (2 ** self.num_scale)
+            in_size = l
 
     def __call__(self, x):
         h = x
         for i in range(self.num_scale):
             conv = getattr(self, 'conv{}'.format(i + 1))
             bn = getattr(self, 'bn{}'.format(i + 1))
-            chainer.functions.relu(bn(conv(h)))
+            h = chainer.functions.relu(bn(conv(h)))
+        return h
+
+
+class Model(chainer.link.Chain):
+    def __init__(self, convs: DeepConvolution1D, out_size: int):
+        super().__init__()
+        with self.init_scope():
+            self.convs = convs
+            self.last = chainer.links.ConvolutionND(1, convs.out_size, out_size, 1)
+
+    def __call__(self, x):
+        h = x
+        h = self.convs(h)
+        h = self.last(h)
         return h
+
+
+def create(config: ModelConfig):
+    convs = DeepConvolution1D(
+        in_size=config.in_size,
+        num_scale=config.num_scale,
+        base_num_z=config.base_num_z,
+    )
+    model = Model(
+        convs=convs,
+        out_size=config.out_size,
+    )
+    return model
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index ccc8d66..b280db6 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -73,6 +73,16 @@ def generate_feature(path1, path2):
     aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
     mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
 
+    # convert type
+    f0_1 = f0_1.astype(numpy.float32)
+    f0_2 = f0_2.astype(numpy.float32)
+    spectrogram_1 = spectrogram_1.astype(numpy.float32)
+    spectrogram_2 = spectrogram_2.astype(numpy.float32)
+    aperiodicity_1 = aperiodicity_1.astype(numpy.float32)
+    aperiodicity_2 = aperiodicity_2.astype(numpy.float32)
+    mfcc_1 = mfcc_1.astype(numpy.float32)
+    mfcc_2 = mfcc_2.astype(numpy.float32)
+
     # save
     path = Path(arguments.output1_directory, path1.stem + '.npy')
     numpy.save(path.absolute(), dict(f0=f0_1, spectrogram=spectrogram_1, aperiodicity=aperiodicity_1, mfcc=mfcc_1))
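As a quick sanity check of the reworked model: every ConvolutionND(1, ...) layer uses ksize=3, stride=1, pad=1, so only the channel count changes and the time axis is preserved. A sketch with placeholder sizes:

    import numpy
    from become_yukarin.config import ModelConfig
    from become_yukarin.model import create

    model = create(ModelConfig(in_size=25, num_scale=4, base_num_z=16, out_size=25))

    x = numpy.zeros((2, 25, 100), dtype=numpy.float32)   # (batch, channel, time)
    y = model(x)
    print(y.shape)   # (2, 25, 100): channels go 25 -> 16 -> 32 -> 64 -> 128 -> 25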
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..eb0833d
--- /dev/null
+++ b/train.py
@@ -0,0 +1,66 @@
+import argparse
+from pathlib import Path
+
+from chainer.iterators import MultiprocessIterator
+from chainer import optimizers
+from chainer import training
+from chainer.training import extensions
+from chainer.dataset import convert
+
+from become_yukarin.config import create_from_json
+from become_yukarin.dataset import create as create_dataset
+from become_yukarin.model import create as create_model
+from become_yukarin.loss import Loss
+
+from functools import partial
+
+parser = argparse.ArgumentParser()
+parser.add_argument('config_json_path', type=Path)
+arguments = parser.parse_args()
+
+config = create_from_json(arguments.config_json_path)
+config.train.output.mkdir(exist_ok=True)
+config.save_as_json((config.train.output / 'config.json').absolute())
+
+# model
+predictor = create_model(config.model)
+model = Loss(config.loss, predictor=predictor)
+
+# dataset
+dataset = create_dataset(config.dataset)
+train_iter = MultiprocessIterator(dataset['train'], config.train.batchsize)
+test_iter = MultiprocessIterator(dataset['test'], config.train.batchsize, repeat=False, shuffle=False)
+train_eval_iter = MultiprocessIterator(dataset['train_eval'], config.train.batchsize, repeat=False, shuffle=False)
+
+# optimizer
+optimizer = optimizers.Adam()
+optimizer.setup(model)
+
+# trainer
+trigger_best = training.triggers.MinValueTrigger('test/main/loss', (config.train.snapshot_iteration, 'iteration'))
+
+converter = partial(convert.concat_examples, padding=0)
+updater = training.StandardUpdater(train_iter, optimizer, device=config.train.gpu, converter=converter)
+trainer = training.Trainer(updater, out=config.train.output)
+
+ext = extensions.Evaluator(test_iter, model, converter, device=config.train.gpu)
+trainer.extend(ext, name='test', trigger=(config.train.log_iteration, 'iteration'))
+ext = extensions.Evaluator(train_eval_iter, model, converter, device=config.train.gpu)
+trainer.extend(ext, name='train', trigger=(config.train.log_iteration, 'iteration'))
+
+trainer.extend(extensions.dump_graph('main/loss', out_name='graph.dot'))
+
+ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
+trainer.extend(ext, trigger=trigger_best)
+
+trainer.extend(extensions.LogReport(trigger=(config.train.log_iteration, 'iteration'), log_name='log.txt'))
+
+if extensions.PlotReport.available():
+    trainer.extend(extensions.PlotReport(
+        y_keys=['main/loss', 'test/main/loss'],
+        x_key='iteration',
+        file_name='loss.png',
+        trigger=(config.train.log_iteration, 'iteration'),
+    ))
+
+trainer.run()
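train.py takes the config JSON as its only positional argument (e.g. python train.py config.json). The snapshot_object extension stores just the predictor weights, so reloading a snapshot later would look roughly like the sketch below; the paths and iteration number are placeholders:

    import chainer
    from become_yukarin.config import create_from_json
    from become_yukarin.model import create as create_model

    config = create_from_json('output/config.json')   # written by train.py at startup
    predictor = create_model(config.model)
    chainer.serializers.load_npz('output/predictor_1000.npz', predictor)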
