summary | refs | log | tree | commit | diff
path: root/become_yukarin
diff options
context:
space:
mode:
author: Hiroshiba Kazuyuki <hihokaruta@gmail.com>, 2017-11-07 10:20:04 +0900
committer: Hiroshiba Kazuyuki <hihokaruta@gmail.com>, 2017-11-07 10:29:28 +0900
commit: 6119849270c2aed117627d7d2b060f37d1c25de4 (patch)
tree: 178e8df7a0c3d33f9de776b85ee4ff545f2ecdc3 /become_yukarin
parent: 8e637c41a262373786b94d40a8f3559caf5cd44c (diff)
can train
Diffstat (limited to 'become_yukarin')
-rw-r--r-- become_yukarin/config.py 90
-rw-r--r-- become_yukarin/dataset/__init__.py 1
-rw-r--r-- become_yukarin/dataset/dataset.py 29
-rw-r--r-- become_yukarin/loss.py 24
-rw-r--r-- become_yukarin/model.py 43
5 files changed, 167 insertions, 20 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
index b15dc6f..d00f179 100644
--- a/become_yukarin/config.py
+++ b/become_yukarin/config.py
@@ -1,6 +1,8 @@
+import json
+from pathlib import Path
from typing import NamedTuple
+from typing import Union
-from .data_struct import AcousticFeature
from .param import Param
@@ -8,13 +10,89 @@ class DatasetConfig(NamedTuple):
param: Param
input_glob: str
target_glob: str
- input_mean: AcousticFeature
- input_var: AcousticFeature
- target_mean: AcousticFeature
- target_var: AcousticFeature
+ input_mean_path: Path
+ input_var_path: Path
+ target_mean_path: Path
+ target_var_path: Path
seed: int
num_test: int
+class ModelConfig(NamedTuple):
+    """Hyper-parameters from which model.create builds the network."""
+
+    # input channels of the first 1-D convolution
+    in_size: int
+    # number of convolution + batch-normalization stages
+    num_scale: int
+    # output channels of the first stage; stage i uses base_num_z * 2 ** i
+    base_num_z: int
+    # output channels of the final kernel-size-1 convolution
+    out_size: int
+
+
+class LossConfig(NamedTuple):
+    """Weights applied to the training loss."""
+
+    # factor that Loss.__call__ multiplies the mean absolute error by
+    l1: float
+
+
+class TrainConfig(NamedTuple):
+    """Options for the training run.
+
+    NOTE(review): the code that consumes these fields is not part of this
+    change; the per-field notes are inferred from the names -- confirm.
+    """
+
+    batchsize: int
+    gpu: int  # presumably a device id -- confirm
+    log_iteration: int  # presumably the logging interval, in iterations -- confirm
+    snapshot_iteration: int  # presumably the snapshot interval, in iterations -- confirm
+    output: Path  # create_from_json wraps the JSON value in Path(...)
+
+
class Config(NamedTuple):
- dataset_config: DatasetConfig
+ dataset: DatasetConfig
+ model: ModelConfig
+ loss: LossConfig
+ train: TrainConfig
+
+    def save_as_json(self, path):
+        """Write this config to ``path`` as indented, key-sorted JSON.
+
+        Nested NamedTuple sections become nested JSON objects and ``Path``
+        values are written as strings (see ``_default_path``).
+
+        :param path: destination file name, passed to ``open(path, 'w')``.
+        """
+        d = _namedtuple_to_dict(self)
+        # The context manager closes (and therefore flushes) the file even if
+        # serialization fails part-way; a bare open() left that to the GC.
+        with open(path, 'w') as f:
+            json.dump(d, f, indent=2, sort_keys=True, default=_default_path)
+
+
+def _default_path(o):
+    """``json.dump`` fallback: encode a ``Path`` as ``str``, reject anything else."""
+    if not isinstance(o, Path):
+        raise TypeError(repr(o) + " is not JSON serializable")
+    return str(o)
+
+
+def _namedtuple_to_dict(o: NamedTuple):
+    """Turn a NamedTuple into a plain dict, recursing into NamedTuple fields."""
+    result = {}
+    for name, value in o._asdict().items():
+        # anything exposing ``_asdict`` is treated as a nested NamedTuple
+        if hasattr(value, '_asdict'):
+            value = _namedtuple_to_dict(value)
+        result[name] = value
+    return result
+
+
+def _read_json_file(path):
+    """Parse the JSON file at ``path``, closing the handle afterwards."""
+    with open(path) as f:
+        return json.load(f)
+
+
+def _dataset_entry(section, name):
+    """Return ``section[name]``, also accepting the ``<name>_path`` spelling.
+
+    ``Config.save_as_json`` writes the statistics files under the field names
+    (``input_mean_path`` ...), whereas this loader reads the short keys
+    (``input_mean`` ...); accepting both lets a saved config be loaded again.
+    """
+    if name in section:
+        return section[name]
+    alternative = name + '_path'
+    if alternative in section:
+        return section[alternative]
+    raise KeyError(name)
+
+
+def create_from_json(s: Union[str, Path]):
+    """Build a ``Config`` from JSON text or from a JSON file.
+
+    :param s: a JSON document, or the name (``str`` or ``Path``) of a file
+        containing one.
+    :return: the populated ``Config``. ``dataset.param`` is always a default
+        ``Param()``; it is not read from the JSON.
+    :raises KeyError: if a required entry is missing.
+    :raises ValueError: if ``s`` is a string that is neither valid JSON nor
+        the name of a readable file (the JSON decode error is re-raised).
+    """
+    try:
+        d = json.loads(s)
+    except TypeError:
+        # json.loads rejects non-str/bytes input such as a Path: read the file.
+        d = _read_json_file(s)
+    except ValueError as decode_error:
+        # A str that is not JSON text (json.JSONDecodeError is a ValueError):
+        # treat it as a file name. If no such file can be opened, report the
+        # original decode error, as before.
+        try:
+            d = _read_json_file(s)
+        except OSError:
+            raise decode_error from None
+
+    dataset = d['dataset']
+    model = d['model']
+    train = d['train']
+
+    return Config(
+        dataset=DatasetConfig(
+            param=Param(),
+            input_glob=dataset['input_glob'],
+            target_glob=dataset['target_glob'],
+            input_mean_path=Path(_dataset_entry(dataset, 'input_mean')),
+            input_var_path=Path(_dataset_entry(dataset, 'input_var')),
+            target_mean_path=Path(_dataset_entry(dataset, 'target_mean')),
+            target_var_path=Path(_dataset_entry(dataset, 'target_var')),
+            seed=dataset['seed'],
+            num_test=dataset['num_test'],
+        ),
+        model=ModelConfig(
+            in_size=model['in_size'],
+            num_scale=model['num_scale'],
+            base_num_z=model['base_num_z'],
+            out_size=model['out_size'],
+        ),
+        loss=LossConfig(
+            l1=d['loss']['l1'],
+        ),
+        train=TrainConfig(
+            batchsize=train['batchsize'],
+            gpu=train['gpu'],
+            log_iteration=train['log_iteration'],
+            snapshot_iteration=train['snapshot_iteration'],
+            output=Path(train['output']),
+        ),
+    )
diff --git a/become_yukarin/dataset/__init__.py b/become_yukarin/dataset/__init__.py
index cdd8cf4..4606e7b 100644
--- a/become_yukarin/dataset/__init__.py
+++ b/become_yukarin/dataset/__init__.py
@@ -1,2 +1,3 @@
from . import dataset
from . import utility
+from .dataset import create
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index f9db53e..39331c3 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -128,10 +128,20 @@ class ReshapeFeatureProcess(BaseDataProcess):
def __call__(self, data: AcousticFeature, test):
+ # join the attributes of data named in self._targets (numpy.concatenate, default axis)
feature = numpy.concatenate([getattr(data, t) for t in self._targets])
- feature = feature[numpy.newaxis]
+ # transpose instead of adding a leading axis
+ # NOTE(review): presumably (frames, dims) -> (dims, frames), so that axis 1 is the one ShapeAlignProcess pads -- confirm
+ feature = feature.T
return feature
+class ShapeAlignProcess(BaseDataProcess):
+    """Zero-pad ``data['input']`` and ``data['target']`` to the same axis-1 length."""
+
+    def __call__(self, data, test):
+        keys = ('input', 'target')
+        longest = max(data[key].shape[1] for key in keys)
+        # pad only at the end of axis 1; axis 0 is left untouched
+        padded = [
+            numpy.pad(data[key], ((0, 0), (0, longest - data[key].shape[1])), mode='constant')
+            for key in keys
+        ]
+        # the incoming dict is updated in place and handed back
+        data['input'], data['target'] = padded
+        return data
+
+
class DataProcessDataset(chainer.dataset.DatasetMixin):
def __init__(self, data: typing.List, data_process: BaseDataProcess):
self._data = data
@@ -144,28 +154,35 @@ class DataProcessDataset(chainer.dataset.DatasetMixin):
return self._data_process(data=self._data[i], test=not chainer.config.train)
-def choose(config: DatasetConfig):
+def create(config: DatasetConfig):
import glob
input_paths = list(sorted([Path(p) for p in glob.glob(config.input_glob)]))
target_paths = list(sorted([Path(p) for p in glob.glob(config.target_glob)]))
assert len(input_paths) == len(target_paths)
+ acoustic_feature_load_process = AcousticFeatureLoadProcess()
+ input_mean = acoustic_feature_load_process(config.input_mean_path, test=True)
+ input_var = acoustic_feature_load_process(config.input_var_path, test=True)
+ target_mean = acoustic_feature_load_process(config.target_mean_path, test=True)
+ target_var = acoustic_feature_load_process(config.target_var_path, test=True)
+
# {input_path, target_path}
data_process = ChainProcess([
SplitProcess(dict(
input=ChainProcess([
LambdaProcess(lambda d, test: d['input_path']),
- AcousticFeatureLoadProcess(),
- AcousticFeatureNormalizeProcess(mean=config.input_mean, var=config.input_var),
+ acoustic_feature_load_process,
+ AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var),
ReshapeFeatureProcess(['mfcc']),
]),
target=ChainProcess([
LambdaProcess(lambda d, test: d['target_path']),
- AcousticFeatureLoadProcess(),
- AcousticFeatureNormalizeProcess(mean=config.target_mean, var=config.target_var),
+ acoustic_feature_load_process,
+ AcousticFeatureNormalizeProcess(mean=target_mean, var=target_var),
ReshapeFeatureProcess(['mfcc']),
]),
)),
+ ShapeAlignProcess(),
])
num_test = config.num_test
diff --git a/become_yukarin/loss.py b/become_yukarin/loss.py
new file mode 100644
index 0000000..c088691
--- /dev/null
+++ b/become_yukarin/loss.py
@@ -0,0 +1,24 @@
+from .config import LossConfig
+from .model import Model
+
+import chainer
+
+from chainer import reporter
+
+
+class Loss(chainer.link.Chain):
+    """Chain that wraps a predictor and returns its weighted L1 loss."""
+
+    def __init__(self, config: LossConfig, predictor: Model):
+        super().__init__()
+        self.config = config
+
+        # register the predictor as a child link
+        with self.init_scope():
+            self.predictor = predictor
+
+    def __call__(self, input, target):
+        """Report the mean absolute error and return it scaled by ``config.l1``."""
+        prediction = self.predictor(input)
+        error = chainer.functions.mean_absolute_error(prediction, target)
+
+        # the value reported under 'loss' is the unscaled error
+        reporter.report({'loss': error}, self)
+
+        return error * self.config.l1
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 087afcd..c67f351 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -1,24 +1,51 @@
import chainer
+from .config import ModelConfig
-class DeepConvolution(chainer.link.Chain):
- def __init__(self, num_scale: int, base_num_z: int, **kwargs):
+
+class DeepConvolution1D(chainer.link.Chain):
+ """Stack of num_scale stages, each a 1-D convolution, batch normalization and ReLU."""
+ def __init__(self, in_size: int, num_scale: int, base_num_z: int, **kwargs):
super().__init__(**kwargs)
self.num_scale = num_scale
+ # output channels of the last stage (stage i has base_num_z * 2 ** i channels)
+ self.out_size = base_num_z * 2 ** (num_scale - 1)
for i in range(num_scale):
l = base_num_z * 2 ** i
- self.add_link('conv{}'.format(i + 1),
- chainer.links.Convolution2D(None, l, 4, 2, 1, nobias=True))
+ # 1-D convolution: in_size -> l channels, kernel 3, stride 1, pad 1, no bias
+ self.add_link('conv{}'.format(i + 1), chainer.links.ConvolutionND(1, in_size, l, 3, 1, 1, nobias=True))
self.add_link('bn{}'.format(i + 1), chainer.links.BatchNormalization(l))
-
- def get_scaled_width(self, base_width):
- return base_width // (2 ** self.num_scale)
+ # the next stage takes this stage's output channels as its input
+ in_size = l
def __call__(self, x):
h = x
for i in range(self.num_scale):
conv = getattr(self, 'conv{}'.format(i + 1))
bn = getattr(self, 'bn{}'.format(i + 1))
- chainer.functions.relu(bn(conv(h)))
+ # keep the stage output so the next stage (and the caller) receives it
+ h = chainer.functions.relu(bn(conv(h)))
+ return h
+
+
+class Model(chainer.link.Chain):
+    """Convolution stack followed by a kernel-size-1 convolution to ``out_size`` channels."""
+
+    def __init__(self, convs: DeepConvolution1D, out_size: int):
+        super().__init__()
+        with self.init_scope():
+            self.convs = convs
+            # maps the stack's output channels (convs.out_size) to out_size
+            self.last = chainer.links.ConvolutionND(1, convs.out_size, out_size, 1)
+
+    def __call__(self, x):
+        """Apply the convolution stack, then the final projection."""
+        return self.last(self.convs(x))
+
+
+def create(config: ModelConfig):
+    """Assemble a ``Model`` from the sizes given in ``config``."""
+    return Model(
+        convs=DeepConvolution1D(
+            in_size=config.in_size,
+            num_scale=config.num_scale,
+            base_num_z=config.base_num_z,
+        ),
+        out_size=config.out_size,
+    )