author     Hiroshiba Kazuyuki <hihokaruta@gmail.com>    2017-11-06 18:28:25 +0900
committer  Hiroshiba Kazuyuki <hihokaruta@gmail.com>    2017-11-06 18:28:25 +0900
commit     8e637c41a262373786b94d40a8f3559caf5cd44c (patch)
tree       6fd4ffa10857fcdaf4f69091c647135bcecbc42d
parent     116f831c65b6217532795bcce25d1001410b275f (diff)
add dataset classes
-rw-r--r--   become_yukarin/config.py                20
-rw-r--r--   become_yukarin/data_struct.py           15
-rw-r--r--   become_yukarin/dataset/dataset.py      189
-rw-r--r--   scripts/extract_acoustic_feature.py      1
4 files changed, 147 insertions, 78 deletions
diff --git a/become_yukarin/config.py b/become_yukarin/config.py
new file mode 100644
index 0000000..b15dc6f
--- /dev/null
+++ b/become_yukarin/config.py
@@ -0,0 +1,20 @@
+from typing import NamedTuple
+
+from .data_struct import AcousticFeature
+from .param import Param
+
+
+class DatasetConfig(NamedTuple):
+    param: Param
+    input_glob: str
+    target_glob: str
+    input_mean: AcousticFeature
+    input_var: AcousticFeature
+    target_mean: AcousticFeature
+    target_var: AcousticFeature
+    seed: int
+    num_test: int
+
+
+class Config(NamedTuple):
+    dataset_config: DatasetConfig
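
For orientation, a minimal sketch of how these config tuples might be filled in. The globs, stats paths, seed, and num_test below are placeholders rather than values from this commit, Param() is assumed to be constructible with its defaults, and AcousticFeatureLoadProcess is the loader added to dataset.py further down:

    from pathlib import Path

    from become_yukarin.config import Config, DatasetConfig
    from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
    from become_yukarin.param import Param

    load = AcousticFeatureLoadProcess()  # loads an AcousticFeature from a .npy dict

    dataset_config = DatasetConfig(
        param=Param(),
        input_glob='dataset/input/*.npy',
        target_glob='dataset/target/*.npy',
        input_mean=load(Path('dataset/input_mean.npy'), test=True),
        input_var=load(Path('dataset/input_var.npy'), test=True),
        target_mean=load(Path('dataset/target_mean.npy'), test=True),
        target_var=load(Path('dataset/target_var.npy'), test=True),
        seed=0,
        num_test=10,
    )
    config = Config(dataset_config=dataset_config)
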
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
new file mode 100644
index 0000000..c215ecc
--- /dev/null
+++ b/become_yukarin/data_struct.py
@@ -0,0 +1,15 @@
+from typing import NamedTuple
+
+import numpy
+
+
+class Wave(NamedTuple):
+    wave: numpy.ndarray
+    sampling_rate: int
+
+
+class AcousticFeature(NamedTuple):
+    f0: numpy.ndarray
+    spectrogram: numpy.ndarray
+    aperiodicity: numpy.ndarray
+    mfcc: numpy.ndarray
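
As a point of reference, a rough sketch of how a Wave could be turned into an AcousticFeature with the libraries dataset.py already imports (librosa, pyworld, pysptk). The sample rate, mel-cepstrum order, and alpha below are assumed values, and pysptk.sp2mc yields mel-cepstra, which the mfcc field is assumed to hold:

    import librosa
    import numpy
    import pysptk
    import pyworld

    from become_yukarin.data_struct import AcousticFeature, Wave

    # load and wrap a waveform (the sample rate is an assumption)
    x, sr = librosa.load('example.wav', sr=24000, dtype=numpy.float64)
    wave = Wave(wave=x, sampling_rate=sr)

    # WORLD analysis: F0, spectral envelope, aperiodicity
    f0, t = pyworld.dio(wave.wave, wave.sampling_rate)
    f0 = pyworld.stonemask(wave.wave, f0, t, wave.sampling_rate)
    spectrogram = pyworld.cheaptrick(wave.wave, f0, t, wave.sampling_rate)
    aperiodicity = pyworld.d4c(wave.wave, f0, t, wave.sampling_rate)

    # mel-cepstrum from the spectral envelope (order and alpha are assumptions)
    mfcc = pysptk.sp2mc(spectrogram, order=24, alpha=0.46)

    feature = AcousticFeature(f0=f0, spectrogram=spectrogram,
                              aperiodicity=aperiodicity, mfcc=mfcc)
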
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 781dbec..f9db53e 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -1,27 +1,19 @@
-import json
-import os
import typing
from abc import ABCMeta, abstractmethod
-from typing import NamedTuple
+from pathlib import Path
+from typing import Callable
+from typing import Dict
+from typing import List
-import nnmnkwii.preprocessing
import chainer
import librosa
import numpy
import pysptk
import pyworld
-
-class Wave(NamedTuple):
-    wave: numpy.ndarray
-    sampling_rate: int
-
-
-class AcousticFeature(NamedTuple):
-    f0: numpy.ndarray
-    spectrogram: numpy.ndarray
-    aperiodicity: numpy.ndarray
-    mfcc: numpy.ndarray
+from ..config import DatasetConfig
+from ..data_struct import AcousticFeature
+from ..data_struct import Wave
class BaseDataProcess(metaclass=ABCMeta):
@@ -30,6 +22,22 @@ class BaseDataProcess(metaclass=ABCMeta):
        pass
+class LambdaProcess(BaseDataProcess):
+    def __init__(self, process: Callable[[any, bool], any]):
+        self._process = process
+
+    def __call__(self, data, test):
+        return self._process(data, test)
+
+
+class DictKeyReplaceProcess(BaseDataProcess):
+    def __init__(self, key_map: Dict[str, str]):
+        self._key_map = key_map
+
+    def __call__(self, data: Dict[str, any], test):
+        return {key_after: data[key_before] for key_after, key_before in self._key_map.items()}
+
+
class ChainProcess(BaseDataProcess):
    def __init__(self, process: typing.Iterable[BaseDataProcess]):
        self._process = process
@@ -52,19 +60,6 @@ class SplitProcess(BaseDataProcess):
        return data
-class DataProcessDataset(chainer.dataset.DatasetMixin):
-    def __init__(self, data: typing.List, data_process: BaseDataProcess, test):
-        self._data = data
-        self._data_process = data_process
-        self._test = test
-
-    def __len__(self):
-        return len(self._data)
-
-    def get_example(self, i):
-        return self._data_process(data=self._data[i], test=self._test)
-
-
class WaveFileLoadProcess(BaseDataProcess):
    def __init__(self, sample_rate: int, top_db: float):
        self._sample_rate = sample_rate
@@ -99,52 +94,92 @@ class AcousticFeatureProcess(BaseDataProcess):
        )
-# data_process = ChainProcess([
-#     SplitProcess(dict(
-#         input=ChainProcess([
-#             WaveFileLoadProcess(),
-#             AcousticFeatureProcess(),
-#         ]),
-#         tareget=ChainProcess([
-#             WaveFileLoadProcess(),
-#             AcousticFeatureProcess(),
-#         ]),
-#     )),
-#
-#     PILImageProcess(mode='RGB'),
-#     RandomFlipImageProcess(p_flip_horizontal=0.5, p_flip_vertical=0),
-#     RandomResizeImageProcess(min_short=128, max_short=160),
-#     RandomCropImageProcess(crop_width=128, crop_height=128),
-#     RgbImageArrayProcess(),
-#     SplitProcess({
-#         'target': None,
-#         'raw_line': RawLineImageArrayProcess(),
-#     })
-# ])
-#
-#
-# def choose(config: DatasetConfig):
-#     if config.images_glob is not None:
-#         import glob
-#         paths = glob.glob(config.images_glob)
-#         paths = data_filter(
-#             datas=paths,
-#             keys=list(map(lambda p: os.path.basename(p), paths)),
-#             filter_func=filter_image,
-#             num_process=None,
-#             cache_path=config.cache_path,
-#         )
-#         paths = list(paths)
-#     else:
-#         paths = json.load(open(config.images_list))
-#
-#     num_test = config.num_test
-#     train_paths = paths[num_test:]
-#     test_paths = paths[:num_test]
-#     train_for_evaluate_paths = train_paths[:num_test]
-#
-#     return {
-#         'train': DataProcessDataset(train_paths, data_process, test=False),
-#         'test': DataProcessDataset(test_paths, data_process, test=True),
-#         'train_eval': DataProcessDataset(train_for_evaluate_paths, data_process, test=True),
-#     }
+class AcousticFeatureLoadProcess(BaseDataProcess):
+    def __init__(self):
+        pass
+
+    def __call__(self, path: Path, test):
+        d = numpy.load(path).item()  # type: dict
+        return AcousticFeature(
+            f0=d['f0'],
+            spectrogram=d['spectrogram'],
+            aperiodicity=d['aperiodicity'],
+            mfcc=d['mfcc'],
+        )
+
+
+class AcousticFeatureNormalizeProcess(BaseDataProcess):
+    def __init__(self, mean: AcousticFeature, var: AcousticFeature):
+        self._mean = mean
+        self._var = var
+
+    def __call__(self, data: AcousticFeature, test):
+        return AcousticFeature(
+            f0=(data.f0 - self._mean.f0) / numpy.sqrt(self._var.f0),
+            spectrogram=(data.spectrogram - self._mean.spectrogram) / numpy.sqrt(self._var.spectrogram),
+            aperiodicity=(data.aperiodicity - self._mean.aperiodicity) / numpy.sqrt(self._var.aperiodicity),
+            mfcc=(data.mfcc - self._mean.mfcc) / numpy.sqrt(self._var.mfcc),
+        )
+
+
+class ReshapeFeatureProcess(BaseDataProcess):
+    def __init__(self, targets: List[str]):
+        self._targets = targets
+
+    def __call__(self, data: AcousticFeature, test):
+        feature = numpy.concatenate([getattr(data, t) for t in self._targets])
+        feature = feature[numpy.newaxis]
+        return feature
+
+
+class DataProcessDataset(chainer.dataset.DatasetMixin):
+    def __init__(self, data: typing.List, data_process: BaseDataProcess):
+        self._data = data
+        self._data_process = data_process
+
+    def __len__(self):
+        return len(self._data)
+
+    def get_example(self, i):
+        return self._data_process(data=self._data[i], test=not chainer.config.train)
+
+
+def choose(config: DatasetConfig):
+    import glob
+    input_paths = list(sorted([Path(p) for p in glob.glob(config.input_glob)]))
+    target_paths = list(sorted([Path(p) for p in glob.glob(config.target_glob)]))
+    assert len(input_paths) == len(target_paths)
+
+    # {input_path, target_path}
+    data_process = ChainProcess([
+        SplitProcess(dict(
+            input=ChainProcess([
+                LambdaProcess(lambda d, test: d['input_path']),
+                AcousticFeatureLoadProcess(),
+                AcousticFeatureNormalizeProcess(mean=config.input_mean, var=config.input_var),
+                ReshapeFeatureProcess(['mfcc']),
+            ]),
+            target=ChainProcess([
+                LambdaProcess(lambda d, test: d['target_path']),
+                AcousticFeatureLoadProcess(),
+                AcousticFeatureNormalizeProcess(mean=config.target_mean, var=config.target_var),
+                ReshapeFeatureProcess(['mfcc']),
+            ]),
+        )),
+    ])
+
+    num_test = config.num_test
+    pairs = [
+        dict(input_path=input_path, target_path=target_path)
+        for input_path, target_path in zip(input_paths, target_paths)
+    ]
+    numpy.random.RandomState(config.seed).shuffle(pairs)
+    train_paths = pairs[num_test:]
+    test_paths = pairs[:num_test]
+    train_for_evaluate_paths = train_paths[:num_test]
+
+    return {
+        'train': DataProcessDataset(train_paths, data_process),
+        'test': DataProcessDataset(test_paths, data_process),
+        'train_eval': DataProcessDataset(train_for_evaluate_paths, data_process),
+    }
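
A short usage sketch for the new choose() entry point, assuming a config assembled as in the earlier example; the batch size is arbitrary. Since get_example() now derives its test flag from chainer.config.train rather than a stored attribute, evaluation is expected to run inside a chainer.using_config('train', False) scope:

    import chainer

    from become_yukarin.dataset.dataset import choose

    datasets = choose(config.dataset_config)
    train_iter = chainer.iterators.SerialIterator(datasets['train'], batch_size=16)
    test_iter = chainer.iterators.SerialIterator(datasets['test'], batch_size=16,
                                                 repeat=False, shuffle=False)

    # each example is a dict whose 'input' and 'target' entries are normalized
    # mfcc arrays with the leading axis added by ReshapeFeatureProcess
    with chainer.using_config('train', False):
        batch = next(test_iter)
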
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index edc85c3..ccc8d66 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -9,7 +9,6 @@ from pathlib import Path
import numpy
from become_yukarin.dataset.dataset import AcousticFeatureProcess
-from become_yukarin.dataset.dataset import Wave
from become_yukarin.dataset.dataset import WaveFileLoadProcess
from become_yukarin.dataset.utility import MFCCAligner
from become_yukarin.param import AcousticFeatureParam
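
Wave itself has not gone away; after this commit it lives in become_yukarin.data_struct, so any caller that still needs the type directly (this script no longer appears to) would import it as:

    from become_yukarin.data_struct import Wave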