diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-02-10 20:03:24 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-02-10 20:06:13 +0900 |
| commit | 4ff5252dbdc0cdaeecc7fbe399c629e4d29de3a3 (patch) | |
| tree | 8d63b4ce9e25231a9c3da740006aee8980fc6e51 | |
| parent | 29f0994ea34f3df7dd5eea0b330d429b2e492211 (diff) | |
時間方向のブラー増強
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | become_yukarin/config/sr_config.py | 5 |
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 25 |
| -rw-r--r-- | scripts/voice_conversion_test.py | 9 |
3 files changed, 36 insertions, 3 deletions
```diff
diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py
index 266ea04..4f980a2 100644
--- a/become_yukarin/config/sr_config.py
+++ b/become_yukarin/config/sr_config.py
@@ -14,6 +14,7 @@ class SRDatasetConfig(NamedTuple):
     train_crop_size: int
     input_global_noise: float
     input_local_noise: float
+    blur_size_factor: float
     seed: int
     num_test: int
 
@@ -79,6 +80,7 @@ def create_from_json(s: Union[str, Path]):
             train_crop_size=d['dataset']['train_crop_size'],
             input_global_noise=d['dataset']['input_global_noise'],
             input_local_noise=d['dataset']['input_local_noise'],
+            blur_size_factor=d['dataset']['blur_size_factor'],
             seed=d['dataset']['seed'],
             num_test=d['dataset']['num_test'],
         ),
@@ -102,4 +104,5 @@ def create_from_json(s: Union[str, Path]):
 
 
 def backward_compatible(d: Dict):
-    pass
+    if 'blur_size_factor' not in d['dataset']:
+        d['dataset']['blur_size_factor'] = 0
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 7e3acbf..b584e44 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -8,6 +8,7 @@ from typing import Any
 from typing import Callable
 from typing import Dict
 from typing import List
+import scipy.ndimage
 
 import chainer
 import librosa
@@ -418,6 +419,19 @@ class AddNoiseProcess(BaseDataProcess):
         return data + g + l
 
 
+class RandomBlurProcess(BaseDataProcess):
+    def __init__(self, blur_size_factor: float, time_axis: int = 1) -> None:
+        assert time_axis == 1
+        self._blur_size_factor = blur_size_factor
+        self._time_axis = time_axis
+
+    def __call__(self, data: numpy.ndarray, test=None):
+        assert not test
+
+        blur_size = numpy.abs(numpy.random.randn()) * self._blur_size_factor
+        return scipy.ndimage.gaussian_filter(data, (0, blur_size))
+
+
 class DataProcessDataset(chainer.dataset.DatasetMixin):
     def __init__(self, data: typing.List, data_process: BaseDataProcess) -> None:
         self._data = data
@@ -559,6 +573,17 @@ def create_sr(config: SRDatasetConfig):
 
     data_process_train = copy.deepcopy(data_process_base)
 
+    # blur
+    data_process_train.append(SplitProcess(dict(
+        input=ChainProcess([
+            LambdaProcess(lambda d, test: d['input']),
+            RandomBlurProcess(blur_size_factor=config.blur_size_factor),
+        ]),
+        target=ChainProcess([
+            LambdaProcess(lambda d, test: d['target']),
+        ]),
+    )))
+
     # cropping
     if config.train_crop_size is not None:
         def add_seed():
diff --git a/scripts/voice_conversion_test.py b/scripts/voice_conversion_test.py
index d96d5ce..27e588b 100644
--- a/scripts/voice_conversion_test.py
+++ b/scripts/voice_conversion_test.py
@@ -16,11 +16,13 @@ parser.add_argument('model_names', nargs='+')
 parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
 parser.add_argument('-iwd', '--input_wave_directory', type=Path,
                     default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/hiho-wave/hiho-pause-atr503-subset/'))
+parser.add_argument('-it', '--iteration', type=int)
 parser.add_argument('-g', '--gpu', type=int)
 args = parser.parse_args()
 
 model_directory = args.model_directory  # type: Path
 input_wave_directory = args.input_wave_directory  # type: Path
+it = args.iteration
 gpu = args.gpu
 
 paths_test = list(Path('./test_data/').glob('*.wav'))
@@ -53,8 +55,11 @@ for model_name in args.model_names:
     path_train = input_paths[0]
     path_test = input_paths[-1]
 
-    model_paths = base_model.glob('predictor*.npz')
-    model_path = list(sorted(model_paths, key=extract_number))[-1]
+    if it is not None:
+        model_path = base_model / 'predictor_{}.npz'.format(it)
+    else:
+        model_paths = base_model.glob('predictor_*.npz')
+        model_path = list(sorted(model_paths, key=extract_number))[-1]
     print(model_path)
 
     acoustic_converter = AcousticConverter(config, model_path, gpu=gpu)
```
