summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-02-10 20:03:24 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-02-10 20:06:13 +0900
commit: 4ff5252dbdc0cdaeecc7fbe399c629e4d29de3a3 (patch)
tree: 8d63b4ce9e25231a9c3da740006aee8980fc6e51
parent: 29f0994ea34f3df7dd5eea0b330d429b2e492211 (diff)
時間方向のブラー増強 (English: blur augmentation along the time axis)
-rw-r--r-- become_yukarin/config/sr_config.py | 5
-rw-r--r-- become_yukarin/dataset/dataset.py | 25
-rw-r--r-- scripts/voice_conversion_test.py | 9
3 files changed, 36 insertions, 3 deletions
diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py
index 266ea04..4f980a2 100644
--- a/become_yukarin/config/sr_config.py
+++ b/become_yukarin/config/sr_config.py
@@ -14,6 +14,7 @@ class SRDatasetConfig(NamedTuple):
train_crop_size: int
input_global_noise: float
input_local_noise: float
+ blur_size_factor: float
seed: int
num_test: int
@@ -79,6 +80,7 @@ def create_from_json(s: Union[str, Path]):
train_crop_size=d['dataset']['train_crop_size'],
input_global_noise=d['dataset']['input_global_noise'],
input_local_noise=d['dataset']['input_local_noise'],
+ blur_size_factor=d['dataset']['blur_size_factor'],
seed=d['dataset']['seed'],
num_test=d['dataset']['num_test'],
),
@@ -102,4 +104,5 @@ def create_from_json(s: Union[str, Path]):
def backward_compatible(d: Dict):
- pass
+ if 'blur_size_factor' not in d['dataset']:
+ d['dataset']['blur_size_factor'] = 0
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 7e3acbf..b584e44 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -8,6 +8,7 @@ from typing import Any
from typing import Callable
from typing import Dict
from typing import List
+import scipy.ndimage
import chainer
import librosa
@@ -418,6 +419,19 @@ class AddNoiseProcess(BaseDataProcess):
return data + g + l
+class RandomBlurProcess(BaseDataProcess):
+ def __init__(self, blur_size_factor: float, time_axis: int = 1) -> None:
+ assert time_axis == 1
+ self._blur_size_factor = blur_size_factor
+ self._time_axis = time_axis
+
+ def __call__(self, data: numpy.ndarray, test=None):
+ assert not test
+
+ blur_size = numpy.abs(numpy.random.randn()) * self._blur_size_factor
+ return scipy.ndimage.gaussian_filter(data, (0, blur_size))
+
+
class DataProcessDataset(chainer.dataset.DatasetMixin):
def __init__(self, data: typing.List, data_process: BaseDataProcess) -> None:
self._data = data
@@ -559,6 +573,17 @@ def create_sr(config: SRDatasetConfig):
data_process_train = copy.deepcopy(data_process_base)
+ # blur
+ data_process_train.append(SplitProcess(dict(
+ input=ChainProcess([
+ LambdaProcess(lambda d, test: d['input']),
+ RandomBlurProcess(blur_size_factor=config.blur_size_factor),
+ ]),
+ target=ChainProcess([
+ LambdaProcess(lambda d, test: d['target']),
+ ]),
+ )))
+
# cropping
if config.train_crop_size is not None:
def add_seed():
diff --git a/scripts/voice_conversion_test.py b/scripts/voice_conversion_test.py
index d96d5ce..27e588b 100644
--- a/scripts/voice_conversion_test.py
+++ b/scripts/voice_conversion_test.py
@@ -16,11 +16,13 @@ parser.add_argument('model_names', nargs='+')
parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
parser.add_argument('-iwd', '--input_wave_directory', type=Path,
default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/hiho-wave/hiho-pause-atr503-subset/'))
+parser.add_argument('-it', '--iteration', type=int)
parser.add_argument('-g', '--gpu', type=int)
args = parser.parse_args()
model_directory = args.model_directory # type: Path
input_wave_directory = args.input_wave_directory # type: Path
+it = args.iteration
gpu = args.gpu
paths_test = list(Path('./test_data/').glob('*.wav'))
@@ -53,8 +55,11 @@ for model_name in args.model_names:
path_train = input_paths[0]
path_test = input_paths[-1]
- model_paths = base_model.glob('predictor*.npz')
- model_path = list(sorted(model_paths, key=extract_number))[-1]
+ if it is not None:
+ model_path = base_model / 'predictor_{}.npz'.format(it)
+ else:
+ model_paths = base_model.glob('predictor_*.npz')
+ model_path = list(sorted(model_paths, key=extract_number))[-1]
print(model_path)
acoustic_converter = AcousticConverter(config, model_path, gpu=gpu)