diff options
| -rw-r--r-- | become_yukarin/__init__.py | 1 | ||||
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 4 | ||||
| -rw-r--r-- | become_yukarin/model/sr_model.py | 2 | ||||
| -rw-r--r-- | become_yukarin/super_resolution.py | 16 | ||||
| -rw-r--r-- | scripts/super_resolution_test.py | 80 | ||||
| -rw-r--r-- | scripts/voice_conversion_test.py | 2 |
6 files changed, 96 insertions, 9 deletions
diff --git a/become_yukarin/__init__.py b/become_yukarin/__init__.py index 81bea87..810ea1f 100644 --- a/become_yukarin/__init__.py +++ b/become_yukarin/__init__.py @@ -1,4 +1,5 @@ from . import config from . import dataset from . import param +from .super_resolution import SuperResolution from .voice_changer import VoiceChanger diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index 38cf749..ab05568 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -75,7 +75,7 @@ class WaveFileLoadProcess(BaseDataProcess): self._pad_second = pad_second self._dtype = dtype - def __call__(self, data: str, test): + def __call__(self, data: str, test=None): wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0] if self._top_db is not None: wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db)) @@ -92,7 +92,7 @@ class AcousticFeatureProcess(BaseDataProcess): self._alpha = alpha self._dtype = dtype - def __call__(self, data: Wave, test): + def __call__(self, data: Wave, test=None): x = data.wave.astype(numpy.float64) fs = data.sampling_rate diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py index 8f9a34b..2e83526 100644 --- a/become_yukarin/model/sr_model.py +++ b/become_yukarin/model/sr_model.py @@ -116,6 +116,6 @@ def create_predictor_sr(config: SRModelConfig): def create_sr(config: SRModelConfig): - predictor = create_predictor_sr() + predictor = create_predictor_sr(config) discriminator = SRDiscriminator(in_ch=1, out_ch=1) return predictor, discriminator diff --git a/become_yukarin/super_resolution.py b/become_yukarin/super_resolution.py index 535af57..bdb2e61 100644 --- a/become_yukarin/super_resolution.py +++ b/become_yukarin/super_resolution.py @@ -1,6 +1,5 @@ from functools import partial from pathlib import Path -from typing import Optional import chainer import numpy @@ -39,21 +38,28 @@ class SuperResolution(object): def convert(self, input: numpy.ndarray) -> numpy.ndarray: converter = partial(chainer.dataset.convert.concat_examples, padding=0) - inputs = converter([numpy.log(input)[:, :-1]]) + pad = 128 - len(input) % 128 + input = numpy.pad(input, [(0, pad), (0, 0)], mode='minimum') + input = numpy.log(input)[:, :-1] + input = input[numpy.newaxis] + inputs = converter([input]) with chainer.using_config('train', False): out = self.model(inputs).data[0] out = out[0] - out[:, out.shape[1]] = out[:, -1] + out = numpy.pad(out, [(0, 0), (0, 1)], mode='edge') + out = numpy.exp(out) + out = out[:-pad] return out def convert_to_audio( self, input: numpy.ndarray, acoustic_feature: AcousticFeature, - sampling_rate: Optional[int] = None, + sampling_rate: int, ): + acoustic_feature = acoustic_feature.astype_only_float(numpy.float64) out = pyworld.synthesize( f0=acoustic_feature.f0.ravel(), spectrogram=input.astype(numpy.float64), @@ -76,7 +82,7 @@ class SuperResolution(object): self, input: numpy.ndarray, acoustic_feature: AcousticFeature, - sampling_rate: Optional[int] = None, + sampling_rate: int, ): high = self.convert(input) return self.convert_to_audio(high, acoustic_feature=acoustic_feature, sampling_rate=sampling_rate) diff --git a/scripts/super_resolution_test.py b/scripts/super_resolution_test.py new file mode 100644 index 0000000..8b04ce0 --- /dev/null +++ b/scripts/super_resolution_test.py @@ -0,0 +1,80 @@ +import argparse +import glob +import multiprocessing +import re +from functools import partial +from pathlib import Path + +import librosa +import numpy + +from become_yukarin import SuperResolution +from become_yukarin.config.sr_config import create_from_json as create_config +from become_yukarin.dataset.dataset import AcousticFeatureProcess +from become_yukarin.dataset.dataset import WaveFileLoadProcess + +parser = argparse.ArgumentParser() +parser.add_argument('model_names', nargs='+') +parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/')) +parser.add_argument('-iwd', '--input_wave_directory', type=Path, + default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/yukari-wave/yukari-news/')) +args = parser.parse_args() + +model_directory = args.model_directory # type: Path +input_wave_directory = args.input_wave_directory # type: Path + +paths_test = list(Path('./test_data_sr/').glob('*.wav')) + + +def extract_number(f): + s = re.findall("\d+", str(f)) + return int(s[-1]) if s else -1 + + +def process(p: Path, super_resolution: SuperResolution): + param = config.dataset.param + wave_process = WaveFileLoadProcess( + sample_rate=param.voice_param.sample_rate, + top_db=None, + ) + acoustic_feature_process = AcousticFeatureProcess( + frame_period=param.acoustic_feature_param.frame_period, + order=param.acoustic_feature_param.order, + alpha=param.acoustic_feature_param.alpha, + ) + + try: + if p.suffix in ['.npy', '.npz']: + p = glob.glob(str(input_wave_directory / p.stem) + '.*')[0] + p = Path(p) + input = acoustic_feature_process(wave_process(str(p))) + wave = super_resolution(input.spectrogram, acoustic_feature=input, sampling_rate=param.voice_param.sample_rate) + librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True) + except: + import traceback + print('error!', str(p)) + traceback.format_exc() + + +for model_name in args.model_names: + base_model = model_directory / model_name + config = create_config(base_model / 'config.json') + + input_paths = list(sorted([Path(p) for p in glob.glob(str(config.dataset.input_glob))])) + numpy.random.RandomState(config.dataset.seed).shuffle(input_paths) + path_train = input_paths[0] + path_test = input_paths[-1] + + model_paths = base_model.glob('predictor*.npz') + model_path = list(sorted(model_paths, key=extract_number))[-1] + print(model_path) + super_resolution = SuperResolution(config, model_path) + + output = Path('./output').absolute() / base_model.name + output.mkdir(exist_ok=True) + + paths = [path_train, path_test] + paths_test + + process_partial = partial(process, super_resolution=super_resolution) + pool = multiprocessing.Pool() + pool.map(process_partial, paths) diff --git a/scripts/voice_conversion_test.py b/scripts/voice_conversion_test.py index e0ed8c4..24982ea 100644 --- a/scripts/voice_conversion_test.py +++ b/scripts/voice_conversion_test.py @@ -9,7 +9,7 @@ import librosa import numpy from become_yukarin import VoiceChanger -from become_yukarin.config import create_from_json as create_config +from become_yukarin.config.config import create_from_json as create_config parser = argparse.ArgumentParser() parser.add_argument('model_names', nargs='+') |
