diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-15 22:18:02 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-15 22:18:02 +0900 |
| commit | 83608c12e7bb28df1966cbe5b9d86a8e23175044 (patch) | |
| tree | 6fc24caaa01d447bf9819bf6c45b3e2d33685579 /become_yukarin | |
| parent | c0f3eacabde5d41992a5ae1d8d8f0f170f6b155e (diff) | |
超解像可能に
Diffstat (limited to 'become_yukarin')
| -rw-r--r-- | become_yukarin/__init__.py | 1 | ||||
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 4 | ||||
| -rw-r--r-- | become_yukarin/model/sr_model.py | 2 | ||||
| -rw-r--r-- | become_yukarin/super_resolution.py | 16 |
4 files changed, 15 insertions, 8 deletions
diff --git a/become_yukarin/__init__.py b/become_yukarin/__init__.py index 81bea87..810ea1f 100644 --- a/become_yukarin/__init__.py +++ b/become_yukarin/__init__.py @@ -1,4 +1,5 @@ from . import config from . import dataset from . import param +from .super_resolution import SuperResolution from .voice_changer import VoiceChanger diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index 38cf749..ab05568 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -75,7 +75,7 @@ class WaveFileLoadProcess(BaseDataProcess): self._pad_second = pad_second self._dtype = dtype - def __call__(self, data: str, test): + def __call__(self, data: str, test=None): wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0] if self._top_db is not None: wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db)) @@ -92,7 +92,7 @@ class AcousticFeatureProcess(BaseDataProcess): self._alpha = alpha self._dtype = dtype - def __call__(self, data: Wave, test): + def __call__(self, data: Wave, test=None): x = data.wave.astype(numpy.float64) fs = data.sampling_rate diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py index 8f9a34b..2e83526 100644 --- a/become_yukarin/model/sr_model.py +++ b/become_yukarin/model/sr_model.py @@ -116,6 +116,6 @@ def create_predictor_sr(config: SRModelConfig): def create_sr(config: SRModelConfig): - predictor = create_predictor_sr() + predictor = create_predictor_sr(config) discriminator = SRDiscriminator(in_ch=1, out_ch=1) return predictor, discriminator diff --git a/become_yukarin/super_resolution.py b/become_yukarin/super_resolution.py index 535af57..bdb2e61 100644 --- a/become_yukarin/super_resolution.py +++ b/become_yukarin/super_resolution.py @@ -1,6 +1,5 @@ from functools import partial from pathlib import Path -from typing import Optional import chainer import numpy @@ -39,21 +38,28 @@ class SuperResolution(object): def convert(self, input: numpy.ndarray) -> numpy.ndarray: converter = partial(chainer.dataset.convert.concat_examples, padding=0) - inputs = converter([numpy.log(input)[:, :-1]]) + pad = 128 - len(input) % 128 + input = numpy.pad(input, [(0, pad), (0, 0)], mode='minimum') + input = numpy.log(input)[:, :-1] + input = input[numpy.newaxis] + inputs = converter([input]) with chainer.using_config('train', False): out = self.model(inputs).data[0] out = out[0] - out[:, out.shape[1]] = out[:, -1] + out = numpy.pad(out, [(0, 0), (0, 1)], mode='edge') + out = numpy.exp(out) + out = out[:-pad] return out def convert_to_audio( self, input: numpy.ndarray, acoustic_feature: AcousticFeature, - sampling_rate: Optional[int] = None, + sampling_rate: int, ): + acoustic_feature = acoustic_feature.astype_only_float(numpy.float64) out = pyworld.synthesize( f0=acoustic_feature.f0.ravel(), spectrogram=input.astype(numpy.float64), @@ -76,7 +82,7 @@ class SuperResolution(object): self, input: numpy.ndarray, acoustic_feature: AcousticFeature, - sampling_rate: Optional[int] = None, + sampling_rate: int, ): high = self.convert(input) return self.convert_to_audio(high, acoustic_feature=acoustic_feature, sampling_rate=sampling_rate) |
