summaryrefslogtreecommitdiff
path: root/become_yukarin
diff options
context:
space:
mode:
author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-01-15 22:18:02 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-01-15 22:18:02 +0900
commit: 83608c12e7bb28df1966cbe5b9d86a8e23175044 (patch)
tree: 6fc24caaa01d447bf9819bf6c45b3e2d33685579 /become_yukarin
parent: c0f3eacabde5d41992a5ae1d8d8f0f170f6b155e (diff)
超解像可能に (Enable super-resolution)
Diffstat (limited to 'become_yukarin')
-rw-r--r--  become_yukarin/__init__.py         | 1
-rw-r--r--  become_yukarin/dataset/dataset.py  | 4
-rw-r--r--  become_yukarin/model/sr_model.py   | 2
-rw-r--r--  become_yukarin/super_resolution.py | 16
4 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/become_yukarin/__init__.py b/become_yukarin/__init__.py
index 81bea87..810ea1f 100644
--- a/become_yukarin/__init__.py
+++ b/become_yukarin/__init__.py
@@ -1,4 +1,5 @@
from . import config
from . import dataset
from . import param
+from .super_resolution import SuperResolution
from .voice_changer import VoiceChanger
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 38cf749..ab05568 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -75,7 +75,7 @@ class WaveFileLoadProcess(BaseDataProcess):
self._pad_second = pad_second
self._dtype = dtype
- def __call__(self, data: str, test):
+ def __call__(self, data: str, test=None):
wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
if self._top_db is not None:
wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
@@ -92,7 +92,7 @@ class AcousticFeatureProcess(BaseDataProcess):
self._alpha = alpha
self._dtype = dtype
- def __call__(self, data: Wave, test):
+ def __call__(self, data: Wave, test=None):
x = data.wave.astype(numpy.float64)
fs = data.sampling_rate
diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py
index 8f9a34b..2e83526 100644
--- a/become_yukarin/model/sr_model.py
+++ b/become_yukarin/model/sr_model.py
@@ -116,6 +116,6 @@ def create_predictor_sr(config: SRModelConfig):
def create_sr(config: SRModelConfig):
- predictor = create_predictor_sr()
+ predictor = create_predictor_sr(config)
discriminator = SRDiscriminator(in_ch=1, out_ch=1)
return predictor, discriminator
diff --git a/become_yukarin/super_resolution.py b/become_yukarin/super_resolution.py
index 535af57..bdb2e61 100644
--- a/become_yukarin/super_resolution.py
+++ b/become_yukarin/super_resolution.py
@@ -1,6 +1,5 @@
from functools import partial
from pathlib import Path
-from typing import Optional
import chainer
import numpy
@@ -39,21 +38,28 @@ class SuperResolution(object):
def convert(self, input: numpy.ndarray) -> numpy.ndarray:
converter = partial(chainer.dataset.convert.concat_examples, padding=0)
- inputs = converter([numpy.log(input)[:, :-1]])
+ pad = 128 - len(input) % 128
+ input = numpy.pad(input, [(0, pad), (0, 0)], mode='minimum')
+ input = numpy.log(input)[:, :-1]
+ input = input[numpy.newaxis]
+ inputs = converter([input])
with chainer.using_config('train', False):
out = self.model(inputs).data[0]
out = out[0]
- out[:, out.shape[1]] = out[:, -1]
+ out = numpy.pad(out, [(0, 0), (0, 1)], mode='edge')
+ out = numpy.exp(out)
+ out = out[:-pad]
return out
def convert_to_audio(
self,
input: numpy.ndarray,
acoustic_feature: AcousticFeature,
- sampling_rate: Optional[int] = None,
+ sampling_rate: int,
):
+ acoustic_feature = acoustic_feature.astype_only_float(numpy.float64)
out = pyworld.synthesize(
f0=acoustic_feature.f0.ravel(),
spectrogram=input.astype(numpy.float64),
@@ -76,7 +82,7 @@ class SuperResolution(object):
self,
input: numpy.ndarray,
acoustic_feature: AcousticFeature,
- sampling_rate: Optional[int] = None,
+ sampling_rate: int,
):
high = self.convert(input)
return self.convert_to_audio(high, acoustic_feature=acoustic_feature, sampling_rate=sampling_rate)