summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHiroshiba Kazuyuki <hihokaruta@gmail.com>2017-12-04 06:19:49 +0900
committerHiroshiba Kazuyuki <hihokaruta@gmail.com>2017-12-04 06:19:49 +0900
commit26b794150ac8b6c1abe4fb20b420fd6bf11bc4c6 (patch)
tree4d60ceadf337f459e01e3135502da4521f29dd61
parentf5683ac7cf8225841dbb4a83a014ca742205a9a8 (diff)
align without eliminate
-rw-r--r--become_yukarin/dataset/dataset.py3
-rw-r--r--become_yukarin/dataset/utility.py19
-rw-r--r--become_yukarin/param.py2
-rw-r--r--scripts/extract_acoustic_feature.py2
4 files changed, 21 insertions, 5 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index b049ed8..0ca35df 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -73,7 +73,8 @@ class WaveFileLoadProcess(BaseDataProcess):
def __call__(self, data: str, test):
wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
- wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
+ if self._top_db is not None:
+ wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
return Wave(wave, self._sample_rate)
diff --git a/become_yukarin/dataset/utility.py b/become_yukarin/dataset/utility.py
index b2f5480..c28b1df 100644
--- a/become_yukarin/dataset/utility.py
+++ b/become_yukarin/dataset/utility.py
@@ -42,5 +42,20 @@ class DTWAligner(object):
class MFCCAligner(DTWAligner):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, dist=nnmnkwii.metrics.melcd, **kwargs)
+ def __init__(self, x, y, *args, **kwargs):
+ x = self._calc_aligner_feature(x)
+ y = self._calc_aligner_feature(y)
+ super().__init__(x, y, *args, dist=nnmnkwii.metrics.melcd, **kwargs)
+
+    @classmethod
+    def _calc_delta(cls, x):
+        """Return the frame-to-frame difference of ``x`` along axis 0.
+
+        Row ``i`` of the result is ``x[i] - x[i + 1]``; the last row has no
+        successor and is left at zero. The input array is not modified.
+
+        The result must be written to a separate array: rebinding ``x`` to
+        the zero-filled buffer would discard the input and make every delta
+        zero.
+        """
+        delta = numpy.zeros_like(x)
+        # Only rows with a following frame get a difference; the final row
+        # keeps the zero it was initialised with.
+        delta[:-1] = x[:-1] - x[1:]
+        return delta
+
+ @classmethod
+ def _calc_aligner_feature(cls, x):
+ d = cls._calc_delta(x)
+ feature = numpy.concatenate((x, d), axis=1)[:, 1:]
+ return feature
diff --git a/become_yukarin/param.py b/become_yukarin/param.py
index 529378c..9173470 100644
--- a/become_yukarin/param.py
+++ b/become_yukarin/param.py
@@ -3,7 +3,7 @@ from typing import NamedTuple
class VoiceParam(NamedTuple):
sample_rate: int = 24000
- top_db: float = 40
+ top_db: float = None
class AcousticFeatureParam(NamedTuple):
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index eba3253..55632c3 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -55,7 +55,7 @@ def generate_feature(path1, path2):
if out1.exists() and out2.exists() and not arguments.enable_overwrite:
return
- # load wave and padding
+ # load wave and padding
wave_file_load_process = WaveFileLoadProcess(
sample_rate=arguments.sample_rate,
top_db=arguments.top_db,