diff options
| field | value | date |
|---|---|---|
| author | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2017-12-04 06:19:49 +0900 |
| committer | Hiroshiba Kazuyuki <hihokaruta@gmail.com> | 2017-12-04 06:19:49 +0900 |
| commit | 26b794150ac8b6c1abe4fb20b420fd6bf11bc4c6 (patch) | |
| tree | 4d60ceadf337f459e01e3135502da4521f29dd61 | |
| parent | f5683ac7cf8225841dbb4a83a014ca742205a9a8 (diff) | |
align without eliminate
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 3 |
| -rw-r--r-- | become_yukarin/dataset/utility.py | 19 |
| -rw-r--r-- | become_yukarin/param.py | 2 |
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 2 |
4 files changed, 21 insertions, 5 deletions
```diff
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index b049ed8..0ca35df 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -73,7 +73,8 @@ class WaveFileLoadProcess(BaseDataProcess):
 
     def __call__(self, data: str, test):
         wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
-        wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
+        if self._top_db is not None:
+            wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
         return Wave(wave, self._sample_rate)
 
 
diff --git a/become_yukarin/dataset/utility.py b/become_yukarin/dataset/utility.py
index b2f5480..c28b1df 100644
--- a/become_yukarin/dataset/utility.py
+++ b/become_yukarin/dataset/utility.py
@@ -42,5 +42,20 @@ class DTWAligner(object):
 
 
 class MFCCAligner(DTWAligner):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, dist=nnmnkwii.metrics.melcd, **kwargs)
+    def __init__(self, x, y, *args, **kwargs):
+        x = self._calc_aligner_feature(x)
+        y = self._calc_aligner_feature(y)
+        super().__init__(x, y, *args, dist=nnmnkwii.metrics.melcd, **kwargs)
+
+    @classmethod
+    def _calc_delta(cls, x):
+        x = numpy.zeros_like(x, x.dtype)
+        x[:-1] = x[:-1] - x[1:]
+        x[-1] = 0
+        return x
+
+    @classmethod
+    def _calc_aligner_feature(cls, x):
+        d = cls._calc_delta(x)
+        feature = numpy.concatenate((x, d), axis=1)[:, 1:]
+        return feature
diff --git a/become_yukarin/param.py b/become_yukarin/param.py
index 529378c..9173470 100644
--- a/become_yukarin/param.py
+++ b/become_yukarin/param.py
@@ -3,7 +3,7 @@ from typing import NamedTuple
 
 class VoiceParam(NamedTuple):
     sample_rate: int = 24000
-    top_db: float = 40
+    top_db: float = None
 
 
 class AcousticFeatureParam(NamedTuple):
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index eba3253..55632c3 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -55,7 +55,7 @@ def generate_feature(path1, path2):
     if out1.exists() and out2.exists() and not arguments.enable_overwrite:
         return
 
-    # load wave and padding
+    # load wave and padding
     wave_file_load_process = WaveFileLoadProcess(
         sample_rate=arguments.sample_rate,
         top_db=arguments.top_db,
```

Note: the change to the `# load wave and padding` comment in `scripts/extract_acoustic_feature.py` is whitespace-only; the exact whitespace difference is not recoverable from this rendering. Indentation inside the hunks above was reconstructed from the Python syntax and the hunk line counts, since the original page had collapsed all whitespace.
