diff options
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 6 | ||||
| -rw-r--r-- | become_yukarin/param.py | 1 | ||||
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 2 |
3 files changed, 8 insertions, 1 deletion
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index 0ca35df..fa68a78 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -66,15 +66,19 @@ class SplitProcess(BaseDataProcess): class WaveFileLoadProcess(BaseDataProcess): - def __init__(self, sample_rate: int, top_db: float, dtype=numpy.float32): + def __init__(self, sample_rate: int, top_db: float, pad_second: float = 0, dtype=numpy.float32): self._sample_rate = sample_rate self._top_db = top_db + self._pad_second = pad_second self._dtype = dtype def __call__(self, data: str, test): wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0] if self._top_db is not None: wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db)) + if self._pad_second > 0.0: + p = int(self._sample_rate * self._pad_second) + wave = numpy.pad(wave, pad_width=(p, p), mode='constant') return Wave(wave, self._sample_rate) diff --git a/become_yukarin/param.py b/become_yukarin/param.py index 9173470..0870bab 100644 --- a/become_yukarin/param.py +++ b/become_yukarin/param.py @@ -4,6 +4,7 @@ from typing import NamedTuple class VoiceParam(NamedTuple): sample_rate: int = 24000 top_db: float = None + pad_second: float = 0.0 class AcousticFeatureParam(NamedTuple): diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py index 55632c3..169e3f3 100644 --- a/scripts/extract_acoustic_feature.py +++ b/scripts/extract_acoustic_feature.py @@ -27,6 +27,7 @@ parser.add_argument('--output1_directory', '-o1', type=Path) parser.add_argument('--output2_directory', '-o2', type=Path) parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate) parser.add_argument('--top_db', type=float, default=base_voice_param.top_db) +parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second) parser.add_argument('--frame_period', type=int, 
default=base_acoustic_feature_param.frame_period) parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order) parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha) @@ -59,6 +60,7 @@ def generate_feature(path1, path2): wave_file_load_process = WaveFileLoadProcess( sample_rate=arguments.sample_rate, top_db=arguments.top_db, + pad_second=arguments.pad_second, ) wave1 = wave_file_load_process(path1, test=True) wave2 = wave_file_load_process(path2, test=True) |
