summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hiroshiba Kazuyuki <hihokaruta@gmail.com> 2017-12-24 20:24:21 +0900
committer: Hiroshiba Kazuyuki <hihokaruta@gmail.com> 2017-12-24 20:24:21 +0900
commit: 93df4c160b8332a4ef41190860b5056905143def (patch)
tree: 15a512e79fc81b2c37d56a3c4cdc265bd4a466d8
parent: 658e2570428867a27470c38b071ac81e7756bcf2 (diff)
add pad second
-rw-r--r-- become_yukarin/dataset/dataset.py | 6
-rw-r--r-- become_yukarin/param.py | 1
-rw-r--r-- scripts/extract_acoustic_feature.py | 2
3 files changed, 8 insertions, 1 deletion
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 0ca35df..fa68a78 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -66,15 +66,19 @@ class SplitProcess(BaseDataProcess):
class WaveFileLoadProcess(BaseDataProcess):
- def __init__(self, sample_rate: int, top_db: float, dtype=numpy.float32):
+ def __init__(self, sample_rate: int, top_db: float, pad_second: float = 0, dtype=numpy.float32):
self._sample_rate = sample_rate
self._top_db = top_db
+ self._pad_second = pad_second
self._dtype = dtype
def __call__(self, data: str, test):
wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
if self._top_db is not None:
wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
+ if self._pad_second > 0.0:
+ p = int(self._sample_rate * self._pad_second)
+ wave = numpy.pad(wave, pad_width=(p, p), mode='constant')
return Wave(wave, self._sample_rate)
diff --git a/become_yukarin/param.py b/become_yukarin/param.py
index 9173470..0870bab 100644
--- a/become_yukarin/param.py
+++ b/become_yukarin/param.py
@@ -4,6 +4,7 @@ from typing import NamedTuple
class VoiceParam(NamedTuple):
sample_rate: int = 24000
top_db: float = None
+ pad_second: float = 0.0
class AcousticFeatureParam(NamedTuple):
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index 55632c3..169e3f3 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -27,6 +27,7 @@ parser.add_argument('--output1_directory', '-o1', type=Path)
parser.add_argument('--output2_directory', '-o2', type=Path)
parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate)
parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
+parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second)
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
@@ -59,6 +60,7 @@ def generate_feature(path1, path2):
wave_file_load_process = WaveFileLoadProcess(
sample_rate=arguments.sample_rate,
top_db=arguments.top_db,
+ pad_second=arguments.pad_second,
)
wave1 = wave_file_load_process(path1, test=True)
wave2 = wave_file_load_process(path2, test=True)