summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- become_yukarin/dataset/dataset.py 6
-rw-r--r-- become_yukarin/dataset/utility.py 12
-rw-r--r-- become_yukarin/param.py 2
-rw-r--r-- scripts/extract_acoustic_feature.py 47
4 files changed, 36 insertions, 31 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index b597bba..d259734 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -237,12 +237,12 @@ class DecodeFeatureProcess(BaseDataProcess):
def __call__(self, data: numpy.ndarray, test):
data = data.T
- lens = [self._sizes[t] for t in self._targets]
- assert data.shape[1] == sum(lens)
+ lasts = numpy.cumsum([self._sizes[t] for t in self._targets]).tolist()
+ assert data.shape[1] == lasts[-1]
d = defaultdict(lambda: numpy.nan, **{
t: data[:, bef:aft]
- for t, bef, aft in zip(self._targets, [0] + lens[:-1], lens)
+ for t, bef, aft in zip(self._targets, [0] + lasts[:-1], lasts)
})
return AcousticFeature(
f0=d['f0'],
diff --git a/become_yukarin/dataset/utility.py b/become_yukarin/dataset/utility.py
index c28b1df..9797818 100644
--- a/become_yukarin/dataset/utility.py
+++ b/become_yukarin/dataset/utility.py
@@ -13,8 +13,9 @@ class DTWAligner(object):
assert x.ndim == 2 and y.ndim == 2
_, path = fastdtw.fastdtw(x, y, radius=radius, dist=dist)
- self.normed_path_x = numpy.array(list(map(lambda l: l[0], path))) / len(x)
- self.normed_path_y = numpy.array(list(map(lambda l: l[1], path))) / len(y)
+ path = numpy.array(path)
+ self.normed_path_x = path[:, 0] / len(x)
+ self.normed_path_y = path[:, 1] / len(y)
def align_x(self, x):
path = self._interp_path(self.normed_path_x, len(x))
@@ -34,10 +35,7 @@ class DTWAligner(object):
@staticmethod
def _interp_path(normed_path: numpy.ndarray, target_length: int):
- base = numpy.linspace(0, 1, len(normed_path))
- target = numpy.linspace(0, 1, target_length)
- path = scipy.interpolate.interp1d(base, normed_path)(target)
- path = numpy.floor(path * target_length).astype(numpy.int)
+ path = numpy.floor(normed_path * target_length).astype(numpy.int)
return path
@@ -50,7 +48,7 @@ class MFCCAligner(DTWAligner):
@classmethod
def _calc_delta(cls, x):
x = numpy.zeros_like(x, x.dtype)
- x[:-1] = x[:-1] - x[1:]
+ x[:-1] = x[1:] - x[:-1]
x[-1] = 0
return x
diff --git a/become_yukarin/param.py b/become_yukarin/param.py
index 0870bab..e6f46bc 100644
--- a/become_yukarin/param.py
+++ b/become_yukarin/param.py
@@ -9,7 +9,7 @@ class VoiceParam(NamedTuple):
class AcousticFeatureParam(NamedTuple):
frame_period: int = 5
- order: int = 59
+ order: int = 25
alpha: float = 0.466
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index e1794cf..297c10b 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -116,11 +116,11 @@ def generate_feature(path1, path2):
def generate_mean_var(path_directory: Path):
path_mean = Path(path_directory, 'mean.npy')
- var_mean = Path(path_directory, 'var.npy')
+ path_var = Path(path_directory, 'var.npy')
if path_mean.exists():
path_mean.unlink()
- if var_mean.exists():
- var_mean.unlink()
+ if path_var.exists():
+ path_var.unlink()
acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False)
acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)
@@ -131,33 +131,40 @@ def generate_mean_var(path_directory: Path):
mfcc_list = []
for path in path_directory.glob('*'):
feature = acoustic_feature_load_process(path)
- f0_list.append(numpy.ravel(feature.f0[feature.voiced])) # remove unvoiced
- spectrogram_list.append(numpy.ravel(feature.spectrogram))
- aperiodicity_list.append(numpy.ravel(feature.aperiodicity))
- mfcc_list.append(numpy.ravel(feature.mfcc))
+ f0_list.append(feature.f0[feature.voiced]) # remove unvoiced
+ spectrogram_list.append(feature.spectrogram)
+ aperiodicity_list.append(feature.aperiodicity)
+ mfcc_list.append(feature.mfcc)
- f0_list = numpy.concatenate(f0_list)
- spectrogram_list = numpy.concatenate(spectrogram_list)
- aperiodicity_list = numpy.concatenate(aperiodicity_list)
- mfcc_list = numpy.concatenate(mfcc_list)
+ def concatenate(arr_list):
+ try:
+ arr_list = numpy.concatenate(arr_list)
+ except:
+ pass
+ return arr_list
+
+ f0_list = concatenate(f0_list)
+ spectrogram_list = concatenate(spectrogram_list)
+ aperiodicity_list = concatenate(aperiodicity_list)
+ mfcc_list = concatenate(mfcc_list)
mean = AcousticFeature(
- f0=numpy.mean(f0_list),
- spectrogram=numpy.mean(spectrogram_list),
- aperiodicity=numpy.mean(aperiodicity_list),
- mfcc=numpy.mean(mfcc_list),
+ f0=numpy.mean(f0_list, axis=0, keepdims=True),
+ spectrogram=numpy.mean(spectrogram_list, axis=0, keepdims=True),
+ aperiodicity=numpy.mean(aperiodicity_list, axis=0, keepdims=True),
+ mfcc=numpy.mean(mfcc_list, axis=0, keepdims=True),
voiced=numpy.nan,
)
var = AcousticFeature(
- f0=numpy.var(f0_list),
- spectrogram=numpy.var(spectrogram_list),
- aperiodicity=numpy.var(aperiodicity_list),
- mfcc=numpy.var(mfcc_list),
+ f0=numpy.var(f0_list, axis=0, keepdims=True),
+ spectrogram=numpy.var(spectrogram_list, axis=0, keepdims=True),
+ aperiodicity=numpy.var(aperiodicity_list, axis=0, keepdims=True),
+ mfcc=numpy.var(mfcc_list, axis=0, keepdims=True),
voiced=numpy.nan,
)
acoustic_feature_save_process({'path': path_mean, 'feature': mean})
- acoustic_feature_save_process({'path': var_mean, 'feature': var})
+ acoustic_feature_save_process({'path': path_var, 'feature': var})
def main():