diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-03 18:01:19 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-03 18:01:19 +0900 |
| commit | 12cb80fb45d0f19c5d98ee60cda346ad324d1377 (patch) | |
| tree | 24550850be2ee54205345a73899dfef0bb6cad6f | |
| parent | 123fd90875f0b3d18192712a97008beb1493243a (diff) | |
true alignment
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 6 |
| -rw-r--r-- | become_yukarin/dataset/utility.py | 12 |
| -rw-r--r-- | become_yukarin/param.py | 2 |
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 47 |
4 files changed, 36 insertions, 31 deletions
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index b597bba..d259734 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -237,12 +237,12 @@ class DecodeFeatureProcess(BaseDataProcess): def __call__(self, data: numpy.ndarray, test): data = data.T - lens = [self._sizes[t] for t in self._targets] - assert data.shape[1] == sum(lens) + lasts = numpy.cumsum([self._sizes[t] for t in self._targets]).tolist() + assert data.shape[1] == lasts[-1] d = defaultdict(lambda: numpy.nan, **{ t: data[:, bef:aft] - for t, bef, aft in zip(self._targets, [0] + lens[:-1], lens) + for t, bef, aft in zip(self._targets, [0] + lasts[:-1], lasts) }) return AcousticFeature( f0=d['f0'], diff --git a/become_yukarin/dataset/utility.py b/become_yukarin/dataset/utility.py index c28b1df..9797818 100644 --- a/become_yukarin/dataset/utility.py +++ b/become_yukarin/dataset/utility.py @@ -13,8 +13,9 @@ class DTWAligner(object): assert x.ndim == 2 and y.ndim == 2 _, path = fastdtw.fastdtw(x, y, radius=radius, dist=dist) - self.normed_path_x = numpy.array(list(map(lambda l: l[0], path))) / len(x) - self.normed_path_y = numpy.array(list(map(lambda l: l[1], path))) / len(y) + path = numpy.array(path) + self.normed_path_x = path[:, 0] / len(x) + self.normed_path_y = path[:, 1] / len(y) def align_x(self, x): path = self._interp_path(self.normed_path_x, len(x)) @@ -34,10 +35,7 @@ class DTWAligner(object): @staticmethod def _interp_path(normed_path: numpy.ndarray, target_length: int): - base = numpy.linspace(0, 1, len(normed_path)) - target = numpy.linspace(0, 1, target_length) - path = scipy.interpolate.interp1d(base, normed_path)(target) - path = numpy.floor(path * target_length).astype(numpy.int) + path = numpy.floor(normed_path * target_length).astype(numpy.int) return path @@ -50,7 +48,7 @@ class MFCCAligner(DTWAligner): @classmethod def _calc_delta(cls, x): x = numpy.zeros_like(x, x.dtype) - x[:-1] = x[:-1] - x[1:] + x[:-1] = x[1:] 
- x[:-1] x[-1] = 0 return x diff --git a/become_yukarin/param.py b/become_yukarin/param.py index 0870bab..e6f46bc 100644 --- a/become_yukarin/param.py +++ b/become_yukarin/param.py @@ -9,7 +9,7 @@ class VoiceParam(NamedTuple): class AcousticFeatureParam(NamedTuple): frame_period: int = 5 - order: int = 59 + order: int = 25 alpha: float = 0.466 diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py index e1794cf..297c10b 100644 --- a/scripts/extract_acoustic_feature.py +++ b/scripts/extract_acoustic_feature.py @@ -116,11 +116,11 @@ def generate_feature(path1, path2): def generate_mean_var(path_directory: Path): path_mean = Path(path_directory, 'mean.npy') - var_mean = Path(path_directory, 'var.npy') + path_var = Path(path_directory, 'var.npy') if path_mean.exists(): path_mean.unlink() - if var_mean.exists(): - var_mean.unlink() + if path_var.exists(): + path_var.unlink() acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False) acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False) @@ -131,33 +131,40 @@ def generate_mean_var(path_directory: Path): mfcc_list = [] for path in path_directory.glob('*'): feature = acoustic_feature_load_process(path) - f0_list.append(numpy.ravel(feature.f0[feature.voiced])) # remove unvoiced - spectrogram_list.append(numpy.ravel(feature.spectrogram)) - aperiodicity_list.append(numpy.ravel(feature.aperiodicity)) - mfcc_list.append(numpy.ravel(feature.mfcc)) + f0_list.append(feature.f0[feature.voiced]) # remove unvoiced + spectrogram_list.append(feature.spectrogram) + aperiodicity_list.append(feature.aperiodicity) + mfcc_list.append(feature.mfcc) - f0_list = numpy.concatenate(f0_list) - spectrogram_list = numpy.concatenate(spectrogram_list) - aperiodicity_list = numpy.concatenate(aperiodicity_list) - mfcc_list = numpy.concatenate(mfcc_list) + def concatenate(arr_list): + try: + arr_list = numpy.concatenate(arr_list) + except: + pass + return arr_list + + f0_list = 
concatenate(f0_list) + spectrogram_list = concatenate(spectrogram_list) + aperiodicity_list = concatenate(aperiodicity_list) + mfcc_list = concatenate(mfcc_list) mean = AcousticFeature( - f0=numpy.mean(f0_list), - spectrogram=numpy.mean(spectrogram_list), - aperiodicity=numpy.mean(aperiodicity_list), - mfcc=numpy.mean(mfcc_list), + f0=numpy.mean(f0_list, axis=0, keepdims=True), + spectrogram=numpy.mean(spectrogram_list, axis=0, keepdims=True), + aperiodicity=numpy.mean(aperiodicity_list, axis=0, keepdims=True), + mfcc=numpy.mean(mfcc_list, axis=0, keepdims=True), voiced=numpy.nan, ) var = AcousticFeature( - f0=numpy.var(f0_list), - spectrogram=numpy.var(spectrogram_list), - aperiodicity=numpy.var(aperiodicity_list), - mfcc=numpy.var(mfcc_list), + f0=numpy.var(f0_list, axis=0, keepdims=True), + spectrogram=numpy.var(spectrogram_list, axis=0, keepdims=True), + aperiodicity=numpy.var(aperiodicity_list, axis=0, keepdims=True), + mfcc=numpy.var(mfcc_list, axis=0, keepdims=True), voiced=numpy.nan, ) acoustic_feature_save_process({'path': path_mean, 'feature': mean}) - acoustic_feature_save_process({'path': var_mean, 'feature': var}) + acoustic_feature_save_process({'path': path_var, 'feature': var}) def main(): |
