summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-21 06:37:41 +0900
committer Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-21 06:37:41 +0900
commit d6af2a851644afe253b97461b35138011a479a95 (patch)
tree bc94f2d6e6723ee3240032f901175d9501d512c2
parent 16b4e72fe6728e2e64d4c6357b7c73ac06868c1c (diff)
modify aligner
-rw-r--r-- become_yukarin/model.py 4
-rw-r--r-- scripts/extract_acoustic_feature.py 52
-rw-r--r-- train.py 3
3 files changed, 41 insertions, 18 deletions
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 6dfb2c1..3b5102e 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -187,9 +187,9 @@ class Aligner(chainer.link.Chain):
h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1))) # h: batch * (timeA, channel)
_, h = self.gru(None, h) # h: batch * (timeA, ?)
h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1)) # h: (batch, ?, timeA)
- h = chainer.functions.softmax(self.last(h), axis=2) # h: (batch, timeB, timeA)
+ h = chainer.functions.softmax(self.last(h), axis=1) # h: (batch, timeB, timeA)
- h = chainer.functions.matmul(x, h, transb=True) # h: (batch, channel, timeB)
+ h = chainer.functions.matmul(x, h) # h: (batch, channel, time)
return h
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index f7cbaa1..efde9b1 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -30,6 +30,7 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
+parser.add_argument('--disable_alignment', action='store_true')
arguments = parser.parse_args()
@@ -65,34 +66,53 @@ def generate_feature(path1, path2):
f2 = acoustic_feature_process(wave2, test=True)
# alignment
- aligner = MFCCAligner(f1.mfcc, f2.mfcc)
+ if not arguments.disable_alignment:
+ aligner = MFCCAligner(f1.mfcc, f2.mfcc)
- f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
- spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
- aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
- mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
- voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
+ f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
+ spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
+ aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
+ mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
+ voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
+
+ f1 = AcousticFeature(
+ f0=f0_1,
+ spectrogram=spectrogram_1,
+ aperiodicity=aperiodicity_1,
+ mfcc=mfcc_1,
+ voiced=voiced_1,
+ )
+ f2 = AcousticFeature(
+ f0=f0_2,
+ spectrogram=spectrogram_2,
+ aperiodicity=aperiodicity_2,
+ mfcc=mfcc_2,
+ voiced=voiced_2,
+ )
+
+ f1.validate()
+ f2.validate()
# save
acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
path = Path(arguments.output1_directory, path1.stem + '.npy')
feature = AcousticFeature(
- f0=f0_1,
- spectrogram=spectrogram_1,
- aperiodicity=aperiodicity_1,
- mfcc=mfcc_1,
- voiced=voiced_1,
+ f0=f1.f0,
+ spectrogram=f1.spectrogram,
+ aperiodicity=f1.aperiodicity,
+ mfcc=f1.mfcc,
+ voiced=f1.voiced,
)
acoustic_feature_save_process({'path': path, 'feature': feature})
print('saved!', path)
path = Path(arguments.output2_directory, path2.stem + '.npy')
feature = AcousticFeature(
- f0=f0_2,
- spectrogram=spectrogram_2,
- aperiodicity=aperiodicity_2,
- mfcc=mfcc_2,
- voiced=voiced_2,
+ f0=f2.f0,
+ spectrogram=f2.spectrogram,
+ aperiodicity=f2.aperiodicity,
+ mfcc=f2.mfcc,
+ voiced=f2.voiced,
)
acoustic_feature_save_process({'path': path, 'feature': feature})
print('saved!', path)
diff --git a/train.py b/train.py
index 08ef2d9..a9f4e79 100644
--- a/train.py
+++ b/train.py
@@ -2,6 +2,7 @@ import argparse
from functools import partial
from pathlib import Path
+from chainer import cuda
from chainer import optimizers
from chainer import training
from chainer.dataset import convert
@@ -24,6 +25,8 @@ arguments.output.mkdir(exist_ok=True)
config.save_as_json((arguments.output / 'config.json').absolute())
# model
+if config.train.gpu >= 0:
+ cuda.get_device_from_id(config.train.gpu).use()
predictor = create_predictor(config.model)
aligner = create_aligner(config.model)
model = Loss(config.loss, predictor=predictor, aligner=aligner)