modify aligner

author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-21 06:37:41 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-21 06:37:41 +0900
commit: d6af2a851644afe253b97461b35138011a479a95 (patch)
tree: bc94f2d6e6723ee3240032f901175d9501d512c2
parent: 16b4e72fe6728e2e64d4c6357b7c73ac06868c1c (diff)
3 files changed, 41 insertions, 18 deletions
diff --git a/become_yukarin/model.py b/become_yukarin/model.py
index 6dfb2c1..3b5102e 100644
--- a/become_yukarin/model.py
+++ b/become_yukarin/model.py
@@ -187,9 +187,9 @@ class Aligner(chainer.link.Chain):
         h = chainer.functions.separate(chainer.functions.transpose(h, axes=(0, 2, 1)))  # h: batch * (timeA, channel)
         _, h = self.gru(None, h)  # h: batch * (timeA, ?)
         h = chainer.functions.transpose(chainer.functions.stack(h), axes=(0, 2, 1))  # h: (batch, ?, timeA)
-        h = chainer.functions.softmax(self.last(h), axis=2)  # h: (batch, timeB, timeA)
+        h = chainer.functions.softmax(self.last(h), axis=1)  # h: (batch, timeB, timeA)
 
-        h = chainer.functions.matmul(x, h, transb=True)  # h: (batch, channel, timeB)
+        h = chainer.functions.matmul(x, h)  # h: (batch, channel, time)
         return h
 
 
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index f7cbaa1..efde9b1 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -30,6 +30,7 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
 parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
 parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
 parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
+parser.add_argument('--disable_alignment', action='store_true')
 arguments = parser.parse_args()
 
 
@@ -65,34 +66,53 @@ def generate_feature(path1, path2):
     f2 = acoustic_feature_process(wave2, test=True)
 
     # alignment
-    aligner = MFCCAligner(f1.mfcc, f2.mfcc)
+    if not arguments.disable_alignment:
+        aligner = MFCCAligner(f1.mfcc, f2.mfcc)
 
-    f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
-    spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
-    aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
-    mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
-    voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
+        f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
+        spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
+        aperiodicity_1, aperiodicity_2 = aligner.align(f1.aperiodicity, f2.aperiodicity)
+        mfcc_1, mfcc_2 = aligner.align(f1.mfcc, f2.mfcc)
+        voiced_1, voiced_2 = aligner.align(f1.voiced, f2.voiced)
+
+        f1 = AcousticFeature(
+            f0=f0_1,
+            spectrogram=spectrogram_1,
+            aperiodicity=aperiodicity_1,
+            mfcc=mfcc_1,
+            voiced=voiced_1,
+        )
+        f2 = AcousticFeature(
+            f0=f0_2,
+            spectrogram=spectrogram_2,
+            aperiodicity=aperiodicity_2,
+            mfcc=mfcc_2,
+            voiced=voiced_2,
+        )
+
+        f1.validate()
+        f2.validate()
 
     # save
     acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
     path = Path(arguments.output1_directory, path1.stem + '.npy')
     feature = AcousticFeature(
-        f0=f0_1,
-        spectrogram=spectrogram_1,
-        aperiodicity=aperiodicity_1,
-        mfcc=mfcc_1,
-        voiced=voiced_1,
+        f0=f1.f0,
+        spectrogram=f1.spectrogram,
+        aperiodicity=f1.aperiodicity,
+        mfcc=f1.mfcc,
+        voiced=f1.voiced,
     )
     acoustic_feature_save_process({'path': path, 'feature': feature})
     print('saved!', path)
 
     path = Path(arguments.output2_directory, path2.stem + '.npy')
     feature = AcousticFeature(
-        f0=f0_2,
-        spectrogram=spectrogram_2,
-        aperiodicity=aperiodicity_2,
-        mfcc=mfcc_2,
-        voiced=voiced_2,
+        f0=f2.f0,
+        spectrogram=f2.spectrogram,
+        aperiodicity=f2.aperiodicity,
+        mfcc=f2.mfcc,
+        voiced=f2.voiced,
     )
     acoustic_feature_save_process({'path': path, 'feature': feature})
     print('saved!', path)
diff --git a/train.py b/train.py
index 08ef2d9..a9f4e79 100644
--- a/train.py
+++ b/train.py
@@ -2,6 +2,7 @@ import argparse
 from functools import partial
 from pathlib import Path
 
+from chainer import cuda
 from chainer import optimizers
 from chainer import training
 from chainer.dataset import convert
@@ -24,6 +25,8 @@ arguments.output.mkdir(exist_ok=True)
 config.save_as_json((arguments.output / 'config.json').absolute())
 
 # model
+if config.train.gpu >= 0:
+    cuda.get_device_from_id(config.train.gpu).use()
 predictor = create_predictor(config.model)
 aligner = create_aligner(config.model)
 model = Loss(config.loss, predictor=predictor, aligner=aligner)
author	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-21 06:37:41 +0900
committer	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-21 06:37:41 +0900
commit	d6af2a851644afe253b97461b35138011a479a95 (patch)
tree	bc94f2d6e6723ee3240032f901175d9501d512c2
parent	16b4e72fe6728e2e64d4c6357b7c73ac06868c1c (diff)