diff options
| author | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-21 01:45:45 +0900 |
|---|---|---|
| committer | Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> | 2018-01-21 01:45:45 +0900 |
| commit | b0e8f8594c5f37d60773fa36d2643789fafcaf86 (patch) | |
| tree | 2075b5e802920d8cb2e85630104c602e071b9d7d /scripts/extract_acoustic_feature.py | |
| parent | 4b581ca1c7552094221d236d596e7488aa69d0de (diff) | |
pre convert
Diffstat (limited to 'scripts/extract_acoustic_feature.py')
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 21 |
1 files changed, 19 insertions, 2 deletions
```diff
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index 7943639..d6e7711 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -9,6 +9,8 @@ from pprint import pprint
 
 import numpy
 
+from become_yukarin.acoustic_converter import AcousticConverter
+from become_yukarin.config.config import create_from_json as create_config
 from become_yukarin.data_struct import AcousticFeature
 from become_yukarin.dataset.dataset import AcousticFeatureLoadProcess
 from become_yukarin.dataset.dataset import AcousticFeatureProcess
@@ -26,6 +28,8 @@ parser.add_argument('--input1_directory', '-i1', type=Path)
 parser.add_argument('--input2_directory', '-i2', type=Path)
 parser.add_argument('--output1_directory', '-o1', type=Path)
 parser.add_argument('--output2_directory', '-o2', type=Path)
+parser.add_argument('--pre_converter1_config', type=Path)
+parser.add_argument('--pre_converter1_model', type=Path)
 parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate)
 parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
 parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second)
@@ -39,6 +43,13 @@ arguments = parser.parse_args()
 pprint(dir(arguments))
 
 
+pre_convert = arguments.pre_converter1_config is not None
+if pre_convert:
+    config = create_config(arguments.pre_converter1_config)
+    pre_converter1 = AcousticConverter(config, arguments.pre_converter1_model)
+else:
+    pre_converter1 = None
+
 
 def generate_feature(path1, path2):
     out1 = Path(arguments.output1_directory, path1.stem + '.npy')
@@ -64,9 +75,15 @@ def generate_feature(path1, path2):
     f1 = acoustic_feature_process(wave1, test=True).astype_only_float(numpy.float32)
     f2 = acoustic_feature_process(wave2, test=True).astype_only_float(numpy.float32)
 
+    # pre convert
+    if pre_convert:
+        f1_ref = pre_converter1.convert_to_feature(f1)
+    else:
+        f1_ref = f1
+
     # alignment
     if not arguments.disable_alignment:
-        aligner = MFCCAligner(f1.mfcc, f2.mfcc)
+        aligner = MFCCAligner(f1_ref.mfcc, f2.mfcc)
         f0_1, f0_2 = aligner.align(f1.f0, f2.f0)
         spectrogram_1, spectrogram_2 = aligner.align(f1.spectrogram, f2.spectrogram)
 
@@ -163,7 +180,7 @@ def main():
     arguments.output2_directory.mkdir(exist_ok=True)
 
     pool = multiprocessing.Pool()
-    pool.starmap(generate_feature, zip(paths1, paths2))
+    pool.starmap(generate_feature, zip(paths1, paths2), chunksize=16)
 
     generate_mean_var(arguments.output1_directory)
     generate_mean_var(arguments.output2_directory)
```
