"""Batch voice-conversion script for become-yukarin.

Loads an acoustic-conversion model and a super-resolution model from their
respective model directories, converts every ``*.wav`` file found in the
input directory, and writes the converted waves to the output directory.
"""
import argparse
import glob
import multiprocessing
import re
from functools import partial
from pathlib import Path

import librosa
import numpy

from become_yukarin import AcousticConverter
from become_yukarin import SuperResolution
from become_yukarin import VoiceChanger
from become_yukarin.data_struct import AcousticFeature
from become_yukarin.config.config import create_from_json as create_conv_config
from become_yukarin.config.sr_config import create_from_json as create_sr_config
from become_yukarin.dataset.dataset import AcousticFeatureProcess
from become_yukarin.dataset.dataset import WaveFileLoadProcess

parser = argparse.ArgumentParser()
parser.add_argument('-cmd', '--conv_model_directory', type=Path,
                    default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
parser.add_argument('-srmd', '--sr_model_directory', type=Path,
                    default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
parser.add_argument('-iwd', '--input_directory', type=Path, default=Path('./input'))
parser.add_argument('-owd', '--output_directory', type=Path, default=Path('./output'))
parser.add_argument('-it', '--iteration', type=int)
parser.add_argument('-g', '--gpu', type=int)
args = parser.parse_args()

conv_model = args.conv_model_directory  # type: Path
sr_model = args.sr_model_directory  # type: Path
input_directory = args.input_directory  # type: Path
output_directory = args.output_directory  # type: Path
it = args.iteration  # checkpoint iteration to load; None means "latest"
gpu = args.gpu  # GPU device id; None means CPU (enables multiprocessing)

conv_config = create_conv_config(conv_model / 'config.json')
sr_config = create_sr_config(sr_model / 'config.json')

param = sr_config.dataset.param
acoustic_feature_process = AcousticFeatureProcess(
    frame_period=param.acoustic_feature_param.frame_period,
    order=param.acoustic_feature_param.order,
    alpha=param.acoustic_feature_param.alpha,
    f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
)


def extract_number(f):
    """Return the last integer embedded in *f* (e.g. the checkpoint
    iteration in ``predictor_120000.npz``), or -1 if none is present."""
    s = re.findall(r"\d+", str(f))
    return int(s[-1]) if s else -1


def load_acoustic_converter():
    """Load the acoustic conversion model.

    Uses the checkpoint for the iteration given via ``-it`` when set;
    otherwise picks the checkpoint with the highest iteration number
    found in the model directory.
    """
    if it is not None:
        conv_model_path = conv_model / 'predictor_{}.npz'.format(it)
    else:
        # Latest checkpoint = largest trailing number in the filename.
        conv_model_path = max(conv_model.glob('predictor_*.npz'), key=extract_number)
    print(conv_model_path)
    return AcousticConverter(conv_config, conv_model_path, gpu=gpu)


def load_super_resolution():
    """Load the super-resolution model from its latest checkpoint."""
    sr_model_path = max(sr_model.glob('predictor*.npz'), key=extract_number)
    print(sr_model_path)
    return SuperResolution(sr_config, sr_model_path, gpu=gpu)


def process(p: Path, acoustic_converter: AcousticConverter,
            super_resolution: SuperResolution, output: Path):
    """Convert one wave file *p* and write the result into *output*.

    Failures are logged and swallowed so one bad file does not abort the
    whole batch (this also runs inside pool workers).
    """
    try:
        print(str(p))
        input_wave = acoustic_converter._wave_process(str(p), test=True)
        input_feature = acoustic_converter._feature_process(input_wave, test=True)
        converted_feature = acoustic_converter.convert_to_feature(
            input=input_feature,
            out_sampling_rate=param.voice_param.sample_rate,
        )
        # Keep the source speaker's f0 contour; take everything else
        # from the converted features.
        merged_feature = AcousticFeature(
            f0=input_feature.f0,
            spectrogram=converted_feature.spectrogram,
            aperiodicity=converted_feature.aperiodicity,
            mfcc=converted_feature.mfcc,
            voiced=converted_feature.voiced,
        )
        wave = super_resolution(
            merged_feature.spectrogram.astype(numpy.float32),
            acoustic_feature=merged_feature,
            sampling_rate=param.voice_param.sample_rate,
        )
        librosa.output.write_wav(str(output / p.stem) + '.wav',
                                 wave.wave, wave.sampling_rate, norm=True)
    except Exception:
        # Best-effort batch processing: report and continue with the
        # next file. (Was a bare ``except:``, which also caught
        # KeyboardInterrupt/SystemExit.)
        import traceback
        print('error!', str(p))
        print(traceback.format_exc())


def run():
    """Convert every ``*.wav`` in the input directory."""
    input_paths = list(input_directory.glob('*.wav'))
    acoustic_converter = load_acoustic_converter()
    super_resolution = load_super_resolution()

    output = output_directory.absolute()
    output.mkdir(exist_ok=True)

    process_partial = partial(process,
                              acoustic_converter=acoustic_converter,
                              super_resolution=super_resolution,
                              output=output)
    if gpu is None:
        # CPU mode: fan out over worker processes; the context manager
        # guarantees the pool is terminated and joined.
        with multiprocessing.Pool() as pool:
            pool.map(process_partial, input_paths)
    else:
        # A single GPU cannot be shared across processes; run serially.
        for input_path in input_paths:
            process_partial(input_path)


# The __main__ guard is required for multiprocessing safety (spawn start
# method re-imports this module in every worker).
if __name__ == '__main__':
    run()