-rw-r--r--  become_yukarin/dataset/dataset.py    | 75
-rw-r--r--  become_yukarin/voice_changer.py      |  2
-rw-r--r--  scripts/extract_acoustic_feature.py  |  3
-rw-r--r--  scripts/voice_conversion_test.py     | 66
4 files changed, 109 insertions(+), 37 deletions(-)
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 5ad7a80..b597bba 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -66,7 +66,7 @@ class SplitProcess(BaseDataProcess):
 
 
 class WaveFileLoadProcess(BaseDataProcess):
-    def __init__(self, sample_rate: int, top_db: float, pad_second: float = 0, dtype=numpy.float32):
+    def __init__(self, sample_rate: int, top_db: float = None, pad_second: float = 0, dtype=numpy.float32):
         self._sample_rate = sample_rate
         self._top_db = top_db
         self._pad_second = pad_second
@@ -394,27 +394,29 @@ def create(config: DatasetConfig):
 
     data_process_train = copy.deepcopy(data_process_base)
 
-    def add_seed():
-        return LambdaProcess(lambda d, test: dict(seed=numpy.random.randint(2 ** 32), **d))
+    # cropping
+    if config.train_crop_size is not None:
+        def add_seed():
+            return LambdaProcess(lambda d, test: dict(seed=numpy.random.randint(2 ** 32), **d))
 
-    def padding(s):
-        return ChainProcess([
-            LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomPaddingProcess(min_size=config.train_crop_size),
-        ])
+        def padding(s):
+            return ChainProcess([
+                LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
+                RandomPaddingProcess(min_size=config.train_crop_size),
+            ])
 
-    def crop(s):
-        return ChainProcess([
-            LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
-            RandomCropProcess(crop_size=config.train_crop_size),
-        ])
+        def crop(s):
+            return ChainProcess([
+                LambdaProcess(lambda d, test: dict(data=d[s], seed=d['seed'])),
+                RandomCropProcess(crop_size=config.train_crop_size),
+            ])
 
-    data_process_train.append(ChainProcess([
-        add_seed(),
-        SplitProcess(dict(input=padding('input'), target=padding('target'), mask=padding('mask'))),
-        add_seed(),
-        SplitProcess(dict(input=crop('input'), target=crop('target'), mask=crop('mask'))),
-    ]))
+        data_process_train.append(ChainProcess([
+            add_seed(),
+            SplitProcess(dict(input=padding('input'), target=padding('target'), mask=padding('mask'))),
+            add_seed(),
+            SplitProcess(dict(input=crop('input'), target=crop('target'), mask=crop('mask'))),
+        ]))
 
     # add noise
     data_process_train.append(SplitProcess(dict(
@@ -432,23 +434,24 @@ def create(config: DatasetConfig):
     )))
 
     data_process_test = data_process_base
-    data_process_test.append(SplitProcess(dict(
-        input=ChainProcess([
-            LambdaProcess(lambda d, test: d['input']),
-            LastPaddingProcess(min_size=config.train_crop_size),
-            FirstCropProcess(crop_size=config.train_crop_size),
-        ]),
-        target=ChainProcess([
-            LambdaProcess(lambda d, test: d['target']),
-            LastPaddingProcess(min_size=config.train_crop_size),
-            FirstCropProcess(crop_size=config.train_crop_size),
-        ]),
-        mask=ChainProcess([
-            LambdaProcess(lambda d, test: d['mask']),
-            LastPaddingProcess(min_size=config.train_crop_size),
-            FirstCropProcess(crop_size=config.train_crop_size),
-        ]),
-    )))
+    if config.train_crop_size is not None:
+        data_process_test.append(SplitProcess(dict(
+            input=ChainProcess([
+                LambdaProcess(lambda d, test: d['input']),
+                LastPaddingProcess(min_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size),
+            ]),
+            target=ChainProcess([
+                LambdaProcess(lambda d, test: d['target']),
+                LastPaddingProcess(min_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size),
+            ]),
+            mask=ChainProcess([
+                LambdaProcess(lambda d, test: d['mask']),
+                LastPaddingProcess(min_size=config.train_crop_size),
+                FirstCropProcess(crop_size=config.train_crop_size),
+            ]),
+        )))
 
     num_test = config.num_test
     pairs = [
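Aside: the new `top_db: float = None` default above only changes behavior if the loader skips silence trimming when `top_db` is `None`. A minimal sketch of that pattern, assuming `WaveFileLoadProcess` trims via `librosa.effects.trim` (the guard itself is not visible in this hunk):

    # Sketch only: optional silence trimming keyed on top_db.
    # Assumption: WaveFileLoadProcess behaves like this; the guard is not in the diff.
    import librosa
    import numpy

    def load_wave(path, sample_rate: int, top_db: float = None, dtype=numpy.float32):
        wave = librosa.core.load(str(path), sr=sample_rate)[0]
        if top_db is not None:
            # trim leading/trailing regions quieter than top_db (dB below peak)
            wave = librosa.effects.trim(wave, top_db=top_db)[0]
        return wave.astype(dtype)

Passing `top_db=None` from voice_changer.py (next file) would then leave conversion input untrimmed, keeping the converted wave time-aligned with the original recording.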
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index aeae0e6..a765dd6 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -31,7 +31,7 @@ class VoiceChanger(object):
         self._param = param = config.dataset.param
         self._wave_process = WaveFileLoadProcess(
             sample_rate=param.voice_param.sample_rate,
-            top_db=param.voice_param.top_db,
+            top_db=None,
         )
         self._feature_process = AcousticFeatureProcess(
             frame_period=param.acoustic_feature_param.frame_period,
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index 169e3f3..e1794cf 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -5,6 +5,7 @@ extract alignments voices.
 import argparse
 import multiprocessing
 from pathlib import Path
+from pprint import pprint
 
 import numpy
 
@@ -36,6 +37,8 @@ parser.add_argument('--disable_alignment', action='store_true')
 parser.add_argument('--enable_overwrite', action='store_true')
 arguments = parser.parse_args()
 
+pprint(dir(arguments))
+
 
 def make_feature(
         path,
diff --git a/scripts/voice_conversion_test.py b/scripts/voice_conversion_test.py
new file mode 100644
index 0000000..e0ed8c4
--- /dev/null
+++ b/scripts/voice_conversion_test.py
@@ -0,0 +1,66 @@
+import argparse
+import glob
+import multiprocessing
+import re
+from functools import partial
+from pathlib import Path
+
+import librosa
+import numpy
+
+from become_yukarin import VoiceChanger
+from become_yukarin.config import create_from_json as create_config
+
+parser = argparse.ArgumentParser()
+parser.add_argument('model_names', nargs='+')
+parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
+parser.add_argument('-iwd', '--input_wave_directory', type=Path,
+                    default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/hiho-wave/hiho-pause-atr503-subset/'))
+args = parser.parse_args()
+
+model_directory = args.model_directory  # type: Path
+input_wave_directory = args.input_wave_directory  # type: Path
+
+paths_test = list(Path('./test_data/').glob('*.wav'))
+
+
+def extract_number(f):
+    s = re.findall("\d+", str(f))
+    return int(s[-1]) if s else -1
+
+
+def process(p: Path, voice_changer: VoiceChanger):
+    try:
+        if p.suffix in ['.npy', '.npz']:
+            p = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
+            p = Path(p)
+        wave = voice_changer(p)
+        librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True)
+    except:
+        import traceback
+        print('error!', str(p))
+        traceback.format_exc()
+
+
+for model_name in args.model_names:
+    base_model = model_directory / model_name
+    config = create_config(base_model / 'config.json')
+
+    input_paths = list(sorted([Path(p) for p in glob.glob(str(config.dataset.input_glob))]))
+    numpy.random.RandomState(config.dataset.seed).shuffle(input_paths)
+    path_train = input_paths[0]
+    path_test = input_paths[-1]
+
+    model_paths = base_model.glob('predictor*.npz')
+    model_path = list(sorted(model_paths, key=extract_number))[-1]
+    print(model_path)
+    voice_changer = VoiceChanger(config, model_path)
+
+    output = Path('./output').absolute() / base_model.name
+    output.mkdir(exist_ok=True)
+
+    paths = [path_train, path_test] + paths_test
+
+    process_partial = partial(process, voice_changer=voice_changer)
+    pool = multiprocessing.Pool()
+    pool.map(process_partial, paths)
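For reference, the snapshot selection in scripts/voice_conversion_test.py sorts checkpoints by the last run of digits in the filename. A self-contained check of that logic (the snapshot names below are hypothetical):

    import re

    def extract_number(f):
        # last run of digits in the name, or -1 if there is none
        s = re.findall(r"\d+", str(f))
        return int(s[-1]) if s else -1

    snapshots = ['predictor_1000.npz', 'predictor_20000.npz', 'predictor_3000.npz']
    print(sorted(snapshots, key=extract_number)[-1])  # predictor_20000.npz

The numeric key matters: a plain lexicographic sort would put 'predictor_3000.npz' last and silently select an older snapshot than 'predictor_20000.npz'.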

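A hypothetical invocation of the new script (both options also have baked-in defaults; the paths here are illustrative):

    python scripts/voice_conversion_test.py my_model -md /path/to/models/ -iwd /path/to/input/waves/

For each model name it converts two utterances drawn from the dataset glob (the first and last entries after the seeded shuffle, serving as train/test probes) plus every .wav under ./test_data/, writing the converted waves to ./output/<model_name>/ using the newest predictor*.npz snapshot.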