1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
"""
Extract low- and high-quality spectrogram data.
"""
import argparse
import multiprocessing
from pathlib import Path
from pprint import pprint
import numpy
import pysptk
import pyworld
from become_yukarin.dataset.dataset import AcousticFeatureProcess
from become_yukarin.dataset.dataset import WaveFileLoadProcess
from become_yukarin.param import AcousticFeatureParam
from become_yukarin.param import VoiceParam
# Library defaults; they provide the fallback value of each tunable option.
base_voice_param = VoiceParam()
base_acoustic_feature_param = AcousticFeatureParam()

# Command-line interface. Parsing happens at module level because
# ``generate_file`` and ``main`` read ``arguments`` as a module global.
parser = argparse.ArgumentParser()
parser.add_argument('--input_directory', '-i', type=Path)
parser.add_argument('--output_directory', '-o', type=Path)
# Options forwarded to WaveFileLoadProcess.
parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate)
parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second)
# Options forwarded to AcousticFeatureProcess.
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
# Without this flag, inputs whose output file already exists are skipped.
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()

# Echo the effective settings. ``vars`` gives the option-name -> value
# mapping; ``dir`` would only list attribute names (dunder methods included)
# and never show the values themselves.
pprint(vars(arguments))
def generate_file(path):
    """Write the low/high spectrogram pair of one input file to disk.

    The result is stored as ``<output_directory>/<stem of path>.npy`` and
    holds a dict with two entries: ``'high'`` is the spectrogram taken from
    the extracted acoustic feature, ``'low'`` is the spectrogram rebuilt
    from that feature's mel-cepstrum via ``pysptk.mc2sp``.

    Nothing is done when the target already exists, unless
    ``--enable_overwrite`` was given.

    :param path: path of the input file to convert
    """
    target = Path(arguments.output_directory, path.stem + '.npy')
    already_exists = target.exists()
    if already_exists and not arguments.enable_overwrite:
        return

    # Read the waveform; the loader is configured from the CLI options.
    loaded_wave = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )(path, test=True)

    # Extract the acoustic feature and cast its float members to float32.
    extracted = AcousticFeatureProcess(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
    )(loaded_wave, test=True)
    extracted = extracted.astype_only_float(numpy.float32)

    spectrogram_high = extracted.spectrogram

    # Rebuild a spectrogram from the mel-cepstrum, using the FFT size that
    # pyworld reports for this sample rate.
    fft_size = pyworld.get_cheaptrick_fft_size(arguments.sample_rate)
    spectrogram_low = pysptk.mc2sp(extracted.mfcc, alpha=arguments.alpha, fftlen=fft_size)

    # Persist both spectrograms together in a single .npy file.
    payload = {
        'low': spectrogram_low,
        'high': spectrogram_high,
    }
    numpy.save(target.absolute(), payload)
    print('saved!', target)
def main():
    """Run ``generate_file`` over every entry of the input directory.

    Entries matched by ``glob('*')`` in ``arguments.input_directory`` are
    processed in sorted order by a pool of worker processes. The output
    directory is created first if it does not exist yet.
    """
    # sorted() already returns a list, so no extra list() wrapper is needed.
    paths = sorted(arguments.input_directory.glob('*'))
    arguments.output_directory.mkdir(exist_ok=True)

    # Use the pool as a context manager so its worker processes are always
    # released, even when a worker raises. map() blocks until every task is
    # done, so leaving the block never cuts work short.
    with multiprocessing.Pool() as pool:
        pool.map(generate_file, paths)


if __name__ == '__main__':
    main()
|