summaryrefslogtreecommitdiff
path: root/NOTES
blob: 735c23b9fb3590ccc6f042aac105871c96929ecd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# EXTRACT ACOUSTIC FEATURES USING WORLD VOCODER

PYTHONPATH=`pwd` python scripts/extract_acoustic_feature.py \
  --input1_directory './data/mat-holly-24000/wav/holly' \
  --input2_directory './data/mat-holly-24000/wav/mat' \
  --output1_directory './data/mat-holly-24000/feat/holly' \
  --output2_directory './data/mat-holly-24000/feat/mat'

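# Options accepted by the command above (argparse definitions, presumably copied from scripts/extract_acoustic_feature.py):
parser.add_argument('--input1_directory', '-i1', type=Path)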
parser.add_argument('--input2_directory', '-i2', type=Path)
parser.add_argument('--output1_directory', '-o1', type=Path)
parser.add_argument('--output2_directory', '-o2', type=Path)
parser.add_argument('--pre_converter1_config', type=Path)
parser.add_argument('--pre_converter1_model', type=Path)
parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate)
parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second)
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
parser.add_argument('--f0_estimating_method', type=str, default=base_acoustic_feature_param.f0_estimating_method)
parser.add_argument('--f0_floor1', type=float, default=71)
parser.add_argument('--f0_ceil1', type=float, default=800)
parser.add_argument('--f0_floor2', type=float, default=71)
parser.add_argument('--f0_ceil2', type=float, default=800)
parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
parser.add_argument('--enable_overwrite', action='store_true')

# TRAIN VOICE CONVERSION NETWORKS

/home/spawn/.virtualenv/yukarin/bin/python train.py \
  ./20180719133105_2_holly_2_mat.json \
  ./data/mat-holly-24000/conversion-net/pp-el8-holly-2-mat

# OPTIONAL: TRAIN ON MULTIPLE GPUS USING RECIPE.JSON (RUNS MULTIPLE SCREENS)

PYTHONPATH=`pwd` /home/spawn/.virtualenv/yukarin/bin/python scripts/launch.py data/mat-holly-24000/conversion-net/

# TEST VOICE CONVERSION NETWORKS

PYTHONPATH=`pwd` /home/spawn/.virtualenv/yukarin/bin/python scripts/voice_conversion_test.py \
  --model_directory /home/spawn/code/become-yukarin/data/mat-holly-24000/conversion-net/ \
  --input_wave_directory /home/spawn/code/become-yukarin/data/mat-holly-24000/wav/mat/ \
  --gpu 0 \
  pp-el8-mat-2-holly

PYTHONPATH=`pwd` /home/spawn/.virtualenv/yukarin/bin/python scripts/voice_conversion_test.py \
  --model_directory /home/spawn/code/become-yukarin/data/mat-holly-24000/conversion-net/ \
  --input_wave_directory /home/spawn/code/become-yukarin/data/mat-holly-24000/wav/holly/ \
  --gpu 0 \
  pp-el8-holly-2-mat

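# Options accepted by the commands above (argparse definitions, presumably copied from scripts/voice_conversion_test.py):
parser.add_argument('model_names', nargs='+')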
parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
parser.add_argument('-iwd', '--input_wave_directory', type=Path,
                    default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/hiho-wave/hiho-pause-atr503-subset/'))
parser.add_argument('-it', '--iteration', type=int)
parser.add_argument('-g', '--gpu', type=int)

# EXTRACT SPECTROGRAM PAIRS (SPLIT AND RESAMPLE THE SOURCE WAV TO 24 KHZ FIRST)

./split.sh holly_normal.wav 4 holly_normal_4
cd holly_normal_4
mkdir 24k
for i in *.wav
do
  sox "$i" -r 24000 "24k/$i"
done

PYTHONPATH=`pwd` python scripts/extract_spectrogram_pair.py \
  --input_directory './data/holly-24000/wav' \
  --output_directory './data/holly-24000/spec'

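# Options accepted by the command above (argparse definitions, presumably copied from scripts/extract_spectrogram_pair.py):
parser.add_argument('--input_directory', '-i', type=Path)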
parser.add_argument('--output_directory', '-o', type=Path)
parser.add_argument('--sample_rate', type=int, default=base_voice_param.sample_rate)
parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_second)
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
parser.add_argument('--f0_estimating_method', default=base_acoustic_feature_param.f0_estimating_method)
parser.add_argument('--enable_overwrite', action='store_true')

# TRAIN SUPER RESOLUTION NETWORK

/home/spawn/.virtualenv/yukarin/bin/python train_sr.py \
  ./holly_sr.json \
  ./data/holly-24000/net

# TEST SUPER RESOLUTION NETWORK

/home/spawn/.virtualenv/yukarin/bin/python super_resolution_test.py \
  --model_directory /home/spawn/code/become-yukarin/data/holly-24000/ \
  --input_wave_directory /home/spawn/code/become-yukarin/data/mat-holly-24000/wav/holly/ \
  --gpu 0 \
  net

/home/spawn/.virtualenv/yukarin/bin/python super_resolution_test.py \
  --model_directory /home/spawn/code/become-yukarin/data/holly-24000/ \
  --input_wave_directory ./output/pp-el8-mat-2-holly/ \
  --gpu 0 \
  net

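# Options accepted by the commands above (argparse definitions, presumably copied from super_resolution_test.py):
parser.add_argument('model_names', nargs='+')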
parser.add_argument('-md', '--model_directory', type=Path, default=Path('/mnt/dwango/hiroshiba/become-yukarin/'))
parser.add_argument('-iwd', '--input_wave_directory', type=Path,
                    default=Path('/mnt/dwango/hiroshiba/become-yukarin/dataset/yukari-wave/yukari-news/'))
parser.add_argument('-g', '--gpu', type=int)