blob: 2bcec33c8cdf381f96f76659cbd84e558e0566be (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#!/bin/sh
# Download the audio track of a YouTube video, convert it to a mono 16 kHz
# WAV file and split it into fixed-length chunks.
#
# Usage: <script> <youtube url> <chunk size in seconds> <dataset path>
#
# Arguments:
#   $1 - URL passed to youtube-dl
#   $2 - chunk length in whole seconds (positive integer, no leading zeros)
#   $3 - output directory; created if missing. Chunks are written as
#        0.wav, 1.wav, ... Only full-length chunks are produced: a trailing
#        remainder shorter than the chunk size is dropped.
#
# Requires: youtube-dl, ffmpeg, ffprobe, bc, mktemp
# Exit status: 0 on success, non-zero on usage error or any failed step.

set -eu

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -ne 3 ]; then
  die "Usage: $0 <youtube url> <chunk size in seconds> <dataset path>"
fi

url=$1
chunk_size=$2
dataset_path=$3

# Reject empty, non-numeric, zero and leading-zero values: zero would make bc
# divide by zero, and a leading zero is read as octal by shell arithmetic
# but as decimal by bc.
case $chunk_size in
  '' | *[!0-9]* | 0*) die "chunk size must be a positive integer: '$chunk_size'" ;;
esac

# Keep intermediate files in a private directory so concurrent runs do not
# clobber each other, and remove it on every exit path.
workdir=$(mktemp -d) || die "cannot create temporary directory"
cleanup() {
  rm -rf -- "$workdir"
}
trap cleanup EXIT
# Plain sh does not run the EXIT trap on a signal unless the signal is
# turned into a normal exit.
trap 'exit 1' HUP INT TERM

downloaded="$workdir/download"
converted="$workdir/converted.wav"

# Pick the last format code that youtube-dl lists as an audio format.
format=$(youtube-dl -F "$url" | grep audio | sed -E 's|([0-9]+).*|\1|g' | tail -n 1)
[ -n "$format" ] || die "no audio format found for $url"

youtube-dl -f "$format" -o "$downloaded" -- "$url"

# Mono, 16 kHz sample rate.
ffmpeg -i "$downloaded" -ac 1 -ab 16k -ar 16000 "$converted"
rm -f -- "$downloaded"

mkdir -p -- "$dataset_path"

length=$(ffprobe -i "$converted" -show_entries format=duration -v quiet -of csv="p=0")
[ -n "$length" ] || die "could not determine duration of converted audio"

# bc's default scale is 0, so this is the number of complete chunks.
chunk_count=$(echo "$length / $chunk_size" | bc)

echo "splitting..."
i=0
while [ "$i" -lt "$chunk_count" ]; do
  ffmpeg -hide_banner -loglevel error \
    -ss "$((i * chunk_size))" -t "$chunk_size" \
    -i "$converted" "$dataset_path/$i.wav"
  i=$((i + 1))
done
echo "done"
|