summary refs log tree commit diff
path: root/datasets
diff options
context:
space:
mode:
author Piotr Kozakowski <kozak000@gmail.com> 2017-05-11 17:49:12 +0200
committer Piotr Kozakowski <kozak000@gmail.com> 2017-06-29 15:37:26 +0200
commit 2e308fe8e90276a892637be1bfa174e673ebf414 (patch)
tree 4ff187b37d16476cc936aba84184b8feca9c8612 /datasets
parent 253860fdb0949f0eab6abff09369b0a1236b541a (diff)
Implement SampleRNN
Diffstat (limited to 'datasets')
-rwxr-xr-x datasets/download-from-youtube.sh 30
1 files changed, 30 insertions, 0 deletions
diff --git a/datasets/download-from-youtube.sh b/datasets/download-from-youtube.sh
new file mode 100755
index 0000000..2bcec33
--- /dev/null
+++ b/datasets/download-from-youtube.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+if [ "$#" -ne 3 ]; then
+ echo "Usage: $0 <youtube url> <chunk size in seconds> <dataset path>"
+ exit
+fi
+
+url=$1
+chunk_size=$2
+dataset_path=$3
+
+downloaded=".temp"
+rm -f $downloaded
+format=$(youtube-dl -F $url | grep audio | sed -r 's|([0-9]+).*|\1|g' | tail -n 1)
+youtube-dl $url -f $format -o $downloaded
+
+converted=".temp2.wav"
+rm -f $converted
+ffmpeg -i $downloaded -ac 1 -ab 16k -ar 16000 $converted
+rm -f $downloaded
+
+mkdir $dataset_path
+length=$(ffprobe -i $converted -show_entries format=duration -v quiet -of csv="p=0")
+end=$(echo "$length / $chunk_size - 1" | bc)
+echo "splitting..."
+for i in $(seq 0 $end); do
+ ffmpeg -hide_banner -loglevel error -ss $(($i * $chunk_size)) -t $chunk_size -i $converted "$dataset_path/$i.wav"
+done
+echo "done"
+rm -f $converted