#!/bin/sh
#
# download-from-youtube.sh - build a dataset of fixed-length WAV chunks from
# the audio track of an online video.
#
# Usage: download-from-youtube.sh <url> <chunk size in seconds> <dataset path>
#
# Steps:
#   1. Ask youtube-dl for the formats available at <url> and download the
#      last listed one whose description mentions "audio".
#   2. Convert the download to a mono, 16 kHz WAV file with ffmpeg.
#   3. Cut that file into consecutive <chunk size>-second pieces named
#      <dataset path>/0.wav, <dataset path>/1.wav, ...  A trailing remainder
#      shorter than one chunk is dropped.
#
# Exit status: 0 on success, 2 on wrong usage, non-zero on any other failure.
#
# Requires: youtube-dl, ffmpeg, ffprobe, bc, mktemp.
#
# Provenance: datasets/download-from-youtube.sh as added by commit
# 2e308fe8e90276a892637be1bfa174e673ebf414 ("Implement SampleRNN",
# Piotr Kozakowski, 2017-05-11).

set -eu

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -ne 3 ]; then
  printf 'Usage: %s <url> <chunk size in seconds> <dataset path>\n' "$0" >&2
  exit 2
fi

url=$1
chunk_size=$2
dataset_path=$3

# The chunk size is used in shell arithmetic and as a divisor, so it must be
# a positive decimal integer. A leading zero is rejected because $(( )) would
# read it as octal.
case $chunk_size in
  '' | *[!0-9]* | 0*)
    die "chunk size must be a positive integer without leading zeros: $chunk_size"
    ;;
esac

for tool in youtube-dl ffmpeg ffprobe bc; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

# Keep intermediate files in a private directory that is removed on every
# exit path, including interruption by a signal.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/download-from-youtube.XXXXXX") \
  || die "cannot create temporary directory"
cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

downloaded=$workdir/download
converted=$workdir/converted.wav

# Take the format code (first run of digits) from the last listed line that
# mentions "audio".
format=$(youtube-dl -F -- "$url" \
  | grep audio \
  | sed -E 's|([0-9]+).*|\1|' \
  | tail -n 1)
[ -n "$format" ] || die "no audio format found for $url"

youtube-dl -f "$format" -o "$downloaded" -- "$url"

ffmpeg -i "$downloaded" -ac 1 -ab 16k -ar 16000 "$converted"
rm -f -- "$downloaded"

length=$(ffprobe -i "$converted" -show_entries format=duration -v quiet -of csv="p=0")
case $length in
  '' | *[!0-9.]*) die "cannot determine duration of converted audio: $length" ;;
esac

# bc's default scale is 0, so this is the number of whole chunks that fit.
chunks=$(printf '%s / %s\n' "$length" "$chunk_size" | bc)
case $chunks in
  '' | *[!0-9]*) die "cannot compute number of chunks from duration $length" ;;
esac
if [ "$chunks" -eq 0 ]; then
  printf 'warning: audio (%ss) is shorter than one chunk (%ss); no files written\n' \
    "$length" "$chunk_size" >&2
fi

mkdir -p -- "$dataset_path"

echo "splitting..."
i=0
while [ "$i" -lt "$chunks" ]; do
  ffmpeg -hide_banner -loglevel error \
    -ss "$((i * chunk_size))" -t "$chunk_size" \
    -i "$converted" "$dataset_path/$i.wav"
  i=$((i + 1))
done
echo "done"