summaryrefslogtreecommitdiff
path: root/datasets
diff options
context:
space:
mode:
author: jules <jules@asdf.us> 2018-03-20 23:35:18 +0100
committer: jules <jules@asdf.us> 2018-03-20 23:35:18 +0100
commit: ea6e6ee1040fa85f743ab50b699fbeb04d9a0522 (patch)
tree: e056f13c3ef89c5b6b8713a7f80c837b333129af /datasets
parent: 4167442627b1414ff8fdc86528812b46168c656b (diff)
scripts
Diffstat (limited to 'datasets')
-rwxr-xr-x datasets/count_subdirs.sh 1
-rwxr-xr-x datasets/generate.sh 34
-rwxr-xr-x datasets/split44k.sh 24
-rwxr-xr-x datasets/spread.sh 25
4 files changed, 84 insertions, 0 deletions
diff --git a/datasets/count_subdirs.sh b/datasets/count_subdirs.sh
new file mode 100755
index 0000000..3999b3c
--- /dev/null
+++ b/datasets/count_subdirs.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Print, for the current directory and each of its immediate subdirectories,
+# the number of regular files found beneath it (recursively), one
+# "<dir>:<TAB><count>" line per directory in sorted order.
+
+# The explicit "." start path is required by POSIX and BSD find (GNU find
+# defaults to it). IFS= stops read from trimming whitespace off directory names.
+find . -maxdepth 1 -type d | sort | while IFS= read -r dir; do
+  printf "%s:\t" "$dir"
+  find "$dir" -type f | wc -l
+done
diff --git a/datasets/generate.sh b/datasets/generate.sh
new file mode 100755
index 0000000..335928c
--- /dev/null
+++ b/datasets/generate.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Build chunked 44.1 kHz datasets from WAV files in the current directory.
+# Each source NAME.wav ends up as 8-second chunks in the directory 44k_NAME.
+# Runs ./spread.sh and ./split44k.sh relative to the working directory; the
+# ease_process step also needs sox.
+
+# process NAME
+# Spread NAME.wav over rates 0.99..1.01 (step 0.01) into s_NAME.wav, split that
+# into 8-second chunks under 44k_NAME, then delete the intermediate file.
+process() {
+  local name=$1
+  local in="${name}.wav"
+  local out="s_${in}"
+  echo "____________________________________________________"
+  echo "process $name"
+  ./spread.sh "$in" "$out" 0.99 0.01 1.01
+  ./split44k.sh "$out" 8 "44k_$name"
+  rm -- "$out"
+}
+
+# ease_process NAME STEP
+# Like process, but first writes a copy scaled to 0.95 volume (o_NAME.wav) and
+# spreads that copy over the narrower range 0.999..1.001 using the per-file
+# STEP. Both intermediate files are deleted afterwards.
+ease_process() {
+  local name=$1
+  local step=$2
+  local in="${name}.wav"
+  local sout="o_${in}"
+  local out="s_${in}"
+  echo "____________________________________________________"
+  echo "ease_process $name"
+  sox -v 0.95 "$in" "$sout"
+  ./spread.sh "$sout" "$out" 0.999 "$step" 1.001
+  ./split44k.sh "$out" 8 "44k_$name"
+  rm -- "$sout" "$out"
+}
+
+# Template for adding a source: ease_process '<name>' <step>
+#ease_process '' 0.0000
+ease_process 'blblbl' 0.00001515
+ease_process 'faty-scrub1' 0.0000285
+ease_process 'faty-medieval' 0.00003
+ease_process 'faty-crystals' 0.0000111
+ease_process 'faty-vocal1' 0.000013
+ease_process 'faty-vocal2' 0.000028145
+ease_process 'faty-scrub2' 0.00000466
+ease_process 'siren' 0.0000275
diff --git a/datasets/split44k.sh b/datasets/split44k.sh
new file mode 100755
index 0000000..4884af1
--- /dev/null
+++ b/datasets/split44k.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+# Split an audio file into fixed-length mono 44.1 kHz WAV chunks.
+#
+# Usage: split44k.sh <filename.wav> <chunk size in seconds> <dataset path>
+#
+# The input is converted to a temporary mono 44100 Hz file, which is then cut
+# into consecutive chunks written as <dataset path>/0.wav, 1.wav, ... A final
+# remainder shorter than one chunk is not written. Needs ffmpeg, ffprobe and bc.
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: $0 <filename.wav> <chunk size in seconds> <dataset path>" >&2
+  exit 1
+fi
+
+fn=$1
+chunk_size=$2
+dataset_path=$3
+
+# Fixed scratch file in the working directory; removed before and after use.
+converted=".temp2.wav"
+rm -f "$converted"
+if ! ffmpeg -i "$fn" -ac 1 -ar 44100 "$converted"; then
+  echo "$0: could not convert $fn" >&2
+  rm -f "$converted"
+  exit 1
+fi
+
+# -p: re-running against an existing dataset directory is not an error.
+mkdir -p "$dataset_path"
+length=$(ffprobe -i "$converted" -show_entries format=duration -v quiet -of csv="p=0")
+# bc without -l divides with scale 0, so this is the index of the last whole chunk.
+end=$(echo "$length / $chunk_size - 1" | bc)
+echo "splitting..."
+i=0
+while [ "$i" -le "$end" ]; do
+  ffmpeg -hide_banner -loglevel error -ss "$((i * chunk_size))" -t "$chunk_size" -i "$converted" "$dataset_path/$i.wav"
+  i=$((i + 1))
+done
+echo "done"
+rm -f "$converted"
diff --git a/datasets/spread.sh b/datasets/spread.sh
new file mode 100755
index 0000000..bec1da3
--- /dev/null
+++ b/datasets/spread.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Render an input file at a sweep of playback speeds and join the renders.
+#
+# Usage: spread.sh <in.wav> <out.wav> <rate_min> <rate_step> <rate_max>
+#
+# The rate starts at rate_min and is advanced by rate_step before every render,
+# so the first segment plays at rate_min + rate_step. Rendering stops once the
+# rate exceeds rate_max. Segments are written to tmp_<n>.wav in the working
+# directory, joined into <out.wav> in render order, then removed.
+# Needs sox and bc.
+
+if [ "$#" -ne 5 ]; then
+  echo "Usage: $0 <in.wav> <out.wav> <rate_min> <rate_step> <rate_max>" >&2
+  exit 1
+fi
+
+FN_IN=$1
+FN_OUT=$2
+RATE=$3
+STEP=$4
+MAX=$5
+
+# Track segments explicitly: a tmp_* glob sorts tmp_10.wav before tmp_2.wav,
+# which scrambles the sweep, and would also pick up unrelated stale files.
+segments=()
+ITER=0
+# bc -l prints 1 when RATE is greater than MAX and 0 otherwise.
+while (( ! $(echo "$RATE > $MAX" | bc -l) )); do
+  ITER=$((ITER + 1))
+  RATE=$(echo "$RATE+$STEP" | bc)
+  segment="tmp_$ITER.wav"
+  segments+=("$segment")
+  if ! sox "$FN_IN" "$segment" speed "$RATE"; then
+    echo "$0: sox failed rendering $segment at speed $RATE" >&2
+    rm -f -- "${segments[@]}"
+    exit 1
+  fi
+done
+
+if (( ${#segments[@]} == 0 )); then
+  echo "$0: rate_min ($RATE) already exceeds rate_max ($MAX)" >&2
+  exit 1
+fi
+
+sox "${segments[@]}" "$FN_OUT"
+status=$?
+rm -f -- "${segments[@]}"
+exit "$status"