summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xdatasets/dataset.pl4
-rwxr-xr-xdatasets/split44k.sh4
-rw-r--r--generate.py2
-rwxr-xr-xget.pl22
-rw-r--r--train.py2
5 files changed, 24 insertions, 10 deletions
diff --git a/datasets/dataset.pl b/datasets/dataset.pl
index 568928c..69863b7 100755
--- a/datasets/dataset.pl
+++ b/datasets/dataset.pl
@@ -29,7 +29,8 @@ sub process($) {
}
if ($ext eq 'mp3') {
- system('ffmpeg', '-i', $filename, $name . '.wav');
+ print "Converting mp3 to wav\n";
+ system('ffmpeg', '-y', '-hide_banner', '-loglevel', 'error', '-i', $filename, $name . '.wav');
$filename = $name . '.wav';
}
my $soxi = `soxi $filename`;
@@ -47,7 +48,6 @@ sub process($) {
}
}
-
print "Seconds: $seconds\n";
my $a_tmp = "a_" . $filename;
diff --git a/datasets/split44k.sh b/datasets/split44k.sh
index b679d29..6adc2c7 100755
--- a/datasets/split44k.sh
+++ b/datasets/split44k.sh
@@ -11,14 +11,14 @@ dataset_path=$3
converted=".temp2.wav"
rm -f $converted
-ffmpeg -hide_banner -loglevel error -i $fn -ac 1 -ar 44100 $converted
+ffmpeg -y -hide_banner -loglevel error -i $fn -ac 1 -ar 44100 $converted
mkdir $dataset_path
length=$(ffprobe -i $converted -show_entries format=duration -v quiet -of csv="p=0")
end=$(echo "$length / $chunk_size - 1" | bc)
echo "splitting..."
for i in $(seq 0 $end); do
- ffmpeg -hide_banner -loglevel error -ss $(($i * $chunk_size)) -t $chunk_size -i $converted "$dataset_path/$i.wav"
+ ffmpeg -y -hide_banner -loglevel error -ss $(($i * $chunk_size)) -t $chunk_size -i $converted "$dataset_path/$i.wav"
done
echo "done"
rm -f $converted
diff --git a/generate.py b/generate.py
index f51b586..c56bb9a 100644
--- a/generate.py
+++ b/generate.py
@@ -46,7 +46,7 @@ default_params = {
'results_path': 'results',
'epoch_limit': 1000,
'resume': True,
- 'sample_rate': 16000,
+ 'sample_rate': 44100,
'n_samples': 1,
'sample_length': 80000,
'loss_smoothing': 0.99,
diff --git a/get.pl b/get.pl
index f947bfc..59b84f0 100755
--- a/get.pl
+++ b/get.pl
@@ -1,5 +1,8 @@
#!/usr/bin/perl
+use DotenvSimple;
+DotenvSimple::source_dotenv('.env');
+
$SIG{TERM} = $SIG{INT} = sub { exit 1 };
sub sanitize ($) {
@@ -21,10 +24,21 @@ my $fn, $new_fn;
foreach my $s (@ARGV){
if ($s =~ /^http/) {
if ($s =~ /(wav|aiff?|flac|mp3|opus)$/i) {
+ $s =~ s/^\s+//;
+ $s =~ s/\s+$//;
my $fn = `basename $s`;
- print "downloading $fn\n";
- system('/usr/bin/wget', $s);
- system('/usr/bin/perl', 'dataset.pl', $fn);
+ $clean_fn = $fn;
+ $clean_fn =~ s/-/_/g;
+ $clean_fn =~ s/^\s//;
+ $clean_fn =~ s/\s$//;
+ $clean_fn =~ s/\s/_/g;
+ $clean_fn =~ s/_+/_/g;
+ system('rm', $fn);
+ system('rm', $clean_fn);
+ print "downloading $clean_fn\n";
+ system($ENV{'WGET_BIN'}, '-O', $clean_fn, $s);
+ system('/usr/bin/perl', 'dataset.pl', $clean_fn);
+ $fn = $clean_fn;
} else {
print "youtube-dl $s\n";
my $yt = `youtube-dl --extract-audio --audio-format flac -o "%(title)s.%(ext)s" $s`;
@@ -38,7 +52,7 @@ foreach my $s (@ARGV){
$new_fn = sanitize($fn);
$new_fn .= '.flac';
system('mv', $fn, $new_fn);
- print "got fn, $fn => $new_fn\n";
+ print "youtube-dl got fn, $fn => $new_fn\n";
system('/usr/bin/perl', 'dataset.pl', $new_fn);
}
}
diff --git a/train.py b/train.py
index 23db492..4a68f41 100644
--- a/train.py
+++ b/train.py
@@ -46,7 +46,7 @@ default_params = {
'results_path': 'results',
'epoch_limit': 1000,
'resume': True,
- 'sample_rate': 16000,
+ 'sample_rate': 44100,
'n_samples': 1,
'sample_length': 80000,
'loss_smoothing': 0.99,