#!/usr/bin/perl
#use DotenvSimple;
#DotenvSimple::source_dotenv('.env');

# Fetches audio named on the command line into ./datasets, runs dataset.pl on
# each fetched file, and appends a "standard <tag>" line to ../run_slap.sh for
# every file obtained from a URL.
#
# Each argument is handled in one of three ways:
#   * an http(s) URL ending in wav/aif/aiff/flac/mp3/opus is downloaded with wget;
#   * any other http(s) URL is handed to youtube-dl, extracting flac audio;
#   * anything else is treated as a file name below NEURAL_BASE_URL
#     (".wav" is appended when the name contains no dot).

use strict;
use warnings;
use File::Basename qw(basename);

use constant {
    PERL_BIN   => '/usr/bin/perl',
    DATASET_PL => 'dataset.pl',
    RUN_SCRIPT => '../run_slap.sh',

    # NOTE(review): credentials are embedded in this URL; consider loading
    # them from the environment instead of keeping them in source.
    NEURAL_BASE_URL => 'https://neural:spawn5@asdf.us/neural/',
};

# Exit with status 1 on Ctrl-C / TERM instead of dying from the signal.
$SIG{TERM} = $SIG{INT} = sub { exit 1 };

# sanitize($name) -> $tag
#
# Turns a file name into a lowercase tag: trims surrounding whitespace, drops
# the final ".ext", removes the characters - [ ] ( ) + * \ / ' " ` ! @ # $ % ^ &,
# and collapses whitespace and runs of underscores into a single underscore.
# Terminates the program with exit status 1 when nothing is left.
sub sanitize {
    my ($name) = @_;
    my $tag = lc $name;

    $tag =~ s/^\s+//;
    $tag =~ s/\s+$//;
    $tag =~ s/\.[^.]+$//;    # drop the last extension only
    $tag =~ s/[-\[\]\(\)\+\*\\\/\'\"\`\!\@\#\$\%\^\&]//g;
    $tag =~ s/\s+/_/g;
    $tag =~ s/_+/_/g;

    if (length($tag) == 0) {
        print STDERR "fn is empty\n";
        exit(1);
    }
    return $tag;
}

# Runs an external command without a shell. Returns true on success; on
# failure warns (program name only, so URLs with credentials are not echoed)
# and returns false.
sub _run {
    my @cmd = @_;
    my $rc  = system(@cmd);
    return 1 if $rc == 0;

    my $why = $rc == -1 ? "could not start: $!" : "wait status $?";
    warn "$cmd[0] failed ($why)\n";
    return 0;
}

# Downloads a direct audio URL with wget and runs dataset.pl on the result.
# $ext is the extension captured from the URL (original case is kept).
# Returns the local file name, or nothing when the download failed.
sub _fetch_audio_url {
    my ($url, $ext) = @_;

    $url =~ s/^\s+//;
    $url =~ s/\s+$//;

    # basename() in-process: the URL is never passed through a shell.
    my $clean_fn = sanitize(basename($url)) . '.' . $ext;
    print "$url $clean_fn $ext\n";

    # Remove a stale copy; a missing file is not an error.
    unlink $clean_fn;

    print "downloading $clean_fn\n";
    _run('wget', '-O', $clean_fn, $url) or return;
    _run(PERL_BIN, DATASET_PL, $clean_fn);
    return $clean_fn;
}

# Extracts flac audio from a URL via youtube-dl, renames the produced file to
# its sanitized name and runs dataset.pl on it.
# Returns the new file name, or nothing when no file was produced.
sub _fetch_youtube {
    my ($url) = @_;
    print "youtube-dl $url\n";

    # List-form pipe open: arguments go straight to youtube-dl, no shell.
    my $pid = open(
        my $yt, '-|',
        'youtube-dl', '--extract-audio', '--audio-format', 'flac',
        '-o', '%(title)s.%(ext)s', $url
    );
    if (!$pid) {
        warn "cannot run youtube-dl: $!\n";
        return;
    }

    # The last "[ffmpeg] Destination: ..." line names the extracted file.
    my $downloaded;
    while (my $line = <$yt>) {
        chomp $line;
        if ($line =~ /\[ffmpeg\] Destination\: (.*\.flac)$/) {
            $downloaded = $1;
        }
    }
    close $yt or warn "youtube-dl failed (wait status $?)\n";

    return if !defined $downloaded;

    my $new_fn = sanitize($downloaded) . '.flac';
    if (!rename $downloaded, $new_fn) {
        warn "cannot rename '$downloaded' to '$new_fn': $!\n";
        return;
    }
    print "youtube-dl got fn, $downloaded => $new_fn\n";

    _run(PERL_BIN, DATASET_PL, $new_fn);
    return $new_fn;
}

# Downloads a named file from NEURAL_BASE_URL and runs dataset.pl on it.
# No run-script entry is written for these files.
sub _fetch_named {
    my ($name) = @_;

    $name .= '.wav' if $name !~ /\..*$/;
    print "downloading $name\n";

    _run('wget', NEURAL_BASE_URL . $name) or return;
    _run(PERL_BIN, DATASET_PL, $name);
    return;
}

# Appends "standard <tag>" for the given file name to RUN_SCRIPT.
sub _queue_tag {
    my ($fn) = @_;
    my $tag = sanitize($fn);

    my $fh;
    if (!open($fh, '>>', RUN_SCRIPT)) {
        warn 'cannot append to ' . RUN_SCRIPT . ": $!\n";
        return;
    }
    print {$fh} "standard $tag\n";
    close $fh or warn 'error closing ' . RUN_SCRIPT . ": $!\n";
    return;
}

# Processes every source given on the command line, in order.
sub main {
    my @sources = @_;
    return if !@sources;

    # Everything below works relative to ./datasets; continuing elsewhere
    # would scatter downloads and write the run script to the wrong place.
    chdir 'datasets' or die "cannot chdir to datasets: $!\n";

    for my $source (@sources) {
        my $fn;
        if ($source =~ /^http/) {
            if ($source =~ /(wav|aiff?|flac|mp3|opus)$/i) {
                $fn = _fetch_audio_url($source, $1);
            }
            else {
                $fn = _fetch_youtube($source);
            }
        }
        else {
            _fetch_named($source);
        }
        _queue_tag($fn) if defined $fn;
    }
    return;
}

# Run only when executed as a script, not when loaded by another file.
main(@ARGV) unless caller;

1;