1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
#!/usr/bin/perl
#use DotenvSimple;
#DotenvSimple::source_dotenv('.env');
# Turn SIGTERM and SIGINT into a plain exit with status 1.
$SIG{$_} = sub { exit 1 } for qw(TERM INT);
# sanitize(NAME)
#
# Turn a file name into a lowercase tag:
#   * lowercases the name and trims surrounding whitespace,
#   * drops the final ".ext" component (only the last dot-suffix),
#   * deletes a fixed set of punctuation characters,
#   * turns whitespace runs into "_" and squeezes repeated underscores.
#
# Returns the cleaned string.  If nothing is left, prints "fn is empty"
# to STDOUT and terminates the program with exit status 1.
sub sanitize ($) {
    my ($name) = @_;

    # Apply every rewrite to the same string via the $_ alias.
    for ($name) {
        $_ = lc;
        s/^\s+//;
        s/\s+$//;
        s/\.[^.]+$//g;
        s/[-\[\]\(\)\+\*\\\/\'\"\`\!\@\#\$\%\^\&]//g;
        s/\s+/_/g;
        s/_+/_/g;
    }

    unless (length $name) {
        print "fn is empty";
        exit(1);
    }

    return $name;
}
# ---------------------------------------------------------------------------
# Main script.
#
# For every command-line argument, fetch an audio file into ./datasets,
# run dataset.pl on it and (for URL arguments) append a "standard <tag>"
# line to ../run_slap.sh:
#
#   * http(s) URL ending in wav/aif/aiff/flac/mp3/opus -> downloaded with wget
#   * any other http(s) URL                            -> youtube-dl, audio
#                                                         extracted as FLAC
#   * anything else -> treated as a file name on the asdf.us server
#                      (".wav" is appended when the name has no dot)
#
# External commands are always run in list form so that URLs and file names
# never pass through a shell.
# ---------------------------------------------------------------------------

# Strictures are enabled from here on only; they are lexically scoped, so
# the code earlier in the file is compiled exactly as before.
use strict;
use warnings;
use File::Basename qw(basename);

# Everything below writes relative to datasets/ (and ../run_slap.sh), so
# continuing in the wrong directory would scatter files.
chdir('datasets') or die "cannot chdir to datasets: $!\n";

foreach my $s (@ARGV) {
    my $fn;    # local file whose tag goes into run_slap.sh, if any

    # Trim before classifying so padded arguments are recognised as URLs.
    $s =~ s/^\s+//;
    $s =~ s/\s+$//;

    if ($s =~ /^http/) {
        if ($s =~ /(wav|aiff?|flac|mp3|opus)$/i) {
            # Direct link to an audio file.
            my $ext      = $1;
            my $clean_fn = sanitize(basename($s)) . '.' . $ext;
            print "$s $clean_fn $ext\n";

            # Drop any stale copy from an earlier run.
            unlink $clean_fn;

            print "downloading $clean_fn\n";
            if (system('wget', '-O', $clean_fn, $s) != 0) {
                warn "wget failed for $s\n";
                next;
            }
            system('/usr/bin/perl', 'dataset.pl', $clean_fn);
            $fn = $clean_fn;
        } else {
            # Let youtube-dl fetch the page's audio and convert it to FLAC.
            print "youtube-dl $s\n";
            my @cmd = (
                'youtube-dl', '--extract-audio', '--audio-format', 'flac',
                '-o', '%(title)s.%(ext)s', $s,
            );
            my $yt;
            unless (open($yt, '-|', @cmd)) {
                warn "cannot run youtube-dl: $!\n";
                next;
            }
            # The last "[ffmpeg] Destination: ..." line names the FLAC file.
            while (my $line = <$yt>) {
                chomp $line;
                if ($line =~ /\[ffmpeg\] Destination\: (.*\.flac)$/) {
                    $fn = $1;
                }
            }
            close($yt) or warn "youtube-dl exited abnormally for $s\n";

            if (defined $fn) {
                my $new_fn = sanitize($fn) . '.flac';
                unless (rename($fn, $new_fn)) {
                    warn "cannot rename $fn to $new_fn: $!\n";
                    next;
                }
                print "youtube-dl got fn, $fn => $new_fn\n";
                system('/usr/bin/perl', 'dataset.pl', $new_fn);
            }
        }
    } else {
        # Bare name: fetch it from the asdf.us server.
        if ($s !~ /\..*$/) { $s .= ".wav"; }
        print "downloading $s\n";
        # NOTE(review): credentials are embedded in this URL; consider moving
        # them to ~/.netrc or the environment.
        if (system('wget', 'https://neural:spawn5@asdf.us/neural/' . $s) != 0) {
            warn "wget failed for $s\n";
            next;
        }
        system('/usr/bin/perl', 'dataset.pl', $s);
        # NOTE(review): as in the original, this branch does not set $fn, so
        # no line is appended to run_slap.sh -- confirm that is intended.
    }

    if (defined $fn) {
        my $tag = sanitize($fn);
        if (open(my $fd, '>>', '../run_slap.sh')) {
            print {$fd} "standard $tag\n";
            close($fd) or warn "cannot close ../run_slap.sh: $!\n";
        } else {
            warn "cannot append to ../run_slap.sh: $!\n";
        }
    }
}
# At program shutdown, move the process's working directory up one level.
END {
    chdir '..';
}
|