summaryrefslogtreecommitdiff
path: root/get.pl
blob: 2081bb8f3df2c0a8b21f250a65b8a472ed8e25eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/perl

#use DotenvSimple;
#DotenvSimple::source_dotenv('.env');

# Turn SIGINT and SIGTERM into an ordinary exit with status 1 instead of the
# default signal death.
foreach my $signal_name (qw(INT TERM)) {
  $SIG{$signal_name} = sub { exit 1 };
}

# sanitize($name) - reduce a file name or title to a safe, lowercase stem.
#
# Processing order:
#   1. lowercase the input and trim leading/trailing whitespace;
#   2. drop the final ".ext" component, if any;
#   3. delete punctuation and shell metacharacters
#      (- [ ] ( ) + * \ / ' " ` ! @ # $ % ^ &);
#   4. turn every run of whitespace into "_" and collapse repeated "_".
#
# Returns the resulting stem (without extension).
# If nothing is left after step 4's whitespace replacement, prints
# "fn is empty" to STDERR and terminates the whole program with status 1.
#
# The ($) prototype is kept on purpose: it forces scalar context on the
# argument, which existing call sites rely on.
sub sanitize ($) {
  my $s = lc shift;
  $s =~ s/^\s+//;
  $s =~ s/\s+$//;
  # Anchored at the end of the string, so this can match at most once.
  $s =~ s/\.[^.]+$//;
  $s =~ s/[-\[\]\(\)\+\*\\\/\'\"\`\!\@\#\$\%\^\&]//g;
  $s =~ s/\s+/_/g;
  if (length($s) == 0) {
    print STDERR "fn is empty\n";
    exit(1);
  }
  $s =~ s/_+/_/g;
  return $s;
}

# Main script: fetch every command-line argument into datasets/, hand the
# resulting file to dataset.pl, and queue a "standard <tag>" line in
# ../run_slap.sh for the files that were fetched from a URL.
#
# Each argument is handled as one of:
#   * an http(s) URL ending in a known audio extension -> downloaded with wget
#     under a sanitized name;
#   * any other http(s) URL -> given to youtube-dl, audio extracted as FLAC
#     and renamed to a sanitized name;
#   * anything else -> treated as a file name below the asdf.us "neural"
#     directory (".wav" is appended when the name contains no dot).
#
# External programs are always started in list form so that no argument is
# ever interpreted by a shell.
use strict;
use warnings;
use File::Basename qw(basename);

chdir('datasets') or die "cannot chdir to datasets: $!\n";

foreach my $arg (@ARGV) {
  # Work on a trimmed copy so that @ARGV itself is left alone.
  my $s = $arg;
  $s =~ s/^\s+//;
  $s =~ s/\s+$//;

  # Name of the file obtained in this iteration; stays undef when nothing
  # should be appended to run_slap.sh.
  my $fn;

  if ($s =~ /^http/) {
    if ($s =~ /(wav|aiff?|flac|mp3|opus)$/i) {
      my $ext = lc $1;
      # sanitize() strips the extension, so it is put back explicitly.
      my $clean_fn = sanitize(basename($s)) . '.' . $ext;
      if (-e $clean_fn) {
        unlink $clean_fn or warn "cannot remove old $clean_fn: $!\n";
      }
      print "downloading $clean_fn\n";
      if (system('wget', '-O', $clean_fn, $s) != 0) {
        warn "wget failed for $s\n";
        next;
      }
      system('/usr/bin/perl', 'dataset.pl', $clean_fn) == 0
        or warn "dataset.pl failed for $clean_fn\n";
      $fn = $clean_fn;
    } else {
      print "youtube-dl $s\n";
      my $downloaded;
      if (open(my $yt, '-|', 'youtube-dl', '--extract-audio',
               '--audio-format', 'flac', '-o', '%(title)s.%(ext)s', $s)) {
        while (my $line = <$yt>) {
          chomp $line;
          # The last "[ffmpeg] Destination:" line names the extracted file.
          if ($line =~ /\[ffmpeg\] Destination\: (.*\.flac)$/) {
            $downloaded = $1;
          }
        }
        close $yt;
      } else {
        warn "cannot run youtube-dl: $!\n";
      }
      if ($downloaded) {
        my $new_fn = sanitize($downloaded) . '.flac';
        if (!rename($downloaded, $new_fn)) {
          warn "cannot rename $downloaded to $new_fn: $!\n";
          next;
        }
        print "youtube-dl got fn, $downloaded => $new_fn\n";
        system('/usr/bin/perl', 'dataset.pl', $new_fn) == 0
          or warn "dataset.pl failed for $new_fn\n";
        $fn = $new_fn;
      }
    }
  } else {
    if ($s !~ /\..*$/) { $s .= ".wav"; }
    print "downloading $s\n";
    # NOTE(review): credentials are embedded in this URL; consider moving
    # them to the environment or a .netrc file.
    if (system('wget', 'https://neural:spawn5@asdf.us/neural/' . $s) != 0) {
      warn "wget failed for $s\n";
      next;
    }
    system('/usr/bin/perl', 'dataset.pl', $s) == 0
      or warn "dataset.pl failed for $s\n";
    # NOTE(review): as in the original, this branch does not queue a
    # run_slap.sh entry -- confirm whether that is intended.
  }

  if ($fn) {
    my $tag = sanitize($fn);
    if (open(my $fd, '>>', '../run_slap.sh')) {
      print $fd "standard $tag\n";
      close $fd or warn "cannot close ../run_slap.sh: $!\n";
    } else {
      warn "cannot append to ../run_slap.sh: $!\n";
    }
  }
}

# At program exit, step back up to the parent directory. The working
# directory is per-process, so the invoking shell is not affected by this.
END {
  my $parent_dir = '..';
  chdir $parent_dir;
}