From 1978464153fec8ff495aa9885826354dda5707c8 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 21 Jun 2018 15:39:23 +0200 Subject: sanitize --- get.pl | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/get.pl b/get.pl index 1ace44f..d5c2be6 100755 --- a/get.pl +++ b/get.pl @@ -7,6 +7,8 @@ $SIG{TERM} = $SIG{INT} = sub { exit 1 }; sub sanitize ($) { my $s = lc shift; + $s =~ s/^\s+//; + $s =~ s/\s+$//; $s =~ s/\.[^.]+$//g; $s =~ s/\W//g; $s =~ s/\s+/_/g; @@ -20,19 +22,14 @@ sub sanitize ($) { chdir('datasets'); -my $fn, $new_fn; +my $bn, $fn, $new_fn, $ext; foreach my $s (@ARGV){ if ($s =~ /^http/) { if ($s =~ /(wav|aiff?|flac|mp3|opus)$/i) { + $ext = $1; $s =~ s/^\s+//; $s =~ s/\s+$//; - my $fn = `basename $s`; - $clean_fn = $fn; - $clean_fn =~ s/-/_/g; - $clean_fn =~ s/^\s//; - $clean_fn =~ s/\s$//; - $clean_fn =~ s/\s/_/g; - $clean_fn =~ s/_+/_/g; + $clean_fn = sanitize(`basename $s`) + $ext; system('rm', $fn); system('rm', $clean_fn); print "downloading $clean_fn\n"; @@ -62,12 +59,13 @@ foreach my $s (@ARGV){ system(`which wget`, 'https://neural:spawn5@asdf.us/neural/' . $s); system(`which wget`, 'dataset.pl', $s); } - my $tag = sanitize($fn); - open(my $fd, ">>../run_slap.sh"); - print $fd "standard $tag\n"; - close $fn; - - $fn = undef; + if ($fn) { + my $tag = sanitize($fn); + open(my $fd, ">>../run_slap.sh"); + print $fd "standard $tag\n"; + close $fn; + $fn = undef; + } } END { -- cgit v1.2.3-70-g09d2