diff options
Diffstat (limited to 'bin')
| -rwxr-xr-x | bin/incoming/buckypoll.pl | 33 | ||||
| -rwxr-xr-x | bin/incoming/dl.pl | 264 | ||||
| -rwxr-xr-x | bin/incoming/fixnames.pl | 24 | ||||
| -rwxr-xr-x | bin/incoming/groupfiles.pl | 57 | ||||
| -rwxr-xr-x | bin/incoming/prefix.pl | 36 |
5 files changed, 414 insertions, 0 deletions
diff --git a/bin/incoming/buckypoll.pl b/bin/incoming/buckypoll.pl
new file mode 100755
index 0000000..4ed9d7a
--- /dev/null
+++ b/bin/incoming/buckypoll.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+
+# bucky ftp monitor
+# should be owned by bucky:psacln and chmod 0755
+#
+# Polls the incoming directory every five seconds.  When a ".importnow"
+# marker file exists it is renamed to ".importing", everything under the
+# incoming directory is made world-writable, and the marker is removed.
+
+# nohup ./.buckypoll.pl > .output.log 2>&1 &
+
+use strict;
+use warnings;
+
+my $localpath = "/var/www/vhosts/carbonpictures.com/bucky/incoming";
+
+while (1) {
+    if (-e "$localpath/.importnow") {
+        run("mv", "$localpath/.importnow", "$localpath/.importing");
+        run("chmod", "-R", "0777", $localpath);
+        run("rm", "$localpath/.importing");
+    }
+    sleep(5);
+}
+
+# Run one external command (list form, so no shell is involved).  A failure
+# is reported on STDERR but never fatal: the poller has to keep running.
+sub run {
+    my @cmd = @_;
+    system(@cmd) == 0
+        or warn "command failed (wait status $?): @cmd\n";
+    return;
+}
diff --git a/bin/incoming/dl.pl b/bin/incoming/dl.pl
new file mode 100755
index 0000000..d0a2cff
--- /dev/null
+++ b/bin/incoming/dl.pl
@@ -0,0 +1,264 @@
+#!/usr/bin/perl
+
+# dl.pl - fetch a web page, then download every file it links to.
+#
+# usage: ./dl.pl DIRNAME http://... 'type|type|...' [PREFIX]
+#
+# Arguments are recognised by their shape, in any order:
+#   starts with "http"                  page to scrape
+#   contains "|" or a known file type   pattern a link must match
+#   contains "/" or is "."              directory to download into
+#   anything else                       prefix for downloaded file names
+
+use strict;
+use warnings;
+
+my $DEBUG = 1;
+
+my $DIRNAME = '.';
+my $PREFIX = '';
+my $SOURCE_URL = '';
+my $GREP_BY = '';
+my $URL_FILE = 'index.html';
+my $BASE_HREF = '';
+my $WGET = "/usr/bin/wget";
+# The wget invocations are argument lists (array refs) so they can be run
+# without a shell; make_wget_commands() fills them in.
+my $WGET_SINGLE = [$WGET];
+my $WGET_WEBPAGE = [$WGET];
+my $BIN_MV = "/bin/mv";
+my $TYPE_A = "html|txt|pdf";
+my $TYPE_I = "gif|jpe?g|png|tiff?";
+my $TYPE_B = $TYPE_I."|mp3|mov|avi|wav|aiff?|tiff?|zip|bz2|compress|m4a|m4b";
+my $VALID_TYPES = "$TYPE_A|$TYPE_B";
+my %SEEN = ();
+my %FILES = ();
+
+foreach my $ARG (@ARGV) {
+    $ARG = strip($ARG);
+    if ($ARG =~ /^http/) {
+        $SOURCE_URL = $ARG;
+    }
+    elsif ($ARG =~ /\|/ || $ARG =~ /($VALID_TYPES)/) {
+        $GREP_BY = $ARG;
+        $GREP_BY =~ s/^\(//;
+        $GREP_BY =~ s/\)$//;
+    }
+    elsif ($ARG =~ /\// || $ARG eq ".") {
+        $DIRNAME = $ARG;
+    }
+    else {
+        $PREFIX = $ARG;
+        $PREFIX =~ s/[^a-zA-Z0-9\.\/]//g;
+    }
+}
+
+# Without a page to scrape there is nothing to do.  DIRNAME defaults to ".",
+# so testing it alone could never trigger the usage message.
+if (length($DIRNAME) == 0 || length($SOURCE_URL) == 0) {
+    print STDERR "usage: ./dl.pl DIRNAME http://... '$VALID_TYPES'\n";
+    exit 1;
+}
+
+$BASE_HREF = get_basehref($SOURCE_URL);
+$URL_FILE = get_local_filename($SOURCE_URL);
+$GREP_BY ||= $VALID_TYPES;
+
+if ($DEBUG == 1) {
+    print STDERR <<ARGZ;
+DIRNAME = $DIRNAME
+PREFIX = $PREFIX
+SOURCE_URL = $SOURCE_URL
+BASE_HREF = $BASE_HREF
+GREP_BY = $GREP_BY
+URL_FILE = $URL_FILE
+ARGZ
+}
+
+($WGET_SINGLE, $WGET_WEBPAGE) = make_wget_commands();
+
+# The index page keeps its plain name (no PREFIX) so the check below and the
+# open() that follows can find it.
+wget_single($SOURCE_URL, 1);
+if (! -e $URL_FILE) {
+    print STDERR "\n**** $URL_FILE DOES NOT EXIST\n";
+    exit 1;
+}
+
+open my $urlz, '<', $URL_FILE or die "cannot open $URL_FILE: $!";
+while (my $line = <$urlz>) {
+    process_line(strip($line));
+}
+close $urlz;
+
+# Download every link on one line of HTML that matches $GREP_BY.  Anchors
+# (href=) are used when the line holds a matching <a> tag; otherwise image
+# sources (src=) are used, provided $GREP_BY covers an image type.
+sub process_line {
+    my ($line) = @_;
+    # Loop instead of recursing: a single line may hold hundreds of links.
+    while (defined $line && length $line) {
+        my $has_link  = $line =~ /<a.*href/i  && $line =~ /($GREP_BY)/i;
+        my $has_image = $line =~ /<img.*src/i && $GREP_BY =~ /($TYPE_I)/i;
+        return unless $has_link || $has_image;
+        my $catch = $has_link ? "href" : "src";
+# <p align=Center> <a href="newsamples/outtake6_20thcentury.wav">outtake6_20thcentury</a></td>
+        # A quoted value ends at its closing quote, a bare one at whitespace
+        # or ">".  The last group is the remainder, scanned on the next pass.
+        return unless $line =~ /$catch=(?:(['"])(.*?)\1|([^\s>'"]+))(.*)$/is;
+        my ($url, $rest) = (defined $1 ? $2 : $3, $4);
+        if ($url =~ /($GREP_BY)/) {
+            wget_single($url);
+            foil_redirect($url);
+        }
+        $line = $rest;
+    }
+    return;
+}
+
+# Download $url into $DIRNAME, at most once per run.  Relative URLs are
+# resolved against $BASE_HREF.  Unless $keep_name is true, the downloaded
+# file is then renamed to "$PREFIX-<name>" when a prefix was given.
+sub wget_single {
+    my ($url, $keep_name) = @_;
+    return if !defined $url || length($url) < 1;
+    return if $url =~ /\#/;
+    return if $SEEN{$url};
+    $SEEN{$url} = 1;
+
+    my $file = get_local_filename($url);
+
+    $url = $BASE_HREF.$url unless ($url =~ /^https?:\/\//i);
+
+    # List form: the URL comes from scraped HTML and must never reach a shell.
+    system(@$WGET_SINGLE, $url);
+    sleep 1;
+    if ($PREFIX && !$keep_name && -e $file) {
+        my $prefixed = $file;
+        # Prefix the file name itself, i.e. whatever follows the last "/".
+        $prefixed =~ s{/([^/]*)$}{/${PREFIX}-$1};
+        system($BIN_MV, $file, $prefixed);
+    }
+    return;
+}
+
+# If the file saved for $_url has a binary-type name but contains "<html>",
+# fetch the first src= target found after that tag (presumably the real file
+# behind a redirect page -- confirm against the sites this was written for).
+# NOTE: this looks for the un-prefixed file name, so when a PREFIX is in use
+# the file has already been renamed and only "weird: no ..." is reported.
+sub foil_redirect {
+    my ($_url) = @_;
+    my $_file = get_local_filename($_url);
+    if (! -e $_file) {
+        print STDERR "weird: no $_file\n\n";
+        return;
+    }
+    return unless ($_file =~ /($TYPE_B)/);
+
+    open my $fh, '<', $_file or die "cannot open $_file: $!";
+    my $scanning = 0;
+    my $target;
+    while (my $line = <$fh>) {
+        $scanning = 1 if $line =~ /<html>/;
+        next unless $scanning;
+        next unless $line =~ /src=(?:(['"])(.*?)\1|([^\s>'"]+))/;
+        $target = defined $1 ? $2 : $3;
+        print STDERR "$line";
+        last;
+    }
+    close $fh;
+    print STDERR "^^^ SUSPICIOUS\n" if $scanning;
+    return unless defined $target && length $target;
+
+    my $_newfile = get_filename($target);
+    # \Q...\E: the file name is literal text, not a pattern.
+    if ($_file =~ /\Q$_newfile\E/) {
+        system($BIN_MV, $_file, "$_file.temp");
+    }
+    wget_single($target);
+    if (-e $_newfile) {
+        system($BIN_MV, $_newfile, get_local_filename($_newfile));
+    }
+    return;
+}
+
+# Return $url up to and including its last "/" (the base for relative
+# links).  A bare "scheme://host" gets a trailing "/" instead of being cut
+# down to "scheme://".
+sub get_basehref {
+    my ($url) = @_;
+    return "$url/" if $url =~ m{^\w+://[^/]+$};
+    $url =~ s/\/([^\/]*)$/\//;
+    return $url;
+}
+
+# Return the extension (without the dot) of the last path component of
+# $_url, or undef when there is none.
+sub get_type {
+    my ($_url) = @_;
+    my ($_type) = $_url =~ m{/[^/]*\.(\w+)$};
+    return $_type;
+}
+
+# Return the path under $DIRNAME that corresponds to the download of $_url.
+sub get_local_filename {
+    my ($_url) = @_;
+    my $_file;
+    if ($_url =~ /\//) {
+        $_file = get_filename($_url);
+    }
+    else {
+        $_file = $_url;
+    }
+    $_file = "$DIRNAME/$_file";
+    $_file =~ s/\/+/\//g;
+    print STDERR " -> $_url => $_file\n" if $DEBUG == 1;
+    return $_file;
+}
+
+# Return the last path component of $_url, or "index.html" when the URL
+# names a directory (trailing "/") or is a bare "scheme://host".
+sub get_filename {
+    my ($_url) = @_;
+    $_url =~ s/\#*$//;
+    return "index.html" if $_url =~ m{/$} || $_url =~ m{^\w+://[^/]+$};
+    if ($_url =~ /\?$/) {
+        print STDERR "possible cgi: $_url\n";
+    }
+    my $_basehref = get_basehref($_url);
+    # Strip the directory part textually; a regex built from the URL would
+    # misread ".", "?" and "+", and would blank out names without a "/".
+    (my $_file = $_url) =~ s{^.*/}{};
+    print STDERR "u: $_url\nb: $_basehref\nf: $_file\n";
+    return $_file;
+}
+
+# Build the wget argument lists: one for fetching a single URL, one for
+# fetching a page with its requisites.  Returns two array references.
+sub make_wget_commands {
+    my $ua = ("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705)");
+    my $dp = $DIRNAME || ".";
+
+# -E = --html-extension
+# -H = --span-hosts
+# -k = --convert-links
+# -K = --backup-converted
+# -p = --page-requisite
+
+    my @SINGLE = ($WGET, "-erobots=off", "--user-agent=$ua",
+                  "--directory-prefix=$dp");
+    my @WEBPAGE = ($WGET, "-erobots=off", "-d", "-o", "wgetlog",
+                   "--user-agent=$ua", "-E", "-H", "-K", "-k", "-p",
+                   "--no-directories", "--directory-prefix=$dp");
+    return (\@SINGLE, \@WEBPAGE);
+}
+
+# Return $q without leading and trailing whitespace ('' for undef).
+sub strip {
+    my ($q) = @_;
+    $q = '' unless defined $q;
+    $q =~ s/^\s+//;
+    $q =~ s/\s+$//;
+    return $q;
+}
diff --git a/bin/incoming/fixnames.pl b/bin/incoming/fixnames.pl
new file mode 100755
index 0000000..1a0249f
--- /dev/null
+++ b/bin/incoming/fixnames.pl
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+# remove non-alphanumerics from filenames in current directory
+# (kept: letters, digits, space, ".", "-" and "_")
+use strict;
+use warnings;
+
+opendir my $dh, "." or die "cannot read current directory: $!";
+# grep, not "while (my $f = readdir ...)": that loop stops at a file named "0".
+my @files = grep { !-d $_ && !/^\./ } readdir $dh;
+closedir $dh;
+
+foreach my $f (@files) {
+    (my $newf = $f) =~ s/[^-_a-zA-Z0-9 .]//g;
+    next unless length $newf;    # nothing would be left of the name
+    next if $newf eq $f;         # already clean
+    if (-e $newf) {
+        # mv would silently replace the other file.
+        print STDERR "skipping $f: $newf already exists\n";
+        next;
+    }
+    print "/bin/mv $f $newf\n";
+    # "--" keeps a name that starts with "-" from being read as an option.
+    system("/bin/mv", "--", $f, $newf);
+}
diff --git a/bin/incoming/groupfiles.pl b/bin/incoming/groupfiles.pl
new file mode 100755
index 0000000..62b6fa5
--- /dev/null
+++ b/bin/incoming/groupfiles.pl
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+# - display counts of mixed-up album mp3s in the current directory
+# - add prefixes to the filenames
+use strict;
+use warnings;
+use MP3::Tag;
+
+my $BIN_MV = "/bin/mv";
+
+opendir my $dh, "." or die "cannot read current directory: $!";
+my @mp3s = grep { /mp3$/ } readdir $dh;
+closedir $dh;
+
+# $albs->{artist key}{album key} = 1, both keys being the first (up to)
+# three characters, lower-cased.
+my $albs = {};
+foreach my $t (sort @mp3s) {
+    my $mp3 = MP3::Tag->new($t);
+    if (!$mp3) {
+        print STDERR "TAGLESS: $t\n";
+        next;
+    }
+    $mp3->get_tags();
+
+    my ($title, $track, $artist, $album, $comment, $year, $genre) = $mp3->autoinfo();
+
+    # Use only the leading digits of the track tag; anything else counts as 0.
+    my ($num) = defined $track ? $track =~ /^\s*(\d+)/ : ();
+    $track = sprintf "%02d", defined $num ? $num : 0;
+    $title = '' unless defined $title;
+
+    # Test the matches: after a failed match $1 still holds an older capture.
+    my $art = (defined $artist && $artist =~ /^(..?.?)/)          ? lc $1 : '';
+    my $alb = (defined $album  && $album  =~ /^(?:The )?(..?.?)/) ? lc $1 : '';
+
+#   my $newf = "$art-$alb-$track $title.mp3";
+    my $newf = "$track $title.mp3";
+    $newf =~ s/[^-A-Za-z0-9 _\.]//g;
+    print $newf."\n";
+
+    if ($newf ne $t) {
+        if (-e $newf) {
+            # Two files with the same track and title: do not overwrite.
+            print STDERR "skipping $t: $newf already exists\n";
+        }
+        else {
+            system($BIN_MV, "--", $t, $newf);
+        }
+    }
+
+    $albs->{$art}{$alb} = 1;
+}
+
+foreach my $art (keys %$albs) {
+    # Parenthesised join: otherwise "\n" becomes one more element to join.
+    print $art, " => ", join(",", keys %{ $albs->{$art} }), "\n";
+}
diff --git a/bin/incoming/prefix.pl b/bin/incoming/prefix.pl
new file mode 100755
index 0000000..9cf59e4
--- /dev/null
+++ b/bin/incoming/prefix.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl
+
+# prefix.pl - rename every non-hidden entry of DIR to "PREFIX-<name>".
+# The first argument that is an existing directory is DIR (default "."),
+# the last other argument is the prefix.
+
+use strict;
+use warnings;
+
+# Parenthesised: "my $DIR, $PREFIX;" declares $DIR only.
+my ($DIR, $PREFIX);
+my $BIN_MV = "/bin/mv";
+foreach my $arg (@ARGV) {
+    if (-d $arg && ! $DIR) {
+        $DIR = $arg;
+    }
+    else {
+        $PREFIX = $arg;
+    }
+}
+# No prefix (including no arguments at all): renaming to "-<name>" is never
+# what was meant, so show the usage line and stop.
+usage() unless defined $PREFIX && length $PREFIX;
+$DIR ||= ".";
+# "or", not "||": "opendir DIR, $DIR || die" can never reach the die.
+opendir my $dh, $DIR or die "cannot read $DIR: $!";
+my @files = grep { !/^\./ } readdir $dh;
+closedir $dh;
+foreach my $file (@files) {
+    system($BIN_MV, "$DIR/$file", "$DIR/$PREFIX-$file");
+}
+
+sub usage {
+    print '$ ./.prefix.pl DIR/ prefix', "\n";
+    exit 1;
+}
