#!/bin/bash
#
# Developed by Fred Weinhaus 10/29/2008 .......... revised 3/19/2016
#
# ------------------------------------------------------------------------------
#
# Licensing:
#
# Copyright © Fred Weinhaus
#
# My scripts are available free of charge for non-commercial use, ONLY.
#
# For use of my scripts in commercial (for-profit) environments or
# non-free applications, please contact me (Fred Weinhaus) for
# licensing arrangements. My email address is fmw at alink dot net.
#
# If you: 1) redistribute, 2) incorporate any of these scripts into other
# free applications or 3) reprogram them in another scripting language,
# then you must contact me for permission, especially if the result might
# be used in a commercial or for-profit environment.
#
# My scripts are also subject, in a subordinate manner, to the ImageMagick
# license, which can be found at: http://www.imagemagick.org/script/license.php
#
# ------------------------------------------------------------------------------
#
####
#
# USAGE: kmeansthresh [-g graph] infile outfile
# USAGE: kmeansthresh [-help]
#
# OPTIONS:
#
# -g      graph       graph specifies whether to generate a
#                     histogram graph image displaying the
#                     location and value of the threshold;
#                     choices are: view or save;
#                     default is no graph
#
###
#
# NAME: KMEANSTHRESH
#
# PURPOSE: To automatically threshold an image to binary (b/w) format
# using the k-means technique.
#
# DESCRIPTION: KMEANSTHRESH automatically thresholds an image to binary
# (b/w) format. It assumes the histogram is bimodal, i.e. is the composite
# of two bell-shaped distributions representing the foreground and
# background classes. The k-means approach iteratively thresholds the
# image, computes the means of the foreground (above threshold data) and
# background (at and below threshold value), computes a new threshold
# equal to the average of these two means and repeats until there is no
# change in threshold between successive iterations. This script is
# equivalent to the isodatathresh script, which is implemented using image
# masking rather than from the histogram of the image. The isodatathresh
# script is faster than the kmeansthresh script for smaller images. The
# timing transition occurs for images somewhere between 1000-2000 pixels
# on a side. The kmeansthresh script is moderately insensitive to image
# size, but pays an initial timing penalty in order to compute the
# histogram related data and get the means for each class from the
# histogram using shell computations.
#
# OPTIONS:
#
# -g graph ... GRAPH specifies whether to generate a graph (image) of
# the histogram, displaying the location and value of the threshold.
# The choices are: view, save and none. If graph=view is selected, the
# graph will be created and displayed automatically, but not saved.
# If graph=save is selected, then the graph will be created and saved
# to a file using the infile name, with "_histog_kmeans.gif" appended,
# but the graph will not be displayed automatically. If -g option is
# not specified, then no graph will be created.
#
# NOTE: It is highly recommended that the output not be specified
# as a JPG image as that will cause compression and potentially a
# non-binary (i.e. a graylevel) result. GIF is the recommended
# output format.
#
# REFERENCES: see the following:
# http://www.ph.tn.tudelft.nl/Courses/FIP/noframes/fip-Segmenta.html
# http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/MORSE/threshold.pdf
#
# CAVEAT: No guarantee that this script will work on all platforms,
# nor that trapping of inconsistent parameters is complete and
# foolproof. Use At Your Own Risk.
#
######
#

# set default values
graph=""    # none, save or view

# set directory for temporary files
dir="."     # suggestions are dir="." or dir="/tmp"

# Locate this script on disk; the usage functions below print the header
# comments of the script file itself, so they need its directory and name.
PROGNAME=$(type "$0" | awk '{print $3}')   # search for executable on path
PROGDIR=$(dirname "$PROGNAME")             # extract directory of program
PROGNAME=$(basename "$PROGNAME")           # base name of program

# usage1 [message...]
# Writes the short usage text (the header lines between "####" and "###")
# to stderr, preceded by the program name and any message passed in.
usage1()
{
  echo >&2 ""
  echo >&2 "$PROGNAME:" "$@"
  sed >&2 -e '1,/^####/d; /^###/g; /^#/!q; s/^#//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
}

# usage2 [message...]
# Writes the usage text plus the full description (header lines between
# "####" and "######") to stderr.
usage2()
{
  echo >&2 ""
  echo >&2 "$PROGNAME:" "$@"
  sed >&2 -e '1,/^####/d; /^######/g; /^#/!q; s/^#*//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
}

# errMsg message
# Prints the message, then the short usage text, and exits with status 1.
errMsg()
{
  echo ""
  echo "$1"    # quoted so the message is not word-split or glob-expanded
  echo ""
  usage1
  exit 1
}

# checkMinus value
# Rejects an option value that starts with a minus sign (which means the
# option's argument is missing and the next option was consumed instead).
# The caller sets the global errorMsg to the message to report.
checkMinus()
{
  # A shell pattern is used rather than `echo | grep` because echo would
  # swallow values such as "-n" instead of passing them on to be checked.
  case "$1" in
    -*) errMsg "$errorMsg" ;;
  esac
}

# test for correct number of arguments and get values
if [ $# -eq 0 ]; then
  # help information
  echo ""
  usage2
  exit 0
elif [ $# -gt 4 ]; then
  errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
else
  while [ $# -gt 0 ]; do
    # get parameter values
    case "$1" in
      -h|-help)
        # help information
        echo ""
        usage2
        exit 0
        ;;
      -g)
        # get graph
        shift  # to get the next parameter
        # test if parameter starts with minus sign
        errorMsg="--- INVALID GRAPH SPECIFICATION ---"
        checkMinus "$1"
        graph="$1"
        if [ "$graph" != "view" ] && [ "$graph" != "save" ]; then
          errMsg "--- GRAPH=$graph MUST BE EITHER VIEW OR SAVE ---"
        fi
        ;;
      -)
        # STDIN and end of arguments
        break
        ;;
      -*)
        # any other - argument
        errMsg "--- UNKNOWN OPTION ---"
        ;;
      *)
        # end of arguments
        break
        ;;
    esac
    shift  # next option
  done
  #
  # get infile and outfile
  infile="$1"
  outfile="$2"
fi

# test that infile provided
[ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"

# test that outfile provided
[ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"

# get outname from infile to use for graph
inname=$(convert "$infile" -format "%t" info:)
histfile=${inname}_histog_kmeans.gif

tmpA1="$dir/kmeansthresh_1_$$.mpc"
tmpA2="$dir/kmeansthresh_1_$$.cache"

# Remove the temporary files on every exit path. The file names are
# shell-quoted (%q) and expanded now, so names containing spaces are removed
# correctly. The EXIT trap deliberately does not call `exit`: doing so would
# overwrite the script's real exit status, turning every failure after this
# point (e.g. errMsg's exit 1) into an apparent success.
cleanup_cmd="rm -f -- $(printf '%q ' "$tmpA1" "$tmpA2")"
trap "$cleanup_cmd" 0
trap "$cleanup_cmd$(printf '%q' "$histfile"); exit 1" 1 2 3 15

# get im_version as an 8-digit string, e.g. 06080504 for 6.8.5-4
im_version=$(convert -list configure | \
  sed '/^LIB_VERSION_NUMBER /!d; s//,/; s/,/,0/g; s/,0*\([0-9][0-9]\)/\1/g' | head -n 1)

# colorspace RGB and sRGB swapped between 6.7.5.5 and 6.7.6.7
# though probably not resolved until the latter
# then -colorspace gray changed to linear between 6.7.6.7 and 6.7.8.2
# then -separate converted to linear gray channels between 6.7.6.7 and 6.7.8.2,
# though probably not resolved until the latter
# so -colorspace HSL/HSB -separate and -colorspace gray became linear
# but we need to use -set colorspace RGB before using them at appropriate times
# so that results stay as in original script
# The following was determined from various version tests using kmeansthresh.
# with IM 6.7.4.10, 6.7.6.10, 6.7.8.10
#
# NOTE: the version comparisons must stay in [ ... ]: the operands have
# leading zeros, which [[ ... ]] and (( ... )) would parse as (invalid) octal.
if [ "$im_version" -lt "06070607" ] || [ "$im_version" -gt "06070707" ]; then
  setcspace="-set colorspace RGB"
else
  setcspace=""
fi
# no need for setcspace for grayscale or channels after 6.8.5.4
if [ "$im_version" -gt "06080504" ]; then
  setcspace=""
fi

# Convert the input to grayscale in the temporary MPC file.
# $setcspace is intentionally unquoted: it holds zero or three separate words.
if ! convert -quiet "$infile" $setcspace -colorspace gray +repage "$tmpA1"; then
  errMsg "--- FILE $infile DOES NOT EXIST OR IS NOT AN ORDINARY FILE, NOT READABLE OR HAS ZERO SIZE ---"
fi

# get totpix in image
width=$(convert "$tmpA1" -format "%w" info:)
height=$(convert "$tmpA1" -format "%h" info:)
totpix=$(( width * height ))

# get min and max in range 0 - 255 to correspond to 8-bit histogram and later set average to initial thresh
if [ "$im_version" -ge "06030901" ]; then
  # %[min] and %[max] are reported in quantum units; rescale them to 0-255
  min=$(convert "$tmpA1" -format "%[min]" info:)
  max=$(convert "$tmpA1" -format "%[max]" info:)
  min=$(convert xc: -format "%[fx:255*$min/quantumrange]" info:)
  max=$(convert xc: -format "%[fx:255*$max/quantumrange]" info:)
else
  # older versions: take the parenthesized value from the verbose info and
  # scale it by 255
  data=$(convert "$tmpA1" -verbose info:)
  min=$(echo "$data" | sed -n 's/^.*[Mm]in:.*[(]\([0-9.]*\).*$/\1/p ' | head -n 1)
  max=$(echo "$data" | sed -n 's/^.*[Mm]ax:.*[(]\([0-9.]*\).*$/\1/p ' | head -n 1)
  min=$(convert xc: -format "%[fx:255*$min]" info:)
  max=$(convert xc: -format "%[fx:255*$max]" info:)
fi
#echo "min=$min; max=$max;"

# separate IM histogram into two arrays, value and count, and fill out for empty bins and normalize

# Extract "count value" pairs from the 8-bit IM histogram once and reuse them
# for both arrays, rather than running the same convert pipeline twice.
km_histpairs=$(convert "$tmpA1" -depth 8 -format "%c" -define histogram:unique-colors=true histogram:info:- \
  | tr -cs '0-9\012' ' ' \
  | sed -n 's/[ ]*\([0-9]*\)[ ]*\([0-9]*\).*$/\1 \2/p')

# get filled value array from IM histogram (bins with no pixels hold 0)
nvalueArr=( $(printf '%s\n' "$km_histpairs" | awk '
# AWK
{ vbin[$2] = $2; }
END { for (i=0;i<256;i++) {print vbin[i]+0; } } ') )
# echo ${nvalueArr[*]}
# echo ${#nvalueArr[*]}
numvals=${#nvalueArr[*]}

# get filled and normalized count array from IM histogram
ncountArr=( $(printf '%s\n' "$km_histpairs" | awk -v totpix="$totpix" '
# AWK
{ cbin[$2] += $1; }
END { for (i=0;i<256;i++) {printf ("%f\n", (cbin[i]+0)/totpix) } } ') )
# echo ${ncountArr[*]}
# echo ${#ncountArr[*]}
numcounts=${#ncountArr[*]}

if [ "$numvals" -ne "$numcounts" ]; then
  errMsg "--- NUMBER OF COUNTS IS NOT THE SAME AS NUMBER OF VALUES ---"
fi
numbins=256

# process image using k-means approach

# Generate Cumulative Arrays
# p=c(i)/N (normalized count or probability, p, at bin i)
# v=v(i) (graylevel at bin i)
# note that as histogram has been filled that v=i
# t=threshold bin
# n=p0=sum(c(i)/N)=zeroth histogram moment => cumulative normalized count (from i=0 to t) = N(t)
# g=p1=sum(c(i)*v(i))=first histogram moment => cumulative normalized graylevel (from i=0 to t) = G(t)
# m=p1/p0=mean

# compute nlowArr
nlowArr=( $(for ((i=0; i