summaryrefslogtreecommitdiff
path: root/bin/kmeansthresh
diff options
context:
space:
mode:
authoryo mama <pepper@scannerjammer.com>2016-10-01 13:14:48 -0700
committeryo mama <pepper@scannerjammer.com>2016-10-01 13:14:48 -0700
commitd9b7653700040bf25cb5681b5dc7021bc505d975 (patch)
tree7422ce1db3c7b2a1ce5ac43f4b3093f81f13493e /bin/kmeansthresh
parent6529b3767669476096fb30e69dcfd29773865a0c (diff)
added threshold scripts
Diffstat (limited to 'bin/kmeansthresh')
-rw-r--r--bin/kmeansthresh428
1 files changed, 428 insertions, 0 deletions
diff --git a/bin/kmeansthresh b/bin/kmeansthresh
new file mode 100644
index 0000000..cde8cf8
--- /dev/null
+++ b/bin/kmeansthresh
@@ -0,0 +1,428 @@
+#!/bin/bash
+#
+# Developed by Fred Weinhaus 10/29/2008 .......... revised 3/19/2016
+#
+# ------------------------------------------------------------------------------
+#
+# Licensing:
+#
+# Copyright © Fred Weinhaus
+#
+# My scripts are available free of charge for non-commercial use, ONLY.
+#
+# For use of my scripts in commercial (for-profit) environments or
+# non-free applications, please contact me (Fred Weinhaus) for
+# licensing arrangements. My email address is fmw at alink dot net.
+#
+# If you: 1) redistribute, 2) incorporate any of these scripts into other
+# free applications or 3) reprogram them in another scripting language,
+# then you must contact me for permission, especially if the result might
+# be used in a commercial or for-profit environment.
+#
+# My scripts are also subject, in a subordinate manner, to the ImageMagick
+# license, which can be found at: http://www.imagemagick.org/script/license.php
+#
+# ------------------------------------------------------------------------------
+#
+####
+#
+# USAGE: kmeansthresh [-g graph] infile outfile
+# USAGE: kmeansthresh [-help]
+#
+# OPTIONS:
+#
+# -g graph graph specifies whether to generate a
+# histogram graph image displaying the
+# location and value of the threshold;
+# choices are: view or save;
+# default is no graph
+#
+###
+#
+# NAME: KMEANSTHRESH
+#
+# PURPOSE: To automatically thresholds an image to binary (b/w) format
+# using the k-means technique.
+#
+# DESCRIPTION: KMEANSTHRESH automatically thresholds an image to binary
+# (b/w) format. It assume the histogram is bimodal, i.e. is the composite
+# of two bell-shaped distributions representing the foreground and
+# background classes. The k-means appoach iteratively thresholds the
+# image, computes the means of the foreground (above threshold data) and
+# background (at and below threshold value), computes a new threshold
+# equal to the average of these two means and repeats until there is no
+# change in threshold between successive iterations. This script is
+# equivalent to the isodatathresh script, which is implemented using image
+# masking rather than from the histogram of the image. The isodatathresh
+# script is faster than the kmeansthresh script for smaller images. The
+# timing transition occurs for images somewhere between 1000-2000 pixels
+# on a side. The kmeansthresh script is moderatly insensitive to image
+# size, but pays an initial timing penalty in order to compute the
+# histogram related data and get the means for each class from the
+# histogram using shell computations.
+#
+# OPTIONS:
+#
+# -g graph ... GRAPH specifies whether to generate a graph (image) of
+# the histogram, displaying the location and value of the threshold.
+# The choices are: view, save and none. If graph=view is selected, the
+# graph will be created and displayed automatically, but not saved.
+# If graph=save is selected, then the graph will be created and saved
+# to a file using the infile name, with "_histog_kmeans.gif" appended,
+# but the graph will not be displayed automatically. If -g option is
+# not specified, then no graph will be created.
+#
+# NOTE: It is highly recommended that the output not be specified
+# as a JPG image as that will cause compression and potentially a
+# non-binary (i.e. a graylevel) result. GIF is the recommended
+# output format.
+#
+# REFERENCES: see the following:
+# http://www.ph.tn.tudelft.nl/Courses/FIP/noframes/fip-Segmenta.html
+# http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/MORSE/threshold.pdf
+#
+# CAVEAT: No guarantee that this script will work on all platforms,
+# nor that trapping of inconsistent parameters is complete and
+# foolproof. Use At Your Own Risk.
+#
+######
+#
+
+# set default values
+graph="" #none, save or view
+
+# set directory for temporary files
+dir="." # suggestions are dir="." or dir="/tmp"
+
+# set up functions to report Usage and Usage with Description
+PROGNAME=`type $0 | awk '{print $3}'` # search for executable on path
+PROGDIR=`dirname $PROGNAME` # extract directory of program
+PROGNAME=`basename $PROGNAME` # base name of program
+usage1()
+ {
+ echo >&2 ""
+ echo >&2 "$PROGNAME:" "$@"
+ sed >&2 -e '1,/^####/d; /^###/g; /^#/!q; s/^#//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
+ }
+usage2()
+ {
+ echo >&2 ""
+ echo >&2 "$PROGNAME:" "$@"
+ sed >&2 -e '1,/^####/d; /^######/g; /^#/!q; s/^#*//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
+ }
+
+
+# function to report error messages
+errMsg()
+ {
+ echo ""
+ echo $1
+ echo ""
+ usage1
+ exit 1
+ }
+
+
+# function to test for minus at start of value of second part of option 1 or 2
+checkMinus()
+ {
+ test=`echo "$1" | grep -c '^-.*$'` # returns 1 if match; 0 otherwise
+ [ $test -eq 1 ] && errMsg "$errorMsg"
+ }
+
+# test for correct number of arguments and get values
+if [ $# -eq 0 ]
+ then
+ # help information
+ echo ""
+ usage2
+ exit 0
+elif [ $# -gt 4 ]
+ then
+ errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
+else
+ while [ $# -gt 0 ]
+ do
+ # get parameter values
+ case "$1" in
+ -h|-help) # help information
+ echo ""
+ usage2
+ exit 0
+ ;;
+ -g) # get graph
+ shift # to get the next parameter
+ # test if parameter starts with minus sign
+ errorMsg="--- INVALID GRAPH SPECIFICATION ---"
+ checkMinus "$1"
+ graph="$1"
+ [ "$graph" != "view" -a "$graph" != "save" ] && errMsg "--- GRAPH=$graph MUST BE EITHER VIEW OR SAVE ---"
+ ;;
+ -) # STDIN and end of arguments
+ break
+ ;;
+ -*) # any other - argument
+ errMsg "--- UNKNOWN OPTION ---"
+ ;;
+ *) # end of arguments
+ break
+ ;;
+ esac
+ shift # next option
+ done
+ #
+ # get infile and outfile
+ infile="$1"
+ outfile="$2"
+fi
+
+# test that infile provided
+[ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"
+
+# test that outfile provided
+[ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"
+
+# get outname from infile to use for graph
+inname=`convert $infile -format "%t" info:`
+histfile=${inname}_histog_kmeans.gif
+
+tmpA1="$dir/kmeansthresh_1_$$.mpc"
+tmpA2="$dir/kmeansthresh_1_$$.cache"
+trap "rm -f $tmpA1 $tmpA2; exit 0" 0
+trap "rm -f $tmpA1 $tmpA2 $histfile; exit 1" 1 2 3 15
+
+# get im_version
+im_version=`convert -list configure | \
+ sed '/^LIB_VERSION_NUMBER /!d; s//,/; s/,/,0/g; s/,0*\([0-9][0-9]\)/\1/g' | head -n 1`
+
+# colorspace RGB and sRGB swapped between 6.7.5.5 and 6.7.6.7
+# though probably not resolved until the latter
+# then -colorspace gray changed to linear between 6.7.6.7 and 6.7.8.2
+# then -separate converted to linear gray channels between 6.7.6.7 and 6.7.8.2,
+# though probably not resolved until the latter
+# so -colorspace HSL/HSB -separate and -colorspace gray became linear
+# but we need to use -set colorspace RGB before using them at appropriate times
+# so that results stay as in original script
+# The following was determined from various version tests using kmeansthresh.
+# with IM 6.7.4.10, 6.7.6.10, 6.7.8.10
+if [ "$im_version" -lt "06070607" -o "$im_version" -gt "06070707" ]; then
+ setcspace="-set colorspace RGB"
+else
+ setcspace=""
+fi
+# no need for setcspace for grayscale or channels after 6.8.5.4
+if [ "$im_version" -gt "06080504" ]; then
+ setcspace=""
+fi
+
+
+if convert -quiet "$infile" $setcspace -colorspace gray +repage "$tmpA1"
+ then
+ : ' do nothing '
+else
+ errMsg "--- FILE $infile DOES NOT EXIST OR IS NOT AN ORDINARY FILE, NOT READABLE OR HAG ZERO SIZE ---"
+fi
+
+# get totpix in image
+width=`convert $tmpA1 -format "%w" info:`
+height=`convert $tmpA1 -format "%h" info:`
+totpix=`echo "scale=0; $width * $height / 1" | bc`
+
+# get min and max in range 0 - 255 to correspond to 8-bit histogram and later set average to initial thresh
+if [ "$im_version" -ge "06030901" ]
+ then
+ min=`convert $tmpA1 -format "%[min]" info:`
+ max=`convert $tmpA1 -format "%[max]" info:`
+ min=`convert xc: -format "%[fx:255*$min/quantumrange]" info:`
+ max=`convert xc: -format "%[fx:255*$max/quantumrange]" info:`
+else
+ data=`convert $tmpA1 -verbose info:`
+ min=`echo "$data" | sed -n 's/^.*[Mm]in:.*[(]\([0-9.]*\).*$/\1/p ' | head -1`
+ max=`echo "$data" | sed -n 's/^.*[Mm]ax:.*[(]\([0-9.]*\).*$/\1/p ' | head -1`
+ min=`convert xc: -format "%[fx:255*$min)]" info:`
+ max=`convert xc: -format "%[fx:255*$max)]" info:`
+fi
+#echo "min=$min; max=$max;"
+
+
+# separate IM histogram into two arrays, value and count, and fill out for empty bins and normalize
+
+
+ # get filled value array from IM histogram
+ nvalueArr=(`convert $tmpA1 -depth 8 -format "%c" -define histogram:unique-colors=true histogram:info:- \
+ | tr -cs '0-9\012' ' ' | sed -n 's/[ ]*\([0-9]*\)[ ]*\([0-9]*\).*$/\1 \2/p' |\
+ awk '
+ # AWK
+ { vbin[$2] += $2;}
+ END { for (i=0;i<256;i++) {print vbin[i]+0; } } '`)
+# echo ${nvalueArr[*]}
+# echo ${#nvalueArr[*]}
+ numvals=${#nvalueArr[*]}
+
+
+ # get filled and normalized count array from IM histogram
+ ncountArr=(`convert $tmpA1 -depth 8 -format "%c" -define histogram:unique-colors=true histogram:info:- \
+ | tr -cs '0-9\012' ' ' | sed -n 's/[ ]*\([0-9]*\)[ ]*\([0-9]*\).*$/\1 \2/p' |\
+ awk -v totpix="$totpix" '
+ # AWK
+ { cbin[$2] += $1; }
+ END { for (i=0;i<256;i++) {printf ("%f\n", (cbin[i]+0)/totpix) } } '`)
+# echo ${ncountArr[*]}
+# echo ${#ncountArr[*]}
+ numcounts=${#ncountArr[*]}
+
+ [ $numvals -ne $numcounts ] && errMsg "--- NUMBER OF COUNTS IS NOT THE SAME AS NUMBER OF VALUES ---"
+
+ numbins=256
+
+# process image using k-means approach
+
+# Generate Cumulative Arrays
+# p=c(i)/N (normalized count or probability, p, at bin i)
+# v=v(i) (graylevel at bin i)
+# note that as histogram has been filled that v=i
+# t=threshold bin
+# n=p0=sum(c(i)/N)=zeroth histogram moment => cumulative normalized count (from i=0 to t) = N(t)
+# g=p1=sum(c(i)*v(i))=first histogram momement => cumulative normalized graylevel (from i=0 to t) = G(t)
+# m=p1/p0=mean
+
+
+ # compute nlowArr
+ nlowArr=( $(for ((i=0; i<numbins; i++)); do
+ echo "$i ${ncountArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" '
+ # AWK to generate a cumulative histogram 1D image...
+ { nlowbin[$1] = $2; }
+ END { for (i=0;i<numbins;i++) { nlow += nlowbin[i]; print nlow } } ') )
+# echo ${nlowArr[*]}
+# echo ${#nlowArr[*]}
+
+
+ # compute glowArr
+ glowArr=( $( for ((i=0; i<numbins; i++)); do
+ echo "$i ${nvalueArr[$i]} ${ncountArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" '
+ # AWK to generate a cumulative histogram 1D image...
+ { vlowbin[$1] = $2; nclowbin[$1] = $3; }
+ END { for (i=0;i<numbins;i++) { glow += vlowbin[i]*nclowbin[i]; print glow } } ') )
+# echo ${glowArr[*]}
+# echo ${#glowArr[*]}
+
+
+ # compute mlowArr
+ [ `echo "${ncountArr[0]} == 0" | bc` -eq 1 ] && mlowold=0
+ mlowArr=( $( for ((i=0; i<numbins; i++)); do
+ echo "$i ${nlowArr[$i]} ${glowArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" -v mlowold="$mlowold" '
+ # AWK to generate a cumulative histogram 1D image...
+ { nlowbin[$1] = $2; glowbin[$1] = $3; }
+ END { for (i=0;i<numbins;i++)
+ { if ( nlowbin[i] != 0 ) { mlow = glowbin[i]/nlowbin[i]; mlowold = mlow; } else { mlow = mlowold; } print mlow } }') )
+# echo ${mlowArr[*]}
+# echo ${mlowArr[*]}
+
+
+ # compute nhighArr
+ nhighArr=( $(for ((i=0; i<numbins; i++)); do
+ echo "$i ${ncountArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" '
+ # AWK to generate a cumulative histogram 1D image...
+ { nhighbin[$1] = $2; }
+ END { for (i=0;i<numbins;i++) { j = (numbins - 1 - i); nhigh += nhighbin[j]; print nhigh } } ') )
+# echo ${nhighArr[*]}
+# echo ${#nhighArr[*]}
+
+
+ # compute ghighArr
+ ghighArr=( $( for ((i=0; i<numbins; i++)); do
+ echo "$i ${nvalueArr[$i]} ${ncountArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" '
+ # AWK to generate a cumulative histogram 1D image...
+ { vhighbin[$1] = $2; nchighbin[$1] = $3; }
+ END { for (i=0;i<numbins;i++) { j = (numbins - 1 - i); ghigh += vhighbin[j]*nchighbin[j]; print ghigh } } ') )
+# echo ${glowArr[*]}
+# echo ${#glowArr[*]}
+
+
+ # compute mhighArr
+ [ `echo "${ncountArr[0]} == 0" | bc` -eq 1 ] && mhighold=0
+ mhighArr=( $( for ((i=0; i<numbins; i++)); do
+ echo "$i ${nhighArr[$i]} ${ghighArr[$i]}"
+ done |\
+ awk -v numbins="$numbins" -v mhighold="$mhighold" '
+ # AWK to generate a cumulative histogram 1D image...
+ { nhighbin[$1] = $2; ghighbin[$1] = $3; }
+ END { for (i=0;i<numbins;i++)
+ { if ( nhighbin[i] != 0 ) { mhigh = ghighbin[i]/nhighbin[i]; mhighold = mhigh; } else { mhigh = mhighold; } print mhigh } }') )
+# echo ${mlowArr[*]}
+# echo ${mlowArr[*]}
+
+
+#for ((i=0;i<numbins;i++)); do
+#echo "i=$i; j=$j; ncount=${ncountArr[$i]}; nlow=${nlowArr[$i]}; nhigh=${nhighArr[$i]}; mlow=${mlowArr[$i]}; mhigh=${mhighArr[$i]}"
+#done
+
+# Compute Threshold
+# iterate the following
+# start with initial threshold at average of image min and max values
+# get low and high means corresponding to below and above threshold
+# compute new threshold=(Ml+Mh)/2
+# repeat until difference between old and new thresholds is zero
+
+lastbin=`expr $numbins - 1`
+thresh=`echo "scale=0; ($min + $max)/2" | bc`
+oldthresh=0
+diff=`echo "scale=10; ($thresh - $oldthresh)" | bc`
+diff=`echo "scale=10; sqrt($diff * $diff)" | bc`
+test=`echo "$diff == 0" | bc`
+while [ $test != 1 ]; do
+ i=$thresh
+ j=`expr $lastbin - $i - 1`
+ oldthresh=$thresh
+ mlow=${mlowArr[$i]}
+ mhigh=${mhighArr[$j]}
+ thresh=`echo "scale=0; ($mlow+$mhigh)/2" | bc`
+ diff=`echo "scale=10; ($thresh - $oldthresh)" | bc`
+ diff=`echo "scale=10; sqrt($diff * $diff)" | bc`
+ test=`echo "$diff == 0" | bc`
+#echo "i=$i; j=$j; mlow=$mlow; mhigh=$mhigh; oldthresh=$oldthresh; thresh=$thresh; diff=$diff; test=$test"
+done
+
+
+# compute threshold graph x coord and threshold in percent
+xx=$thresh
+threshpct=`convert xc: -format "%[fx:100*$thresh/255]" info:`
+#echo "xx=$xx; threshpct=$threshpct"
+
+
+echo "Thresholding Image At $threshpct%"
+convert $tmpA1 -threshold $threshpct% "$outfile"
+echo ""
+
+
+if [ "$graph" != "" ]; then
+ convert $tmpA1 -define histogram:unique-colors=false histogram:- | \
+ convert - -negate \
+ -stroke red -strokewidth 1 -draw "line $xx,0 $xx,200" \
+ -background gray -splice 0x30 \
+ -fill white -stroke white -strokewidth 1 \
+ -font ArialB -pointsize 24 \
+ -draw "text 4,22 'threshold=$threshpct%'" -resize 50% \
+ -bordercolor gray50 -border 5 \
+ "$histfile"
+fi
+
+if [ "$graph" = "view" ]; then
+ convert "$histfile" x:
+ rm -f "$histfile"
+fi
+
+exit 0
+
+
+