mirror of https://github.com/tLDP/LDP
118 lines
3.5 KiB
Bash
118 lines
3.5 KiB
Bash
#!/bin/bash
|
|
# sd.sh: Standard Deviation
|
|
|
|
# The Standard Deviation indicates how consistent a set of data is.
|
|
# It shows to what extent the individual data points deviate from the
|
|
#+ arithmetic mean, i.e., how much they "bounce around" (or cluster).
|
|
# It is essentially the average deviation-distance of the
|
|
#+ data points from the mean.
|
|
|
|
# =========================================================== #
|
|
# To calculate the Standard Deviation:
|
|
#
|
|
# 1 Find the arithmetic mean (average) of all the data points.
|
|
# 2 Subtract each data point from the arithmetic mean,
|
|
# and square that difference.
|
|
# 3 Add all of the individual difference-squares in # 2.
|
|
# 4 Divide the sum in # 3 by the number of data points.
# This is known as the (population) "variance."
|
|
# 5 The square root of # 4 gives the Standard Deviation.
|
|
# =========================================================== #
|
|
|
|
count=0           # Number of data points; global.
SC=9              # Scale to be used by bc. Nine decimal places.
E_DATAFILE=90     # Data file error.

# ----------------- Set data file ---------------------
# Use the filename given as the first command-line argument;
#+ fall back to "sample.dat" when no (or an empty) argument is supplied.
# The data file is ASCII text, one (numerical) data point per line!
# See example data file, below.
datafile="${1:-sample.dat}"

if [[ ! -e "$datafile" ]]
then
  # Diagnostics go to stderr so they never mix with the results on stdout.
  # The filename is passed as a printf argument, so names containing
  #+ spaces or glob characters are reported verbatim.
  printf '"%s" does not exist!\n' "$datafile" >&2
  exit "$E_DATAFILE"
fi

# A directory or an unreadable file would pass the existence test above
#+ and only fail later, when the functions try to read from it.
if [[ -d "$datafile" || ! -r "$datafile" ]]
then
  printf '"%s" is not a readable data file!\n' "$datafile" >&2
  exit "$E_DATAFILE"
fi
# -----------------------------------------------------
|
|
|
|
|
|
# arith_mean: arithmetic mean (average) of the data points in "$datafile".
#
# Globals:  SC (read)       - scale (number of decimal places) given to bc.
#           datafile (read) - data file, attached to this function's stdin.
# Input:    one numerical data point per line; blank lines are skipped.
# Outputs:  the mean on stdout (an empty line if there are no data points,
#+          in which case a diagnostic is written to stderr).
# Returns:  the number of data points, as the exit status.
#           Caution: an exit status is a single byte, so the count is
#+          reported correctly only while it is <= 255; larger counts
#+          wrap around modulo 256. The mean itself is unaffected.
arith_mean ()
{
  local rt=0        # Running total.
  local am=         # Arithmetic mean; stays empty when there is no data.
  local ct=0        # Number of data points.
  local value       # Current data point.

  # -r keeps backslashes literal.
  # The "|| [[ -n ... ]]" clause processes a final line that lacks a
  #+ trailing newline, which a plain "read" would silently drop.
  while read -r value || [[ -n "$value" ]]
  do
    # A blank line would hand bc an incomplete expression
    #+ and wipe out the running total.
    [[ -z "$value" ]] && continue

    rt=$(echo "scale=$SC; $rt + $value" | bc)
    ct=$(( ct + 1 ))
  done

  if (( ct > 0 ))
  then
    am=$(echo "scale=$SC; $rt / $ct" | bc)
  else
    # Avoid asking bc to divide by zero.
    echo "arith_mean: no data points in \"$datafile\"" >&2
  fi

  echo "$am"        # This function "returns" TWO values:
  return "$ct"      #+ the mean on stdout, the count as exit status.
} <"$datafile"      # Feed in data file.
|
|
|
|
# sd: standard deviation of the data points in "$datafile".
#
# Globals:    SC (read)       - scale (number of decimal places) given to bc.
#             datafile (read) - data file, attached to this function's stdin.
# Arguments:  $1 - arithmetic mean of the data points.
#             $2 - how many data points there are (must be non-zero).
# Input:      one numerical data point per line; blank lines are skipped.
# Outputs:    the standard deviation on stdout.
# Returns:    0 on success, 1 if the mean is missing or the count is
#+            missing or zero (nothing is written to stdout then).
sd ()
{
  local mean1=$1    # Arithmetic mean (passed to function).
  local n=$2        # How many data points.
  local sum2=0      # Sum of squared differences.
  local avg2=0      # Average of $sum2 (the "variance").
  local sdev=0      # Standard Deviation.
  local diff dif2   # Per-point difference from the mean, and its square.
  local value       # Current data point.

  # Without a mean, or with a zero count, bc would only emit errors
  #+ (the average below divides by $n).
  if [[ -z "$mean1" || -z "$n" || "$n" =~ ^0+$ ]]
  then
    echo "sd: need a mean and a non-zero number of data points" >&2
    return 1
  fi

  # -r keeps backslashes literal; the "|| [[ -n ... ]]" clause processes
  #+ a final line that lacks a trailing newline.
  while read -r value || [[ -n "$value" ]]
  do
    [[ -z "$value" ]] && continue     # Skip blank lines.

    diff=$(echo "scale=$SC; $mean1 - $value" | bc)
    # Difference between arith. mean and data point.
    dif2=$(echo "scale=$SC; $diff * $diff" | bc) # Squared.
    sum2=$(echo "scale=$SC; $sum2 + $dif2" | bc) # Sum of squares.
  done

  avg2=$(echo "scale=$SC; $sum2 / $n" | bc)      # Avg. of sum of squares.
  sdev=$(echo "scale=$SC; sqrt($avg2)" | bc)     # Square root =
  echo "$sdev"                                   # Standard Deviation.
} <"$datafile"      # Rewinds data file.
|
|
|
|
|
|
# ======================================================= #
# Main: compute the mean and standard deviation of "$datafile"
#+ and print a short report.

mean=$(arith_mean)

# Count the data points straight from the file instead of taking the
#+ count from the exit status of arith_mean: an exit status is a single
#+ byte, so it wraps around for more than 255 data points, which would
#+ falsify both the reported count and the standard deviation.
# A data point is any line containing a non-whitespace character.
count=$(grep -c '[^[:space:]]' <"$datafile")

# Nothing to average: stop before sd divides by zero.
if (( count == 0 )) || [[ -z "$mean" ]]
then
  printf 'No data points found in "%s"!\n' "$datafile" >&2
  exit "${E_DATAFILE:-90}"
fi

std_dev=$(sd "$mean" "$count")

echo
# The filename is passed as a printf argument so that names containing
#+ spaces or glob characters are printed verbatim.
printf 'Number of data points in "%s" = %s\n' "$datafile" "$count"
printf 'Arithmetic mean (average) = %s\n' "$mean"
printf 'Standard Deviation = %s\n' "$std_dev"
echo
# ======================================================= #

exit 0
|
|
|
|
# This script could stand some drastic streamlining,
|
|
#+ but not at the cost of reduced legibility, please.
|
|
|
|
|
|
# ++++++++++++++++++++++++++++++++++++++++ #
|
|
# A sample data file (sample1.dat):
|
|
|
|
# 18.35
|
|
# 19.0
|
|
# 18.88
|
|
# 18.91
|
|
# 18.64
|
|
|
|
|
|
# $ bash sd.sh sample1.dat
|
|
|
|
# Number of data points in "sample1.dat" = 5
|
|
# Arithmetic mean (average) = 18.756000000
|
|
# Standard Deviation = .235338054
|
|
# ++++++++++++++++++++++++++++++++++++++++ #
|