#!/bin/bash
# wf2.sh: Crude word frequency analysis on a text file.
# Uses 'xargs' to decompose lines of text into single words.
# Compare this example to the "wf.sh" script later on.
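#
#  Example invocation (the file name is only illustrative):
#    ./wf2.sh mybook.txt
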
# Check for input file on command-line.
ARGS=1
E_BADARGS=85
E_NOFILE=86
if [ $# -ne "$ARGS" ]
# Correct number of arguments passed to script?
then
  echo "Usage: `basename $0` filename"
  exit $E_BADARGS
fi

if [ ! -f "$1" ] # Does file exist?
then
  echo "File \"$1\" does not exist."
  exit $E_NOFILE
fi

#####################################################
cat "$1" | xargs -n1 | \
# List the file, one word per line.
tr A-Z a-z | \
# Shift characters to lowercase.
sed -e 's/\.//g' -e 's/\,//g' -e 's/ /\
/g' | \
# Filter out periods and commas, and
#+ change space between words to linefeed,
sort | uniq -c | sort -nr
# Finally remove duplicates, prefix occurrence count
#+ and sort numerically.
#####################################################
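
#  Each output line has the form produced by 'uniq -c':
#+ an occurrence count followed by the word, e.g. "     27 the"
#+ (the count shown here is only illustrative).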
# This does the same job as the "wf.sh" example,
#+ but a bit more ponderously, and it runs more slowly (why?).
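#
#  Why slower?  'xargs -n1' with no explicit command runs /bin/echo,
#+ so a separate process is spawned for every single word of input.
#
#  A rough single-pass alternative (a sketch, not part of the original
#+ script; 'tr -cs a-z' turns every run of non-letters into one newline),
#+ which avoids the per-word process creation:
#
#    tr A-Z a-z < "$1" | tr -cs a-z '\n' | sed '/^$/d' | sort | uniq -c | sort -nr
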
exit $?