#!/bin/bash
# wf2.sh: Crude word frequency analysis on a text file.
# Uses 'xargs' to decompose lines of text into single words.
# Compare this example to the "wf.sh" script later on.
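#
#  Example invocation (the file name is only illustrative):
#    ./wf2.sh mybook.txt
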
# Check for input file on command-line.
ARGS=1
E_BADARGS=85
E_NOFILE=86
if [ $# -ne "$ARGS" ]
# Correct number of arguments passed to script?
then
  echo "Usage: `basename $0` filename"
  exit $E_BADARGS
fi

if [ ! -f "$1" ] # Does file exist?
then
  echo "File \"$1\" does not exist."
  exit $E_NOFILE
fi

#####################################################
cat "$1" | xargs -n1 | \
# List the file, one word per line.
tr A-Z a-z | \
# Shift characters to lowercase.
sed -e 's/\.//g' -e 's/\,//g' -e 's/ /\
/g' | \
# Filter out periods and commas, and
#+ change space between words to linefeed,
sort | uniq -c | sort -nr
# Finally remove duplicates, prefix occurrence count
#+ and sort numerically.
#####################################################
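
#  Each output line has the form produced by 'uniq -c':
#+ an occurrence count followed by the word, e.g. "     27 the"
#+ (the count shown here is only illustrative).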
# This does the same job as the "wf.sh" example,
#+ but a bit more ponderously, and it runs more slowly (why?).
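#
#  Why slower?  'xargs -n1' with no explicit command runs /bin/echo,
#+ so a separate process is spawned for every single word of input.
#
#  A rough single-pass alternative (a sketch, not part of the original
#+ script; 'tr -cs a-z' turns every run of non-letters into one newline),
#+ which avoids the per-word process creation:
#
#    tr A-Z a-z < "$1" | tr -cs a-z '\n' | sed '/^$/d' | sort | uniq -c | sort -nr
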
exit $?