mirror of https://github.com/tLDP/LDP
#!/bin/bash
# wf2.sh: Crude word frequency analysis on a text file.
#
# Uses 'xargs' to decompose lines of text into single words.
# Compare this example to the "wf.sh" script later on.
#
# Usage: wf2.sh filename
# Exit status: 0 on success, 85 on bad arguments, 86 if file missing.

ARGS=1          # Exactly one command-line argument expected.
E_BADARGS=85    # Exit status: wrong number of arguments.
E_NOFILE=86     # Exit status: input file does not exist.

# Check for input file on command-line.
if [ $# -ne "$ARGS" ]
then
  # Diagnostics go to stderr, not stdout.
  echo "Usage: $(basename "$0") filename" >&2
  exit $E_BADARGS
fi

if [ ! -f "$1" ]   # Does file exist (as a regular file)?
then
  echo "File \"$1\" does not exist." >&2
  exit $E_NOFILE
fi

#####################################################
# The pipeline:
#   xargs -n1   — list the file, one word per line
#               (reads the file directly; no 'cat' needed).
#   tr          — shift characters to lowercase.
#   sed         — filter out periods and commas.
#               (No space-to-newline step is needed:
#               +  xargs -n1 already emits single words.)
#   sort | uniq -c | sort -nr
#               — remove duplicates, prefix occurrence
#               +  count, and sort numerically, most
#               +  frequent word first.
#####################################################
xargs -n1 < "$1" | \
tr 'A-Z' 'a-z' | \
sed -e 's/\.//g' -e 's/,//g' | \
sort | uniq -c | sort -nr
#####################################################

# This does the same job as the "wf.sh" example,
#+ but a bit more ponderously, and it runs more slowly (why?).

exit $?