#!/bin/bash
# Source path: LDP/LDP/guide/docbook/abs-guide/wstrings.sh
# wstrings.sh: "word-strings" (enhanced "strings" command)
#
# This script filters the output of "strings" by checking it
#+ against a standard word list file.
# This effectively eliminates gibberish and noise,
#+ and outputs only recognized words.
# ===========================================================
# Standard Check for Script Argument(s)
# Expected argument count and the exit codes used by the argument checks.
# Declared read-only: nothing in the script reassigns them.
readonly ARGS=1         # Number of command-line arguments required.
readonly E_BADARGS=85   # Exit status: wrong number of arguments.
readonly E_NOFILE=86    # Exit status: named file is not an existing regular file.
# Require exactly $ARGS command-line argument(s); otherwise print a usage
#+ line and exit with $E_BADARGS.
# The usage text goes to stderr so that it can never be mistaken for
#+ word output by whatever is reading stdout.
if [ "$#" -ne "$ARGS" ]
then
  # ${0##*/} is the script's base name; unlike an unquoted `basename $0`
  #+ it still works when the script path contains whitespace.
  echo "Usage: ${0##*/} filename" >&2
  exit "$E_BADARGS"
fi
# Refuse to continue unless the argument names an existing regular file.
# Nothing may stand between the test and "then": the exit status of the
#+ LAST command before "then" is what decides the branch.
if [ ! -f "$1" ]       # Check if file exists.
then
  echo "File \"$1\" does not exist." >&2   # Diagnostics belong on stderr.
  exit "$E_NOFILE"
fi
# ===========================================================
MINSTRLEN=3                                        # Minimum string length.
WORDFILE=${WORDFILE:-/usr/share/dict/linux.words}  # Dictionary file.
#  A WORDFILE value already present in the environment takes precedence,
#+ so a different list can be chosen without editing this script:
#+     WORDFILE=/path/to/list wstrings.sh filename
#  May specify a different word list file
#+ of one-word-per-line format.
#  For example, the "yawl" word-list package,
#  http://bash.deta.in/yawl-0.3.2.tar.gz
#######################################
# Reduce the text on stdin to lowercase alphabetic words.
# Arguments: none (reads stdin).
# Outputs:   the words on stdout, separated by spaces.
#######################################
_tokenize_strings ()
{
  tr A-Z a-z | tr '[:space:]' Z |
    tr -cs '[:alpha:]' Z | tr -s '\173-\377' Z | tr Z ' '
}

wlist=$(strings "$1" | _tokenize_strings)

# Translate output of 'strings' command with multiple passes of 'tr'.
#  "tr A-Z a-z"  converts to lowercase.
#  "tr '[:space:]'"  converts whitespace characters to Z's.
#  "tr -cs '[:alpha:]' Z"  converts non-alphabetic characters to Z's,
#+ and squeezes multiple consecutive Z's.
#  "tr -s '\173-\377' Z"  converts all characters past 'z' to Z's
#+ and squeezes multiple consecutive Z's,
#+ which gets rid of all the weird characters that the previous
#+ translation failed to deal with.
#  Finally, "tr Z ' '" converts all those Z's to whitespace,
#+ which will be seen as word separators in the loop below.
#  (Using Z as the placeholder is safe because the first pass has
#+ already lowercased the input, so no genuine 'Z' can remain.)

#  ***********************************************************************
#  Note the technique of feeding/piping the output of 'tr' back to itself,
#+ but with different arguments and/or options on each successive pass.
#  ***********************************************************************
#######################################
# Look up each candidate word in the word list and print the matches.
# Globals:   MINSTRLEN (read) - words shorter than this are skipped.
#            WORDFILE  (read) - word list searched by grep.
#            E_NOFILE  (read) - status returned if WORDFILE is unreadable
#                               (86 is used if it is unset).
# Arguments: the candidate words, one per argument.
# Outputs:   every line of $WORDFILE that grep -Fw matches, in the order
#            the candidate words were given.
# Returns:   E_NOFILE if the word list cannot be read; otherwise the
#            status of the last command the loop ran (0 if no words).
#######################################
_lookup_words ()
{
  local word

  # Check the word list once, rather than letting grep complain
  #+ separately for every single word.
  if [ ! -r "$WORDFILE" ]
  then
    echo "Word list \"$WORDFILE\" is not readable." >&2
    return "${E_NOFILE:-86}"
  fi

  for word in "$@"
  do
    if [ "${#word}" -lt "$MINSTRLEN" ]   # Skip over short strings.
    then
      continue
    fi

    grep -Fw -- "$word" "$WORDFILE"      # Match whole words only.
#        ^^^                             # "Fixed strings" and
                                         #+ "whole words" options.
  done
}

_lookup_words $wlist   # Important:
                       # $wlist must not be quoted here.
                       # "$wlist" does not work.
                       # Why not?
                       # (Quoted, the whole list would arrive as ONE word.)
#  Exit with the status of the word-lookup step directly above.
#  No other command may come between that step and this line,
#+ or $? would report that command's status instead.
exit $?