LDP/LDP/guide/docbook/abs-guide/strip-comments.sh

80 lines
1.9 KiB
Bash
Raw Normal View History

2001-07-10 14:25:50 +00:00
#!/bin/bash
# strip-comment.sh: Strips out the comments (/* COMMENT */) in a C program.
#
# Usage:  strip-comment.sh C-program-file
# Output: the program text with comments removed, on stdout.
#         Diagnostics go to stderr so they never mix with the result.
# Exit status: 66 if no argument was given,
#              67 if "file" does not identify the argument as C source,
#              otherwise the exit status of sed (0 on success).

E_NOARGS=0
E_ARGERROR=66
E_WRONG_FILE_TYPE=67

if [ $# -eq "$E_NOARGS" ]
then
  echo "Usage: ${0##*/} C-program-file" >&2   # Error message to stderr.
  exit $E_ARGERROR
fi

# Test for correct file type.
# "file -b" prints only the description, without the leading "filename:",
#+ so a filename containing blanks cannot shift the words being tested.
type=$(file -b -- "$1")

# Older versions of "file" report "ASCII C program text";
#+ newer ones report "C source, ASCII text". Accept both spellings.
case "$type" in
  "C source"* | *"C program text"* )
    ;;
  * )
    echo "This script works on C program files only." >&2
    exit $E_WRONG_FILE_TYPE
    ;;
esac

# The sed script, step by step:
#   1) Delete every complete /* ... */ comment found in the pattern space.
#      The regex is the usual "non-greedy" comment matcher: it stops at the
#      FIRST "*/", so code between two comments on one line is preserved.
#   2) If a "/*" is still present, the comment continues on a later line:
#      append the next line (N) and try again.
#      "$!" skips this on the last line, so an unterminated comment is
#      printed as-is instead of depending on what N does at end of input.
#--------
sed '
:strip
  s%/\*[^*]*\*\{1,\}\([^/*][^*]*\*\{1,\}\)*/%%g
  /\/\*/{
    $!{
      N
      b strip
    }
  }
' -- "$1"
status=$?
#--------

# Known limitations:
#  * A line that held only a comment is left behind as an empty line.
#  * Comment delimiters inside string literals, e.g. printf("/*"),
#+   are treated as real delimiters. Handling those needs a C tokenizer.
#  * "//" comments are not touched.

exit $status
# ----------------------------------------------------------------
# Code below this line will not execute because of 'exit 0' above.
# Stephane Chazelas suggests the following alternative:
# usage: print a one-line synopsis on stderr and terminate the script
#+ with exit status 1. Takes no arguments.
usage() {
  # ${0##*/} is the script name without its directory part; unlike an
  #+ unquoted `basename $0` it is safe for paths containing blanks.
  printf 'Usage: %s C-program-file\n' "${0##*/}" >&2
  exit 1
}
# Temporary marker byte. 0xFF never occurs in ASCII or in valid UTF-8 text,
#+ so it cannot collide with the program being filtered.
# The bash manual documents only "\0nnn" octal escapes for "echo -e",
#+ so the marker is built with ANSI-C quoting instead.
WEIRD=$'\377'

# strip_delimited_comments: filter stdin to stdout, dropping the text
#+ between /* and */ delimiters.
# How it works:
#   1) both delimiters become the marker byte;
#   2) marker and newline are swapped, so each "line" is now a stretch of
#      text between two delimiters: odd lines are code, even lines comment;
#   3) sed prints only the odd lines;
#   4) the line breaks introduced in step 2 are deleted and the markers
#      (the original newlines) are turned back into newlines.
# LC_ALL=C makes every stage work on bytes, so the 0xFF marker is never
#+ rejected as an invalid multibyte character.
strip_delimited_comments() {
  LC_ALL=C sed -e "s%/\*%${WEIRD}%g;s%\*/%${WEIRD}%g" \
    | LC_ALL=C tr '\377\n' '\n\377' \
    | LC_ALL=C sed -ne 'p;n' \
    | LC_ALL=C tr -d '\n' \
    | LC_ALL=C tr '\377' '\n'
}

[[ $# -eq 1 ]] || usage

# "file -b" omits the filename, so only the description is matched.
# Older "file" says "... C program text", newer says "C source, ...".
case "$(file -b -- "$1")" in
  "C source"* | *"C program text"* ) strip_delimited_comments < "$1" ;;
  * ) usage ;;
esac
# This is still fooled by things like:
# printf("/*");
# or
# /* /* buggy embedded comment */
#
# To handle all special cases (comments in strings, comments in string
#+ where there is a \", \\" ...),
#+ the only way is to write a C parser (using lex or yacc perhaps?).
# Report success to the caller.
exit 0