LDP/LDP/guide/docbook/abs-guide/tohtml.sh

141 lines
3.6 KiB
Bash

#!/bin/bash
# tohtml.sh [v. 0.2, reldate: 06/26/08, still buggy]
# Convert a text file to HTML format.
# Author: Mendel Cooper
# License: GPL3
# Usage: bash tohtml.sh < textfile > htmlfile
# Script can easily be modified to accept source and target filenames.
# Assumptions:
# 1) Paragraphs in (target) text file are separated by a blank line.
# 2) Jpeg images (*.jpg) are located in "images" subdirectory.
# In the target file, the image names are enclosed in square brackets,
# for example, [image01.jpg].
# 3) Emphasized (italic) phrases begin with a space+underscore
#+ or the first character on the line is an underscore,
#+ and end with an underscore+space or underscore+end-of-line.
# Settings
FNTSIZE=2        # Small-medium font size
IMGDIR="images"  # Image directory

# NOTE: The markup below is written with literal angle brackets.
#+      Entity-escaped forms (&lt; / &gt;) would be echoed verbatim,
#+      so a browser would display the tags as text
#+      instead of interpreting them.

# Headers
HDR01='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
HDR02='<!-- Converted to HTML by ***tohtml.sh*** script -->'
HDR03='<!-- script author: M. Leo Cooper <thegrendel@theriver.com> -->'
HDR10='<html>'
HDR11='<head>'
HDR11a='</head>'
HDR12a='<title>'
HDR12b='</title>'
HDR121='<META NAME="GENERATOR" CONTENT="tohtml.sh script">'
HDR13='<body bgcolor="#dddddd">'  # Change background color to suit.
HDR14a='<font size='              # Opening font tag is assembled as
HDR14b='>'                        #+ HDR14a + FNTSIZE + HDR14b.

# Footers
FTR10='</body>'
FTR11='</html>'

# Tags
BOLD="<b>"
CENTER="<center>"
END_CENTER="</center>"
LF="<br>"
# write_headers
# Print the document preamble to stdout, one item per line,
#+ with blank lines separating the groups.
# Globals read: HDR01 HDR02 HDR03 HDR10 HDR11 HDR121 HDR11a HDR13
#+              HDR14a FNTSIZE HDR14b BOLD.
# Note: HDR12a / HDR12b (the title tags) are not emitted here.
write_headers ()
{
  # Doctype line, blank, the two comment lines, two blanks.
  printf '%s\n' "$HDR01" '' "$HDR02" "$HDR03" '' ''

  # Document and head section: open, generator META line, close.
  printf '%s\n' "$HDR10" "$HDR11" "$HDR121" "$HDR11a"

  # Body opening line, followed by a blank line.
  printf '%s\n' "$HDR13" ''

  # Font tag is glued together from three pieces on a single line.
  printf '%s%s%s\n' "$HDR14a" "$FNTSIZE" "$HDR14b"

  # Blank line, then everything in bold (more easily readable).
  printf '\n%s\n' "$BOLD"
}
# process_text
# Read plain text from stdin, write the converted HTML body to stdout.
#   * A blank line becomes a paragraph break (two $LF tags).
#   * A line holding a bracketed JPEG name, e.g. [image01.jpg],
#+    is replaced by a centered <img> tag pointing into $IMGDIR.
#   * _phrase_ becomes <i>phrase</i> when the opening underscore starts
#+    the line or follows a space, and the closing underscore is followed
#+    by a space, one of . , ; : ! ? or end-of-line.
#     Unpaired underscores and underscores embedded within a word
#+    are left untouched.
# Every non-blank line is written with an empty line before and after it.
# Globals read: LF, CENTER, END_CENTER, IMGDIR.
process_text ()
{
  local line

  # Kept in a variable on purpose: a *quoted* right-hand side of =~
  #+ is matched as a literal string, not as a regular expression.
  # Group 1 captures the file name without the brackets.
  local img_re='\[([^][]+\.jpg)\]'

  # -r keeps backslashes in the text literal.
  # The || test still processes a last line lacking a trailing newline.
  while read -r line || [[ -n "$line" ]]   # Read one line at a time.
  do
    if [[ -z "$line" ]]        # Blank line?
    then                       # Then new paragraph must follow.
      echo
      echo "$LF"               # Insert two <br> tags.
      echo "$LF"
      echo
      continue
    fi

    if [[ "$line" =~ $img_re ]]    # Is a graphic?
    then
      # Add image tag, and center it.
      # Image lines skip the italics pass below, so an underscore
      #+ in a file name (my_pic.jpg) is not rewritten.
      line="$CENTER <img src=\"$IMGDIR/${BASH_REMATCH[1]}\"> $END_CENTER "
    elif [[ "$line" == *_* ]]      # Builtin test, no grep fork per line.
    then
      # Convert underscored phrases to italics.
      # One substitution handles a complete _phrase_ pair; the t-loop
      #+ repeats it until no pair is left. (A plain 'g' flag would miss
      #+ the second of two phrases separated by a single space, because
      #+ that space was consumed by the first match.)
      line=$( printf '%s\n' "$line" |
        sed -E -e ':again' \
               -e 's/(^| )_([^_ ][^_]*)_([ .,;:!?]|$)/\1<i>\2<\/i>\3/' \
               -e 't again' )
    fi

    echo
    printf '%s\n' "$line"      # printf: safe even if line is "-n" or "-e".
    echo
  done
} # End process_text ()
# write_footers
# Print the termination tags to stdout.
# Globals read: FTR10, FTR11.
write_footers ()
{
  # write_headers finishes by opening a font element and a bold element
  #+ that enclose the whole text. HTML 4.01 requires end tags for both,
  #+ so close them (innermost first) before ending the body.
  printf '%s\n' '</b>' '</font>'

  printf '%s\n' "$FTR10" "$FTR11"
}
# main
# Produce the complete document on stdout, in order:
#+ headers, converted text (read from stdin), footers.
# Return status is that of the last step, write_footers.
main ()
{
  write_headers
  process_text
  write_footers
}

# =========
main
exit    # With no argument, exit passes on the status of main.
# =========
# Exercises:
# ---------
# 1) Fixup: Check for closing underscore before a comma or period.
# 2) Add a test for the presence of a closing underscore
#+ in phrases to be italicized.