mirror of https://github.com/tLDP/LDP
142 lines
3.6 KiB
Bash
142 lines
3.6 KiB
Bash
#!/bin/bash
# tohtml.sh [v. 0.2.01, reldate: 04/13/12, a teeny bit less buggy]

# Convert a text file to HTML format.
# Author: Mendel Cooper
# License: GPL3
# Usage: bash tohtml.sh < textfile > htmlfile
#        (Run it with bash, not plain sh: the script uses [[ ... =~ ... ]].)
# Script can easily be modified to accept source and target filenames.

# Assumptions:
# 1) Paragraphs in the input text file are separated by a blank line.
# 2) Jpeg images (*.jpg) are located in "images" subdirectory.
#    In the input file, the image names are enclosed in square brackets,
#    for example, [image01.jpg].
# 3) Emphasized (italic) phrases begin with a space+underscore
#+   or the first character on the line is an underscore,
#+   and end with an underscore+space or underscore+end-of-line.

# Settings
FNTSIZE=2                     # Small-medium font size
IMGDIR="images"               # Image directory (prefix for <img src=...>)

# Headers
# Emitted by write_headers, in the order it chooses.
HDR01='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
HDR02='<!-- Converted to HTML by ***tohtml.sh*** script -->'
HDR03='<!-- script author: M. Leo Cooper <thegrendel.abs@gmail.com> -->'
HDR10='<html>'
HDR11='<head>'
HDR11a='</head>'
# NOTE: HDR12a/HDR12b are defined but write_headers does not emit them,
#+      so the generated page has no <title> element.
HDR12a='<title>'
HDR12b='</title>'
HDR121='<META NAME="GENERATOR" CONTENT="tohtml.sh script">'
HDR13='<body bgcolor="#dddddd">'  # Change background color to suit.
# The <font> tag is assembled as: HDR14a + FNTSIZE + HDR14b.
HDR14a='<font size='
HDR14b='>'

# Footers
FTR10='</body>'
FTR11='</html>'

# Tags
BOLD="<b>"
CENTER="<center>"
END_CENTER="</center>"
LF="<br>"


# write_headers
# Print the fixed HTML preamble to stdout: doctype, two comment lines,
#+ <html>, the <head> section (generator META only), <body>, then the
#+ opening <font size=N> and bold tags that wrap the converted text.
# Arguments: none.
# Globals read: HDR01 HDR02 HDR03 HDR10 HDR11 HDR121 HDR11a HDR13
#+              HDR14a FNTSIZE HDR14b BOLD
write_headers ()
{
  # One argument per output line; "" produces an empty separator line.
  printf '%s\n' \
    "$HDR01" \
    "" \
    "$HDR02" \
    "$HDR03" \
    "" \
    "" \
    "$HDR10" \
    "$HDR11" \
    "$HDR121" \
    "$HDR11a" \
    "$HDR13" \
    ""

  # <font size=N> is built from three pieces on a single line.
  # printf is used instead of "echo -n", whose behavior varies by shell.
  printf '%s%s%s\n' "$HDR14a" "$FNTSIZE" "$HDR14b"

  printf '\n%s\n' "$BOLD"   # Everything in bold (more easily readable).
}


# process_text
# Read plain text from stdin, one line at a time, and write the HTML
#+ body markup to stdout.
#
#   Blank (or whitespace-only) line -> paragraph break: two $LF tags
#                                      surrounded by empty lines.
#   Line that is exactly [name.jpg] -> centered <img> whose src is
#                                      $IMGDIR/name.jpg.
#   _phrase_                        -> <i>phrase</i>
#
# A phrase is italicized only when BOTH underscores are present, the
#+ opening one is at the start of the line or follows whitespace, and the
#+ closing one is followed by whitespace, end of line, or one of . , ; : ! ? )
# Underscores embedded within a word (foo_bar) are left alone.
#
# Arguments: none (input comes from stdin).
# Globals read: LF CENTER END_CENTER IMGDIR
process_text ()
{
  local line

  # Whole line is a bracketed *.jpg name; group 1 = the file name.
  local image_re='^\[([^][]+\.jpg)\]$'

  # Group 1 = text before the phrase (empty, or ending in whitespace),
  #+ group 2 = the phrase itself (no underscores, no edge whitespace),
  #+ group 4 = terminator character plus the rest of the line.
  local italic_re='^(.*[[:space:]])?_([^_[:space:]]([^_]*[^_[:space:]])?)_([[:space:].,;:!?)].*)?$'

  # -r keeps backslashes literal; default IFS still trims edge whitespace,
  #+ so whitespace-only lines count as blank.
  # The "|| [[ -n ... ]]" clause processes a final line lacking a newline.
  while read -r line || [[ -n "$line" ]]
  do
    if [[ -z "$line" ]]          # Blank line? New paragraph must follow.
    then
      printf '\n%s\n%s\n\n' "$LF" "$LF"
      continue
    fi

    if [[ "$line" =~ $image_re ]]   # Is a graphic?
    then
      # Emit the centered image tag and skip the italics pass, so that
      #+ underscores in the file name are never rewritten.
      printf '%s\n' "$CENTER <img src=\"$IMGDIR/${BASH_REMATCH[1]}\"> $END_CENTER "
      continue
    fi

    # Convert underscored phrases to italics, one per iteration.
    # Each pass removes two underscores, so the loop always terminates.
    # Done with the shell's own regex engine: no grep/sed per line.
    while [[ "$line" =~ $italic_re ]]
    do
      line="${BASH_REMATCH[1]}<i>${BASH_REMATCH[2]}</i>${BASH_REMATCH[4]}"
    done

    # printf, not echo: a line such as "-n" must be printed verbatim.
    # Don't want extra blank lines in generated text!
    printf '%s\n' "$line"
  done
} # End process_text ()


# write_footers
# Print the termination tags to stdout.
# write_headers leaves a bold element and a <font> element open around
#+ the converted text; both are closed here (innermost first) before the
#+ </body> and </html> tags, so the generated document is properly nested.
# Arguments: none.
# Globals read: FTR10 FTR11
write_footers ()            # Termination tags.
{
  # Closing counterparts of $BOLD and the <font size=N> tag.
  # If BOLD is ever changed from "<b>", adjust the first literal to match.
  printf '%s\n' '</b>' '</font>'

  printf '%s\n' "$FTR10" "$FTR11"
}


# main () {
# =========
# stdin -> stdout: preamble, converted text, closing tags, in that order.
write_headers
process_text
write_footers
# =========
# }

exit $?     # Exit status is that of write_footers, the last command run.


# Exercises:
# ---------
# 1) Fixup: Check for closing underscore before a comma or period.
# 2) Add a test for the presence of a closing underscore
#+   in phrases to be italicized.