LDP/LDP/texi2db/texi2db

#!/usr/bin/perl
#
# this utility converts a texinfo file into LDP WikiText format.
#
use File::Basename;
use FileHandle;
use HTML::Entities;

$version = "0.1";

$errors = 0;
$error = 0;

# runtime options
#
$maxerrors = 10;
$verbose = 0;

# files
#
$requestedfile = "";
$outputfile = "";
$outfh = new FileHandle;
$logfile = "";
$logfh = new FileHandle;

# the following are flags to maintain state
#
# docbook structure flags
#
$inabstract = 1;
$inappendix = 0;
$insect1 = 0;
$insect2 = 0;
$insect3 = 0;
$insect44 = 0;
$intable = 0;
$inrow = 0;
$incol = 0;
$tableformat = "";
$inorderedlist = 0;
$initemizedlist = 0;
$inlistitem = 0;
$informalpara = 0;
$inpara = 0;

# these maintain internal program state
#
$line = "";
$originalline = "";
$currentfile = "";
$currentline = "";
$saveline = "";
$badbracketlines = 0;
$badbracketstartline = 0;
$seekend = "";
$literalend = "";
$literal = "";

# here, we hold onto meta-data that we will need to build
# the article or book header structures.
#
$sgmlfile = "";
$title = "";
$authorname = "";
$abstract = "";
$buf = "";

# these hold information about the node we are currently in, if any
#
$nodename = "";
$nodenext = "";
$nodeprev = "";
$nodeup = "";
$nodeid = "";
$nodeinit = 0;

# remember this hierarchical information for every node
# we use it if the node doesn't specify a level.
#
%nodenexts  = ();
%nodeprevs  = ();
%nodeups    = ();
%nodelevels = ();

# holds user variables
#
%setvalues = ();
%ifsets = ();

# this is used to build a list of index entries to be written when requested
#
%indexes = ();

# These are used for keeping state on the most recent search of any of the following hashes
#
$pattern = "";
$action = "";

# In the following hash, the perl expressions are used to search
# each line of the input file. If matched, the Action field defines
# what to do with it.
#
# Note that @-Commands of this type must be at the beginning of the line.
#
# TODO: put these in a .conf file so it can be tweaked or customized
#
# Actions:
#
#	DROPLINE	Drop it on the floor
#	DROPBLOCK	Drop the whole block on the floor
#	META		Meta-Data
#	BYE		Stop processing the file
#	INDEX		An index entry
#	ORDEREDLIST	Numbered (enumerated) list
#	ORDEREDLISTEND	End an enumerated list
#	SEEKEND		Skip everything until you find the corresponding @end tag
#	NODE		A Texinfo node
#	SECT?		One of the sectioning commands
#	APPENDIX?	One of the appendix commands
#	DEFINFOENCLOSE	Load a customized highlighting pattern
#	SET		Set a variable
#	CLEAR		Clear a variable
#	IFSET		Test a variable
#
#	@-Command			Action
%patterns = (
	'\\input\b'			=>'DROPLINE',
#	'\@\*'				=>'PARA',
	'^\@alias'			=>'ALIAS',
	'^\@author'			=>'META',
	'^\@bye\b'			=>'BYE',
	'^\@c\b'			=>'DROPLINE',
	'^\@cindex\b'			=>'INDEX',
	'^\@clear\b'			=>'CLEAR',
	'^\@comment\b'			=>'DROPLINE',
	'^\@contents\b'			=>'DROPLINE',
	'^\@defindex\b'			=>'DROPLINE',		# TODO
	'^\@definfoenclose'		=>'DEFINFOENCLOSE',
	'^\@display\b'			=>'LITERALBLOCK',
	'^\@enumerate\b'		=>'ORDEREDLIST',
	'^\@end enumerate\b'		=>'ORDEREDLISTEND',
	'^\@example'			=>'LITERALBLOCK',
	'^\@format\b'			=>'LITERALBLOCK',
	'^\@headings\b'			=>'DROPLINE',
	'^\@html\b'			=>'SEEKEND',
	'^\@end html\b'			=>'DROLINE',
	'^\@ifhtml\b'			=>'DROPLINE',
	'^\@end ifhtml\b'		=>'DROPLINE',
	'^\@ifinfo\b'			=>'SEEKEND',
	'^\@end ifinfo\b'		=>'DROPLINE',
	'^\@ifnottex\b'			=>'DROPLINE',
	'^\@end ifnottex\b'		=>'DROPLINE',
	'^\@ifset\b'			=>'IFSET',		# handled like SEEKEND
	'^\@end ifset\b'		=>'DROPLINE',		# or ignored, so ignore this too
	'^\@ifclear\b'			=>'IFCLEAR',
	'^\@end ifclear\b'		=>'DROPLINE',
	'^\@iftex\b'			=>'SEEKEND',
	'^\@ignore\b'			=>'SEEKEND',
	'^\@itemize\b'			=>'ITEMIZEDLIST',
	'^\@end itemize\b'		=>'ITEMIZEDLISTEND',
	'^\@item\b'			=>'ITEM',
	'^\@itemx\b'			=>'ITEM',
	'^\@macro\b'			=>'MACRO',
	'^\@menu\b'			=>'SEEKEND',		# TODO
	'^\@node\b'			=>'NODE',
	'^\@page\b'			=>'DROPLINE',
	'^\@printindex\b'		=>'DROPLINE',		# TODO
	'^\@set\b'			=>'SET',
	'^\@setchapternewpage\b'	=>'DROPLINE',
	'^\@setfilename\b'		=>'META',
	'^\@settitle\b'			=>'META',
	'^\@smallbook\b'		=>'DROPLINE',
	'^\@smallexample\b'		=>'LITERALBLOCK',
	'^\@sp\b'			=>'DROPLINE',
	'^\@table\b'			=>'TABLE',
	'^\@end table\b'		=>'TABLEEND',
	'^\@tex\b'			=>'SEEKEND',
	'^\@titlepage\b'		=>'SEEKEND',

	'^\@top\b'			=>'SECT1',		# sectioning commands
	'^\@chapter\b'			=>'SECT1',
	'^\@section\b'			=>'SECT2',
	'^\@subsection\b'		=>'SECT3',
	'^\@subsubsection\b'		=>'SECT4',
	'^\@centerchap\b'		=>'SECT1',
	'^\@unnumbered\b'		=>'SECT1',
	'^\@unnumberedsec\b'		=>'SECT2',
	'^\@unnumberedsubsec\b'		=>'SECT3',
	'^\@unnumberedsubsubsec\b'	=>'SECT4',
	'^\@majorheading\b'		=>'SECT1',
	'^\@chapheading\b'		=>'SECT1',
	'^\@heading\b'			=>'SECT2',
	'^\@subheading\b'		=>'SECT3',
	'^\@subsubheading\b'		=>'SECT4',
	'^\@appendix\b'			=>'APPENDIX',
	'^\@appendixsec\b'		=>'SECT1',
	'^\@appendixsubsec\b'		=>'SECT2',
	'^\@appendixsubsubsec\b'	=>'SECT3',
);

# these are one-for-one string substitutions
#
# most of them are ISO-8879 codes for international characters,
# used by HTML as well as XML, but there are exceptions.
# There is one special case, "@@", which is the escaped form of @.
# We do not handle @@ here, because it has to come after all other
# @-commands have been processed.
#
# It's fine to replace these with nothing if you want them to be
# simply dropped on the floor.
#
# I keep these in general alphabetical order, except that @end tags
# are put right after their opening form.
#
# Note: Only the left side should be escaped!
#
# Example: @copyright{} becomes &copy;
#
%substitutions = (
	'\@!'			=>'!',
	'\@\*'			=>'',
	'\@\s'			=>' ',
	'\@"A'			=>'&Auml;',
	'\@"E'			=>'&Euml;',
	'\@"I'			=>'&Iuml;',
	'\@"O'			=>'&Ouml;',
	'\@"U'			=>'&Uuml;',
	'\@"a'			=>'&auml;',
	'\@"e'			=>'&euml;',
	'\@"i'			=>'&iuml;',
	'\@"o'			=>'&ouml;',
	'\@"u'			=>'&uuml;',
	'\@' . "'" . 'A'	=>'&Aacute;',
	'\@' . "'" . 'E'	=>'&Eacute;',
	'\@' . "'" . 'I'	=>'&Iacute;',
	'\@' . "'" . 'O'	=>'&Oacute;',
	'\@' . "'" . 'U'	=>'&Uacute;',
	'\@' . "'" . 'a'	=>'&aacute;',
	'\@' . "'" . 'e'	=>'&eacute;',
	'\@' . "'" . 'i'	=>'&iacute;',
	'\@' . "'" . 'o'	=>'&oacute;',
	'\@' . "'" . 'u'	=>'&uacute;',
	'\@,{c}'		=>'&cedil;',
	'\@-'			=>'&macr;',
	'\@\.'			=>'.',
	'\@:'			=>'',
	'\@='			=>'',
	'\@\?'			=>'?',
	'\@^A'			=>'&Acirc;',
	'\@^E'			=>'&Ecirc;',
	'\@^I'			=>'&Icirc;',
	'\@^O'			=>'&Ocirc;',
	'\@^U'			=>'&Ucirc;',
	'\@^a'			=>'&acirc;',
	'\@^e'			=>'&ecirc;',
	'\@^i'			=>'&icirc;',
	'\@^o'			=>'&ocirc;',
	'\@^u'			=>'&ucirc;',
	'\@`A'			=>'&Agrave;',
	'\@`E'			=>'&Egrave;',
	'\@`I'			=>'&Igrave;',
	'\@`O'			=>'&Ograve;',
	'\@`U'			=>'&Ugrave;',
	'\@`a'			=>'&agrave;',
	'\@`e'			=>'&egrave;',
	'\@`i'			=>'&igrave;',
	'\@`o'			=>'&ograve;',
	'\@`u'			=>'&ugrave;',
#	'\@\{'			=>'{',			# do these manually at the end so brackets match
#	'\@\}'			=>'}',
	'\@~A'			=>'&Atilde;',
	'\@~E'			=>'&Etilde;',
	'\@~I'			=>'&Itilde;',
	'\@~O'			=>'&Otilde;',
	'\@~U'			=>'&Utilde;',
	'\@~a'			=>'&atilde;',
	'\@~e'			=>'&etilde;',
	'\@~i'			=>'&itilde;',
	'\@~o'			=>'&otilde;',
	'\@~u'			=>'&utilde;',
	'\@AA\{\}'		=>'&Aring;',
	'\@aa\{\}'		=>'&aring;',
	'\@AE\{\}'		=>'&AElig;',
	'\@ae\{\}'		=>'&aelig;',
	'\@copyright\{\}'	=>'&copy;',
	'\@dots\{\}'		=>'&hellip;',
	'\@center\b'		=>'',
	'\@enddots\{\}'		=>'&hellip;',
	'\@equiv'		=>'&equiv;',
	'\@group\b'		=>'',
	'\@end group\b'		=>'',
	'\@exclamdown'		=>'&iexcl;',
	'\@noindent\b'		=>'',
	'\@refill\b'		=>'',
	'\@result\{\}'		=>'=&gt;',
	'\@TeX\{\}'		=>'TeX',
);

# these are inline tags that require some kind of programmatic control
# because they do strange things.
#
%specsubstitutions = (
	'\@uref\b'		=>'UREF',
	'\@anchor\b'		=>'ANCHOR',
);

# These are special block wrapping tags. When we hit one of these,
# we simply wrap whatever comes between the (always paired) tags
# with a set of DocBook tags.
#
# Their handling is subtly different than the %tags.
# We don't just replace them with the DocBook tags, because we
# want any <para> tags to go inside them, not outside.
#
%blocks = (
	'\@quotation\b'		=>'<blockquote>',
	'\@end quotation'	=>'</blockquote>',
);

# these are inline @-Commands which are replaced by a set of tags instead of one-for-one
# character substitution. Replacement is inline.
#
# there is also a special case, the "blank" tag, which does what you'd think.
# It removes the existing tag, but doesn't add the wrapper.
#
# Example: @code{foo} becomes <literal>foo</literal>.
#
%tags = (
	'\@acronym\b'		=>'abbrev',
	'\@b\b'			=>"emphasis role='bold'",
	'\@cite\b'		=>'citetitle',
	'\@code\b'		=>'literal',
	'\@command\b'		=>'command',
	'\@dfn\b'		=>"emphasis role='bold'",
	'\@email\b'		=>'email',
	'\@emph\b'		=>'emphasis',
	'\@env\b'		=>'envvar',
	'\@footnote\b'		=>'footnote',
	'\@file\b'		=>'filename',
	'\@i\b'			=>'emphasis',
	'\@samp\b'		=>'literal',
	'\@w\b'			=>'',
);

# this is where definfoenclose definitions go, and they are processed last
#
%definfos = ();

# and this is where macros are stored
#
$inmacro = 0;
$macro = "";
%macros = {};
$macroargs = "";
%macroargs = ();
$macrotext = "";
%macrotext = ();


##############################################################################
##############################################################################
##############################################################################


while (1) {
	if ($ARGV[0] eq '') {
		last;
	} elsif ($ARGV[0] eq '--version') {
		&version;
		exit(0);
	} elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) {
		shift(@ARGV);
		$requestedfile = $ARGV[0];
	} elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) {
		shift(@ARGV);
		$outputfile = $ARGV[0];
	} elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) {
		shift(@ARGV);
		$logfile = $ARGV[0];
	} elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) {
		shift(@ARGV);
		$maxerrors = $ARGV[0];
	} elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--verbose')) {
		$verbose++;
	} elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) {
		&usage;
	} else {
		&raiseerror("unrecognized option: $ARGV[0]\n");
		$error = 1;
		&usage;
	}
	shift(@ARGV);
}

# be sure to open the log file before trying to write any messages to it!
#
if ($logfile) {
	open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n";
}

$verbose = 3 if ($verbose > 3);
&message("verbose mode on.") if ($verbose == 1);
&message("debugging mode on.") if ($verbose ==2);
&message("insanity mode on.") if ($verbose ==3);

if ($outputfile) {
	&message("output will go to $outputfile") if (verbose);
	open($outfh, "> $outputfile");
}

while (<DATA>) {
	$template .= $_;
}

&processfile ("$requestedfile");

&writefile;
close($outfh) if ($outputfile);
close($logfh) if ($logfile);

sub processfile {
	my($filename,
	   $basename,
	   $path,
	   $ext,
	   $includefile,
	   $linenumber,
	  );

	$macrolinecount = 0;

	my $fh = new FileHandle;

	$filename = @_[0];
	$filename = 'STDIN' unless ($filename);

	($basename, $path, $ext) = fileparse($filename);
	&message("processing $filename") if ($verbose);
	$linenumber = 0;

	open $fh, "<$filename" or raiseerror("cannot open $filename\n");
LINE:	while ($line = <$fh>) {
		chomp($line);

		$linenumber++;
		$currentfile = $filename;
		$currentline = $linenumber;

		$originalline = $line;
		$trimline;

		if ($seekend) {
			$seekend = '' if ($line =~ /$seekend/);
			next LINE;
		}

		&message("LINE $currentline: $line") if ($verbose > 1);

		$line =~ s/&/&amp;/g;		# keep before the &lt; &gt; so we don't clobber them
		$line =~ s/\</&lt;/g;
		$line =~ s/\>/&gt;/g;
		$line =~ s/\@\@\}/DCM_AT\}/g;
		$line =~ s/\@\{/DCM_LB/g;
		$line =~ s/\@\}/DCM_RB/g;

		if ($saveline) {
			$line = $saveline . ' ' . $line;
			$saveline = '';
			&message("line restored, line: $line") if ($verbose > 1);
		}
		unless (&bracketsmatch($line)) {
			&message("bracket mismatch") if ($verbose > 2);
			$saveline = $line;
			next LINE;
		}

		if ($line =~ /^\@include/) {
			$includefile = $line;
			$includefile =~ s/^\@include\s+//;
			$includefile = $path . $includefile;
			$line = '';
			if (-e $includefile) {
				&message("including $includefile") if ($verbose);
				&processfile ("$includefile");
			} else {
				&raiseerror("include file $includefile not found.");
			}
		} elsif ($literalend) {
			if ($line =~ /$literalend/) {
				$literalend = '';
				&closeliteral;
			} else {
				&writeline;
			}
		} elsif ($inmacro) {
			($pattern, $action) = &matchpattern();
			if ($action eq 'SEEKEND') {
				$seekend = $pattern;
				$seekend =~ s/\@/\@end /;
				next LINE;
			} elsif ($action eq 'DROPLINE') {
				next LINE;
			}
			if ($line =~ /\@end\s+macro/) {
				#save macro
				$macroargs{$macro} = $macroargs;
				$macrotext{$macro} = $macrotext;
				&message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2);
				$inmacro = 0;
				$macroargs = '';
				$macrotext = '';
				$macrolinecount = 0;
			} elsif ($line =~ /\@quote-arg/) {
			} else {
				&message("macro line: $line") if ($verbose > 2);
				$macrotext .= $line;
				$macrolinecount++;
				if ($macrolinecount >= 10) {
					exit(1);
				}
			}
		} elsif ($intable) {
			&message("table: $line") if ($verbose > 2);
			($pattern, $action) = &matchpattern();
			if ($action eq 'TABLEEND') {
				&closetable;
			} elsif ($action eq 'ITEM') {
				&tablerow;
				&tablecol;
				$line =~ s/\@item\w*?\s+//;
				&message("cell: $line") if ($verbose > 2);
				$line = &trim($line);
				$line = $tableformat . "\{$line\}" unless ($tableformat eq '@asis');
				writeline;
				&closetablecol;
			} else {
				&tablecol;
				&writeline;
			}
		} else {
			($pattern, $action) = &matchblock();
			if ($action) {
				&message("matched block $pattern") if ($verbose > 2);
				&closepara;
				$buf .= $action;
				next LINE;
			}
			($pattern, $action) = &matchpattern();
			if ($action eq 'DROPLINE') {
				next LINE;
			} elsif ($action eq 'SEEKEND') {
				$seekend = $pattern;
				$seekend =~ s/\@/\@end /;
			} elsif ($action eq 'LITERALBLOCK') {
				$literalend = $pattern;
				$literalend =~ s/\@/\@end /;
				&literal;
			} elsif ($action eq 'ALIAS') {
				&alias;
			} elsif ($action eq 'MACRO') {
				&macro;
			} elsif ($action eq 'META') {
				&meta;
			} elsif ($action eq 'NODE') {
				&node;
			} elsif ($action eq 'SECT1') {
				&sect1;
			} elsif ($action eq 'SECT2') {
				&sect2;
			} elsif ($action eq 'SECT3') {
				&sect3;
			} elsif ($action eq 'SECT4') {
				&sect4;
			} elsif ($action eq 'APPENDIX') {
				&appendix;
			} elsif ($action eq 'PARA') {
				&closeformalpara;
				&para;
			} elsif ($action eq 'ORDEREDLIST') {
				&orderedlist;
			} elsif ($action eq 'ORDEREDLISTEND') {
				&closeorderedlist;
			} elsif ($action eq 'ITEMIZEDLIST') {
				&itemizedlist;
			} elsif ($action eq 'ITEMIZEDLISTEND') {
				&closeitemizedlist;
			} elsif ($action eq 'ITEM') {
				&item;
			} elsif ($action eq 'TABLE') {
				$tableformat = $line;
				$tableformat =~ s/^\@\w+\s+//;
				&message("table format: $tableformat") if ($verbose > 2);
				&table;
			} elsif ($action eq 'TABLEEND') {
				&closetable;
			} elsif ($action eq 'INDEX') {
				&message("indexing not yet supported") if ($verbose);
			} elsif ($action eq 'DEFINFOENCLOSE') {
				&definfoenclose;
			} elsif ($action eq 'SET') {
				&set;
			} elsif ($action eq 'CLEAR') {
				&clear;
			} elsif ($action eq 'IFSET') {
				$seekend = '\@end ifset' unless (&ifset());
			} elsif ($action eq 'IFCLEAR') {
				$seekend = '\@end ifclear' if (&ifset());
			} elsif ($action eq 'BYE') {
				&closesect1;
				last;
			} elsif ($action eq 'ERROR') {
				&raiseerror("Hit ERROR tag with $pattern on $line");
			} elsif ($action eq '') {
				&writeline;
			} else {
				&raiseerror("Unknown action: $action on command $pattern");
			}
		}
	}
	close($fh);
}

sub writefile {
	&closeappendix;
	&message("copying meta-data to docbook") if ($verbose);
	$template =~ s/%%TITLE/$title/;
	$template =~ s/%%ABSTRACT/$abstract/;
	$template =~ s/%%BODY/$buf/;
#	&message("deleting empty tags") if ($verbose);
#	$i = 0;
#	while (1) {
#		$template =~ s/\<(.+?)\>\W*\<\/\1\>//gs;	# delete empty tags
#		$i++;
#		last if ($i == 10);
#	}
	&message("writing $outputfile") if ($verbose);
	print $outfh $template . "\n";
	print $outfh '</article>'. "\n";
}

####################
# PATTERN MATCHING #
####################

# this is for beginning-of-line @-commands
#
sub matchpattern {
	foreach $key (keys %patterns) {
		if ($line =~ /$key/) {
			return ($key, $patterns{$key});
		}
	}
	return ('', '');
}

# this is for special wrapped blocks, like blockquotes
#
sub matchblock {
	foreach $key (keys %blocks) {
		if ($line =~ /$key/) {
			return ($key, $blocks{$key});
		}
	}
}

# the rest are all inline processing
#
sub convertinline {
	&convertsubsts;		# single characters, do immediately  and after macros
	&message("  SUBSTS: $line") if ($verbose > 1);
	&convertmacros;		# can insert new @{} commands, do first
	&message("  MACROS: $line") if ($verbose > 1);
	&convertsubsts;		# single characters, do immediately and after macros
	&message("  SUBSTS: $line") if ($verbose > 1);
	&convertvalues;		# insert variables, do immediately after macros
	&message("  VALUES: $line") if ($verbose > 1);
	&convertdefinfos;	# custom highlighting, do after macros
	&message("  DINFOS: $line") if ($verbose > 1);
	&converttags;		# @foo{bar} to <foo>bar</foo> do next to last
	&message("  DBTAGS: $line") if ($verbose > 1);
	&convertlinks;		# @ref{} => <ulink>, do last
	&message("  ULINKS: $line") if ($verbose > 1);
	&convertspecsubsts;	# @anchor{} => <anchor>, do last
	&message("  ANCHOR: $line") if ($verbose > 1);
}

# macros
#
sub convertmacros {
	while (1) {
		($macro, $macroargs, $macrotext) = &matchmacro();
		last unless ($macro);
		&message("matched macro $macro") if ($verbose > 1);
		if ($line =~ /\@$macro\{.*?\}/) {
			&replacemacro;
		} else {
			&raiseerror("unmatched macro brackets in line $line");
			last;
		}
		&message("line is now $line") if ($verbose > 2);
	}
}

sub matchmacro {
#	&message("matchmacros $line");
	foreach $macro (keys %macroargs) {
		if ($line =~ /\@$macro\{/) {
			return ($macro, $macroargs{$macro}, $macrotext{$macro});
		}
	}
}

sub replacemacro {
	my $macroarg = $line;
	my $open;
	my $close;
	$macroargs = "\\\\" . $macroargs . "\\\\";
	$macroarg =~ s/^.*?\@$macro\{//;
	$macroarg =~ s/\}.*?$//;
	$macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/;
	&message("replace pattern '$macroargs' with '$macroarg' in '$macrotext'") if ($verbose > 2);
	$macrotext =~ s/$macroargs/$macroarg/g;
	$line =~ s/\@$macro\{.*?\}/$macrotext/;
}

# character substitution
#
sub convertsubsts {
	while (1) {
		($pattern, $action) = &matchsubst();
		last unless ($pattern);
		&replacesubst;
	}
}

sub matchsubst {
	foreach $key (keys %substitutions) {
		if ($line =~ /$key/) {
			return ($key, $substitutions{$key});
		}
	}
	return ('', '');
}

sub replacesubst {
	&message("replacing '$pattern' with '$action' on line $line") if ($verbose > 1);
	$line =~ s/$pattern/$action/g;
}

# special character substitution
#
sub convertspecsubsts {
	while (1) {
		($pattern, $action) = &matchspecsubst();
		last unless ($pattern);
		&replacespecsubst;
	}
}

sub matchspecsubst {
	foreach $key (keys %specsubstitutions) {
		if ($line =~ /$key/) {
			return ($key, $specsubstitutions{$key});
		}
	}
	return ('', '');
}

sub replacespecsubst {
	my $link;
	my $linkname;

	&message("replacing 'special case' $pattern $action on line $line") if ($verbose > 2);
	if ($action eq 'UREF') {
		$link = $line;
		$link =~ s/^.*?\@uref\{(.*?)\}.*$/\1/;
		($link, $linkname) = split(/,/, $link);
		$link = &trim($link);
		$linkname = &trim($linkname);
		$linkname = $link unless ($linkname);
		$line =~ s/\@uref\{(.*?)\}/\<ulink url='$link'\>$linkname\<\/ulink\>/g;
		&message("ulink: $link, linkname: $linkname, line: $line"); # if ($verbose > 2);
	} elsif ($action eq 'ANCHOR') {

		$link = $line;
		$link =~ s/^.*?\@anchor\{(.*?)\}.*$/\1/;
		$link = &anchorfix($link);
		$line =~ s/\@anchor\{(.*?)\}/<anchor id='$link'\/\>/;

#		$line =~ s/\@anchor\{([^\}]*?)\}/\<anchor id='$link'\/\>/;
#		$line =~ s/\@anchor\{([^\}]*?\{.*?\}.*?)\}/\<anchor id='$link'\/\>/;
#		&message("anchor: $link on line: $line"); # if ($verbose > 2);
	} else {
		&raiseerror("Unrecognized 'special case' inline substitution code $pattern");
	}
}

$loops = 0;

# replace @-Commands with DocBook tags
#
sub converttags {
	$loops = 0;
	while (1) {
		($pattern, $action) = &matchtag();
		last unless ($pattern);
		if ($line =~ /$pattern.*?\}/) {
			$loops++;
			&replacetag;
			exit if ($loops >= 25);
		} else {
			&raiseerror("unmatched tag brackets in line $line");
			last;
		}
	}
}

sub matchtag {
	foreach $key (keys %tags) {
		if ($line =~ /$key/) {
			return ($key, $tags{$key});
		}
	}
	return ('', '');
}

sub replacetag {
	my ($tag,
	    $attributes,
	    $part,
	    $newline,
	    $fragment,
	   );

	if ($action eq '') {
		&message("removing old tag, but not adding a new one") if ($verbose > 2);
		$line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\1/g;
		$line =~ s/$pattern\{([^\}]*?)\}/\1/g;
	} else {
		&message("replacing $pattern with $action tags on line $line") if ($verbose > 1);
		($tag, $attributes) = split(/ /, $action);
		if ($attributes) {
			$line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g;
			$line =~ s/$pattern\{([^\}]*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g;
		} else {
			$line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag\>\1\<\/$tag\>/g;
			$line =~ s/$pattern\{([^\}]*?)\}/\<$tag\>\1\<\/$tag\>/g;
		}
	}
}

# replace @-Commands with custom highlighting
#
sub convertdefinfos {
	while (1) {
		($pattern, $action) = &matchdefinfo();
		last unless ($pattern);
		&message("matched definfoenclose '$pattern'") if ($verbose >2);
		&replacedefinfo;
	}
}

sub matchdefinfo {
	foreach $key (keys %definfos) {
		if ($line =~ /$key\{.*?\}/) {
			return ($key, $definfos{$key});
		}
	}
	return ('', '');
}

sub replacedefinfo {
	($prefix, $suffix) = split(/,/,$action);
	&message("pattern: $pattern, prefix: $prefix, suffix: $suffix") if ($verbose > 2);
	$line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/$prefix\1$suffix/g;
	$line =~ s/$pattern\{([^\}]*?)\}/$prefix\1$suffix/g;
	$line =~ s/$pattern\{\}/$prefix$suffix/g;
	&message("definfoenclose replaced") if ($verbose >2);
}

# replace @value{} with values
#
sub convertvalues {
	my ($key,
	    $value);

	foreach $key (keys %setvalues) {
		$value = $setvalues{$key};
		$line =~ s/\@value\{$key\}/$value/;
	}
}

sub convertlinks {
	my $link,
	   $anchor;

	while ($line =~ /\@ref\{/) {
		if ($line =~ /\@ref\{.*?\}/) {
			&message("making xref on line $line") if ($verbose > 1);
			$anchor = $line;
			$anchor =~ s/^.*?\@ref\{//;
			$anchor =~ s/\}.*$//;
			$anchortitle = &trim($anchor);
			$anchor = &anchorfix($anchor);
			&message("anchor: $anchor, title; $anchortitle") if ($verbose > 2);
			&raiseerror("NO ANCHOR in $line") unless ($anchor);

			# decide what kind of link to make based on whether it is
			# to a section or not. Sections have titles that can be
			# referenced using an xref. Anchors do not, and we have to
			# render a full <link> tag.
			#
			if ($nodelevels{$anchor}) {
				$link = "\<xref linkend='$anchor' endterm='$anchor-title'\/\>";
			} else {
				$link = "\<link linkend='$anchor'\>$anchortitle\<\/link\>";
			}
			$line =~ s/\@ref\{.*?\}/$link/;
			&message("made xref to tag $anchor on line: $line") if ($verbose > 1);
		} else {
			&raiseerror("unmatched ref brackets in line $line");
			last;
		}
	}
}

sub anchorfix {
	my $anchor = $_[0];
	$anchor = lc(&trim($anchor));
	$anchor = decode_entities($anchor);
	$anchor =~ s/&/-and-/g;
	$anchor =~ s/;//g;
	$anchor = encode_entities($anchor);
	$anchor =~ s/&(\w)grave/\1/g;
	$anchor =~ s/&(\w)acute/\1/g;
	$anchor =~ s/&(\w)circ/\1/g;
	$anchor =~ s/&(\w)uml/\1/g;
	$anchor =~ s/&(\w)tilde/\1/g;
	$anchor =~ s/&(\w)cedil/\1/g;
	$anchor =~ s/&/-and-/g;
	$anchor =~ s/;//g;
	$anchor =~ s/\//-/g;
	$anchor =~ s/\s+/-/g;
	$anchor =~ s/'//g;
	$anchor =~ s/!//g;
	$anchor =~ s/--/-/g;
	$anchor =~ s/\+/plus/g;
	$anchor =~ s/\*/x/g;
	$anchor =~ s/\(//g;
	$anchor =~ s/\)//g;
	$anchor =~ s/\@/at/g;
	$anchor =~ s/^-+//;
	$anchor =~ s/-+$//;
	$anchor =~ s/\$/S/;
	$anchor =~ s/\.//;
	return $anchor;
}

#############
# META-DATA #
#############

# this routine processes meta-data @-Commands
#
sub meta {
	return if (&metaarg('\@setfilename',	$sgmlfile));
	return if (&metaarg('\@settitle', 	$title));
	return if (&metaarg('\@author', 	$authorname));
	&raiseerror("Unknown meta-data command $line");
}

# Meta-data commands have arguments,
# so if the pattern matches, store the argument
# to the variable in $_[1]
#
sub metaarg {
	my $pattern = $_[0];
	if ($line =~ /$pattern/) {
		$_[1] = $line;
		$_[1] =~ s/$pattern//;
	}
}

################
# WRITE OUTPUT #
################

sub writeline {
	my $temp = &trim($line);

	&convertinline;
	unless (&bracketsmatch($line)) {
		&raiseerror("brackets do not match in line: $line");
		last;
	}

	unless ($literal) {
		if ($temp eq '') {
			&closeformalpara;
		} else {
			unless (($inpara) or ($intable)) {
				if (($nodename) and ($nodeinit == 0)) {
					&initnode;
				}
				&para;
			}
		}
	}
	$line =~ s/\@\@/\@/g;
	$line =~ s/DCM_AT/\@/g;
	$line =~ s/DCM_LB/\{/g;
	$line =~ s/DCM_RB/\}/g;
	$buf .= $line . "\n";
}

##############################################
# STRUCTURAL TEXINFO -> DOCBOOK TRANSLATIONS #
##############################################

# this is for when the source never set a position for this node in the
# chapter-style hierarchy, which we use.
#
# We have to make a best guess based on the node's settings,
# so put it right under whoever it calls the "up" node.
#
sub initnode {
	my $parentname = $nodeup;
	my $parentlevel = $nodelevels{$parentname};
	if ($parentlevel == "A") {
		&sect1;
	} elsif ($parentlevel == 1) {
		&sect2;
	} elsif ($parentlevel == 2) {
		&sect3
	} elsif ($parentlevel == 3) {
		&sect4;
	} else {
		&raiseerror("The parent node, $parentname, could not be found.");
	}
}

sub node {
	$line =~ s/\@node\b//;
	&convertinline;
	($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line);
	$nodename = &trim($nodename);
	$nodeid = &anchorfix($nodename);
	$nodeprev = &trim($nodeprev);
	$nodenext = &trim($nodenext);
	$nodeup   = &trim($nodeup);
	$nodenexts{$nodename} = $nodenext;
	$nodeprevs{$nodename} = $nodeprev;
	$nodeups{$nodename} = $nodeup;
	$nodeinit = 0;
	&message("Name: $nodename Next: $nodenext Previous: $nodeprev Up: $nodeup") if ($verbose > 1);
	&closeformalpara;
	if ($nodename eq 'Top') {
		$inabstract = 0;
		$abstract = $buf;
		$buf = "";
	}
}

sub appendix {
	&closeappendix;
	$buf .= "\<appendix id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
	$inappendix = 1;
	$nodeinit = 1;
	$nodelevels{$nodename} = "A";
	&message("processing node $nodename at level $nodelevels{$nodename}");
}

sub sect1 {
	&closesect1;
	$buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
	$insect1 = 1;
	$nodeinit = 1;
	$nodelevels{$nodename} = 1;
	&message("set node $nodename level to $nodelevels{$nodename}");
}

sub sect2 {
	&closesect2;
	if ($inappendix) {
		$buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 1;
	} else {
		$buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 2;
	}
	$insect2 = 1;
	$nodeinit = 1;
	$nodelevels{$nodename} = 2;
	&message("set node $nodename level to $nodelevels{$nodename}");
}

sub sect3 {
	&closesect3;
	if ($inappendix) {
		$buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 2;
	} else {
		$buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 3;
	}
	$insect3 = 1;
	$nodeinit = 1;
	$nodelevels{$nodename} = 3;
	&message("set node $nodename level to $nodelevels{$nodename}");
}

sub sect4 {
	&closesect4;
	if ($inappendix) {
		$buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 3;
	} else {
		$buf .= "\<sect4 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>";
		$nodelevels{$nodename} = 4;
	}
	$insect4 = 1;
	$nodeinit = 1;
	$nodelevels{$nodename} = 4;
	&message("set node $nodename level to $nodelevels{$nodename}");
}

sub table {
	&closetable;
	$buf .= '<informaltable><tgroup col=' . "'2'" . '><tbody>' . "\n";
	$intable = 1;
}

sub tablerow {
	&closetablerow;
	$buf .= '<row>';
	$inrow = 1;
}

sub tablecol {
	&closetablecol;
	$buf .= '<entry>';
	$incol = 1;
}

sub orderedlist {
	&closeformalpara;
	$buf .= '<orderedlist>';
	$inorderedlist = 1;
}

sub itemizedlist {
	&closeformalpara;
	$buf .= '<itemizedlist>';
	$initemizedlist = 1;
}

sub item {
	if ($intable) {
		&tableitem;
	} else {
		&listitem;
	}
}

sub listitem {
	&closelistitem;
	$buf .= '<listitem>';
	$inlistitem = 1;
}

sub formalpara {
	my $title = $_[0];
	my $id = &anchorfix($title);

	&closeformalpara;
	#$buf .= "\<formalpara id='$id'\>\<title\>$title\<\/title\>\n\<para\>";
	$informalpara = 1;
	$inpara = 1;
	$nodeinit = 1;
}

sub para {
	&closeformalpara;
	$buf .= '<para>';
	$inpara = 1;
}

sub literal {
	if ($pattern =~ /\bformat\b/) {
		$literal = 'programlisting';
		$buf .= '<programlisting>' . "\n";
	} elsif ($pattern =~ /\bdisplay\b/) {
		$literal = 'screen';
		$buf .= '<screen>';
	} elsif ($pattern =~ /\bexample\b/) {
		$literal = 'programlisting';
		$buf .= '<programlisting>';
	} elsif ($pattern =~ /\bsmallexample\b/) {
		$literal = 'programlisting';
		$buf .= '<programlisting>';
	}
}

sub closeappendix {
	&closesect1;
	if ($inappendix) {
		$buf .= '</appendix>' . "\n";
		$inappendix = 0;
	}
}

sub closesect1 {
	&closesect2;
	if ($insect1) {
		$buf .= '</sect1>' . "\n\n";
		$insect1 = 0;
	}
}

sub closesect2 {
	&closesect3;
	if ($insect2) {
		if ($inappendix) {
			$buf .= '</sect1>' . "\n\n";
		} else {
			$buf .= '</sect2>' . "\n\n";
		}
		$insect2 = 0;
	}
}

sub closesect3 {
	&closesect4;
	if ($insect3) {
		if ($inappendix) {
			$buf .= '</sect2>' . "\n\n";
		} else {
			$buf .= '</sect3>' . "\n\n";
		}
		$insect3 = 0;
	}
}

sub closesect4 {
	&closeformalpara;
	if ($insect4) {
		if ($inappendix) {
			$buf .= '</sect3>' . "\n\n";
		} else {
			$buf .= '</sect4>' . "\n\n";
		}
		$insect4 = 0;
	}
}

sub closetable {
	&closetablerow;
	if ($intable) {
		$buf .= '</tbody></tgroup></informaltable>' . "\n";
		$intable = 0;
	}
}

sub closetablerow {
	&closetablecol;
	if ($inrow) {
		$buf .= '</row>' . "\n";
		$inrow = 0;
	}
}

sub closetablecol {
	if ($incol) {
		$buf .= '</entry>' . "\n";
		$incol = 0;
	}
}

sub closeorderedlist {
	&closelistitem;
	if ($inorderedlist) {
		$buf .= '</orderedlist>' . "\n";
		$inorderedlist = 0;
	}
}

sub closeitemizedlist {
	&closelistitem;
	if ($initemizedlist) {
		$buf .= '</itemizedlist>' . "\n";
		$initemizedlist = 0;
	}
}

sub closelistitem {
	&closeformalpara;
	if ($inlistitem) {
		$buf .= '</listitem>' . "\n";
		$inlistitem = 0;
	}
}

sub closeformalpara {
	&closepara;
	if ($informalpara) {
#		$buf .= '</formalpara>';
		$informalpara = 0;
	}
}

sub closepara {
	if ($inpara) {
		$buf .= '</para>';
		$inpara = 0;
	}
}

sub closeliteral {
	if ($literal) {
		$buf .= '</' . $literal . '>' . "\n";
		$literal = '';
	}
}


################
# CONDITIONALS #
################

sub set {
	my ($name,
	    $value);

	($foo, $name, $value) = split(/\s+/, $line);
	$value = 1 unless ($value);
	$setvalues{$name} = $value;
	&message("set $name to $value") if ($verbose > 1);
	$value = $setvalues{$name};
	&message("read back $value") if ($verbose > 2);
}

sub clear {
	my $name;

	($foo, $name) = split(/\s+/, $line);
	$setvalues{$name} = 0;
	&message("cleared $name") if ($verbose > 1);
}

sub ifset {
	my $name;

	($foo, $name) = split(/\s+/, $line);
	return $ifsets{$name};
}

#################
# META-LANGUAGE #
#################

# only the commands in %tags are currently supported
#
sub alias {
	my $alias;

	($foo, $alias) = split(/\s/, $line);
	&message("alias: $alias") if ($verbose > 2);
	($alias, $command) = split(/=/, $alias);
	&message("alias: $alias, command: $command") if ($verbose > 2);
	$alias = '\@' . $alias . '\b';
	$command = '\@' . $command . '\b';
	&message("alias: $alias, command: $command") if ($verbose > 2);
	$tag = $tags{$command};
	&message("tag: $tag") if ($verbose > 2);
	$tags{$alias} = $tag;
}

sub macro {
	$line =~ s/\@macro\s*?(.*?)\{(.*?)\}/\1,\2/;
	($macro, $macroargs) = split(/,/, $line);
	$macro = &trim($macro);
	$macros{$macro} = $macro;
	$macrotext = '';
	$inmacro = 1;
}

# load custom @-commands
#
sub definfoenclose {
	my ($name,
	    $prefix,
	    $suffix,
	    $key,
	    $replacement);

	$line =~ s/\@definfoenclose\s+//;
	($name, $prefix, $suffix) = split(/,/, $line);
	$key = '\@' . $name;
	$replacement =  $prefix. ',' . $suffix;
	$definfos{$key} = $replacement;
#	&message("custom definfoenclosure: \[$name\] \[$prefix\] \[$suffix\] \/ \[$key\] \[$replacement\]") if ($verbose > 0);
}

###########
# INDEXES #
###########

sub defindex {
	$line =~ s/^\@defindex\s+//;
	$indexes{$line} = 1;
}

####################
# SUPPORT ROUTINES #
####################

sub bracketsmatch {
	my $bline = ' ' . $_[0] . ' ';
	my $left = scalar split(/\{/, $bline) - 1;
	my $right = scalar split(/\}/, $bline) - 1;
	if ($right == $left) {
		$badbracketlines = 0;
		$badbracketstartline = $currentline + 1
	} else {
		$badbracketlines++;
		&message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1);
		if ($badbracketlines >= 20) {
			&raiseerror("Mismatched brackets starting on line $badbracketstartline: $line");
		}
	}
	return ($right == $left);
}

sub trimline {
	$line = &trim($line);
}

sub trim {
	my $temp = $_[0];

	$temp =~ s/^\s+//g;
	$temp =~ s/\s+$//g;
	return $temp;
}

sub raiseerror {
	my $errmsg = "ERROR: $currentfile $currentline $_[0]\n";
	print STDERR $errmsg;
	$errors++;
	if (($maxerrors) and ($errors >= $maxerrors)) {
		print STDERR "aborting after $errors errors.\n";
		&writefile;
		exit(1);
	}
}

sub message {
	my $message;
	$message = $_[0];

	if ($logfile) {
		print $logfh "$message\n";
	} else {
		print "$message\n";
	}
}

sub version {
	print "texi2db version $version\n";
	print "Copyright (c) 2002 David Merrill \<david\@lupercalia.net\>.\n";
	print "\n";
	print "Converts a Texinfo file into DocBook.\n";
	print "\n";
	print "This free software; see the source for copying conditions. There is no\n";
	print "warranty; not even for merchantability or fitness for a particular purpose.\n";
}

sub usage {
	&version;
	print "\n";
	print "usage: texi2db [OPTIONS]... [FILE]\n";
	print "-f, --file FILE        read from file rather than STDIN.\n";
	print "-o, --output-to FILE   write to file rather than STDOUT.\n";
	print "-l, --log-to FILE      write status messages to file rather than STDOUT.\n";
	print "-v, --verbose          show diagnostic output.\n";
	print "                       use twice for lots of detail.\n";
	print "                       use thrice for insane amounts of detail.\n";
	print "-e, --max-errors MAX   show diagnostic output.\n";
	print "                       use '--max-errors 0' to disable.\n";
	print "-h, --help             show this usage message.\n";
	print "    --version          show the program version.\n";
	exit($error);
}

__END__
<!DOCTYPE ARTICLE PUBLIC "-//OASIS//DTD DocBook V4.1.2//EN" >

<article>
  <artheader>
    <title>%%TITLE</title>
    <abstract>
      %%ABSTRACT
    </abstract>
  </artheader>
%%BODY