LDP/LDP/defunct/texi2db/texi2db

2413 lines
59 KiB
Perl
Executable File

#!/usr/bin/perl
#
# this utility converts a Texinfo file into DocBook XML format.
#
use File::Basename;
use FileHandle;
use HTML::Entities;
$VERSION = "0.6-cvs";
$errors = 0;
$error = 0;
# runtime options
#
$maxerrors = 1;
$verbose = 0;
$outputtype = "HTML";
$maxrunonlines = 20;
# files
#
$requestedfile = "";
$outputfile = "";
$outfh = new FileHandle;
$logfile = "";
$logfh = new FileHandle;
# the following are flags to maintain state
#
# docbook structure flags
#
$inabstract = 1;
$inappendix = 0;
$insect1 = 0;
$insect2 = 0;
$insect3 = 0;
$insect4 = 0;
@nest = (); # stack, tracks nested structures 'table', 'multi', 'itemized', 'ordered'
@inrow = ();
@incol = ();
@infirstcol = ();
@tableformat = ();
@initem = ();
$informalpara = 0;
$inpara = 0;
$inmenu = 0;
$lang = "en"; # Used for keeping the language of the document
# these maintain internal program state
#
$line = "";
$originalline = "";
$currentfile = "";
$currentline = "";
$saveline = "";
$badbracketlines = 0;
$badbracketstartline = 0;
$seekend = "";
$literaltag = ''; # inline literal tag we're inside of
$literalendtag = ''; # tag that will end the block
@literal = (); # stacks for above tags
@literalend = ();
$suppressconversion = 0; # causes no conversion of @-commands
$suppresspara = 0; # causes no insertion of <para> tags
# here, we hold onto meta-data that we will need to build
# the article or book header structures.
#
$sgmlfile = "";
$title = "";
$authorname = "";
$buf = "";
# these are used when parsing the line for commands
#
$command;
$tag;
$tagplain;
$contents;
# these hold information about the node we are currently in, if any
#
$nodename = '';
$nodetitle = '';
$nodenext = '';
$nodeprev = '';
$nodeup = '';
$nodeid = '';
$nodeinit = 0;
# remember this hierarchical information for every node
# we use it if the node doesn't specify a level.
#
%nodenames = ();
%nodeanchors = ();
%nodenexts = ();
%nodeprevs = ();
%nodeups = ();
%nodelevels = ();
# holds user variables
#
%setvalues = ();
%ifsets = ();
# this is used to build a list of index entries to be written when requested
#
%indexes = ();
# These are used for keeping state on the most recent search of any of the following hashes
#
$pattern = "";
$action = "";
$replacement = "";
# In the following hash, the perl expressions are used to search
# each line of the input file. If matched, the Action field defines
# what to do with it.
#
# Note that @-Commands of this type must be at the beginning of the line.
#
# TODO: put these in a .conf file so it can be tweaked or customized
#
# Actions:
#
# ALIAS Create an alias to another command
# APPENDIX Begin an appentix
# BYE Stop processing the file
# CLEAR Clear a variable
# COMMENT Insert a comment
# DEFFN Define a function
# DEFINFOENCLOSE Load a customized highlighting pattern
# DROPBLOCK Drop the whole block on the floor
# DROPLINE Drop it on the floor
# IFCLEAR Test a variable
# IFSET Test a variable
# INDEX An index entry
# ITEM An item in a list or table
# LITERALBLOCK Literal layout block
# MACRO Record a program macro
# META Meta-Data
# MULTITABLE Begin a multi column table
# NODE A Texinfo node
# ORDEREDLIST Numbered (enumerated) list
# ORDEREDLISTEND End an enumerated list
# SECT? One of the sectioning commands
# SEEKEND Skip everything until you find the corresponding @end tag
# SET Set a variable
# TABLE Begin a table
# TABLEEND End a table
#
# @-Command Action
%patterns = (
'\input' =>'DROPLINE',
'o\input' =>'DROPLINE',
'@alias' =>'ALIAS',
'@author' =>'META',
'@bye' =>'BYE',
'@cartouche' =>'DROPLINE',
'@end cartouche' =>'DROPLINE',
'@clear' =>'CLEAR',
# '@c' =>'COMMENT', # handled specially
# '@comment' =>'COMMENT',
'@contents' =>'DROPLINE',
'@defcodeindex' =>'DROPLINE', # TODO
'@deffn' =>'DEFFN',
'@end deffn' =>'DROPLINE',
'@deffnx' =>'DEFFN',
'@end deffnx' =>'DROPLINE',
'@defmac' =>'DEFFN',
'@end defmac' =>'DROPLINE',
'@defmacx' =>'DEFFN',
'@end defmacx' =>'DROPLINE',
'@defun' =>'DEFFN',
'@end defun' =>'DROPLINE',
'@defunx' =>'DEFFN',
'@end defunx' =>'DROPLINE',
'@defindex' =>'DROPLINE', # TODO
'@definfoenclose' =>'DEFINFOENCLOSE',
'@defspec' =>'DEFFN',
'@end defspec' =>'DROPLINE',
'@dircategory' =>'DROPLINE',
'@direntry' =>'SEEKEND',
'@display' =>'LITERALBLOCK',
'@documentlanguage' =>'META',
# '@end display' =>'DROPLINE',
'@enumerate' =>'ORDEREDLIST',
'@end enumerate' =>'ORDEREDLISTEND',
'@example' =>'LITERALBLOCK',
# '@end example' =>'DROPLINE',
'@exdent' =>'DROPLINE',
'@finalout' =>'DROPLINE',
'@footnotestyle' =>'DROPLINE',
'@format' =>'LITERALBLOCK',
# '@end format' =>'DROPLINE',
'@flushleft' =>'DROPLINE',
'@end flushleft' =>'DROPLINE',
'@flushright' =>'DROPLINE',
'@end flushright' =>'DROPLINE',
'@headings' =>'DROPLINE',
'@html' =>'SEEKEND',
'@end html' =>'DROPLINE',
'@ifhtml' =>'SEEKEND',
'@end ifhtml' =>'DROPLINE',
'@ifinfo' =>'SEEKEND',
'@end ifinfo' =>'DROPLINE',
'@ifnothtml' =>'SEEKEND',
'@end ifnothtml' =>'DROPLINE',
'@ifnotinfo' =>'SEEKEND',
'@end ifnotinfo' =>'DROPLINE',
'@ifnottex' =>'SEEKEND',
'@end ifnottex' =>'DROPLINE',
'@ifset' =>'IFSET', # handled like SEEKEND
'@end ifset' =>'DROPLINE', # or ignored, so ignore this too
'@ifclear' =>'IFCLEAR',
'@end ifclear' =>'DROPLINE',
'@iftex' =>'DROPLINE',
'@end iftex' =>'DROPLINE',
'@ignore' =>'SEEKEND',
'@itemize' =>'ITEMIZEDLIST',
'@end itemize' =>'ITEMIZEDLISTEND',
'@item' =>'ITEM',
'@itemx' =>'ITEM',
'@lisp' =>'LITERALBLOCK',
# '@end lisp' =>'DROPLINE',
'@macro' =>'MACRO',
'@majorheading' =>'SECT1',
'@menu' =>'SEEKEND',
'@opindex' =>'DROPLINE', # TODO
'@need' =>'DROP2',
'@node' =>'NODE',
'@page' =>'DROPLINE',
'@paragraphindent' =>'DROPLINE',
'@printindex' =>'DROPLINE', # TODO
'@set' =>'SET',
'@setchapternewpage' =>'DROPLINE',
'@setfilename' =>'META',
'@settitle' =>'META',
'@shorttitlepage' =>'DROPLINE',
'@smallbook' =>'DROPLINE',
'@smallexample' =>'LITERALBLOCK',
# '@end smallexample' =>'DROPLINE',
'@sp' =>'DROPLINE',
'@synindex' =>'DROPLINE',
'@ftable' =>'TABLE',
'@end ftable' =>'TABLEEND',
'@table' =>'TABLE',
'@end table' =>'TABLEEND',
'@multitable' =>'MULTITABLE',
'@end multitable' =>'TABLEEND',
'@summarycontents' =>'DROPLINE',
'@syncodeindex' =>'DROPLINE', # TODO
'@tex' =>'SEEKEND',
'@titlepage' =>'SEEKEND',
'@verbatim' =>'LITERALBLOCK',
# '@end verbatim' =>'DROPLINE',
'@vtable' =>'TABLE',
'@end vtable' =>'TABLEEND',
'@top' =>'SECT1', # sectioning commands
'@chapter' =>'SECT1',
'@section' =>'SECT2',
'@subsection' =>'SECT3',
'@subsubsection' =>'SECT4',
'@centerchap' =>'SECT1',
'@unnumbered' =>'SECT1',
'@unnumberedsec' =>'SECT2',
'@unnumberedsubsec' =>'SECT3',
'@unnumberedsubsubsec' =>'SECT4',
'@chapheading' =>'SECT1',
'@heading' =>'SECT2',
'@subheading' =>'SECT3',
'@subsubheading' =>'SECT4',
'@appendix' =>'APPENDIX',
'@appendixsec' =>'SECT1',
'@appendixsubsec' =>'SECT2',
'@appendixsubsubsec' =>'SECT3',
'@cindex' =>'CINDEX', # TODO (temporary placement)
'@cmindex' =>'DROPLINE',
'@cvindex' =>'DROPLINE',
'@findex' =>'CINDEX',
'@kindex' =>'DROPLINE',
'@pindex' =>'DROPLINE',
'@tindex' =>'DROPLINE',
'@trindex' =>'DROPLINE',
'@vindex' =>'DROPLINE',
);
# These are one-for-one string substitutions
#
# Many of them are replaced by ISO-8879 codes for international characters,
# used by HTML as well as XML. See that spec for more information.
# (One version is at http://www.w3.org/TR/html4/sgml/entities.html)
#
# There are special cases, e.g. "@@", which is the escaped form of @.
# We do not handle @@ here, because it has to come after all other
# @-commands have been processed.
#
# These commands can have {} following them, but if so the whole
# construction must be listed, e.g., @AA{}.
#
# It's fine to replace these with nothing if you want them to be
# simply dropped on the floor.
#
# Neither side needs to be escaped.
#
# The array is for sorting keys.
# They are sorted in descending order and then iterated, to make sure
# @ss comes before @s, so we replace them correctly later.
#
@substitutionkeys = ();
%substitutions = (
# '@@' =>'@', # handled specially to simplify parsing
# '@\{' =>'{',
# '@\}' =>'}',
# '@,{c}' =>'&cedil;',
'@ ' =>' ', # punctuation and typographic characters
'@-' =>'&macr;',
'@.' =>'.',
'@!' =>'!',
'@?' =>'?',
'@"' =>'"',
'@exclamdown{}' =>'&iexcl;',
'@questiondown{}' =>'&iquest;',
'@pounds{}' =>'&pound;',
'@bullet{}' =>'*',
'@copyright{}' =>'&copy;',
'@dots{}' =>'&hellip;',
'@minus{}' =>'-',
'@TeX{}' =>'TeX',
'@enddots{}' =>'&hellip;',
'@:' =>'', # text layout, has no meaning in DocBook
'@=' =>'', # which is a semantic language.
'@*' =>'', # Layout issues are handled in XSLT.
'@s' =>' ',
'@center' =>'',
'@group' =>'',
'@end group' =>'',
'@noindent' =>'',
'@refill' =>'',
# accented characters
#
# Note that glyphs we cannot support in DocBook are represented by
# an appended character. Since this is so very similar to the
# @definfoenclose style of custom highlighting, they are handled
# in that section.
#
'@"A' =>'&Auml;', # umlaut
'@"E' =>'&Euml;',
'@"I' =>'&Iuml;',
'@"O' =>'&Ouml;',
'@"U' =>'&Uuml;',
'@"a' =>'&auml;',
'@"e' =>'&euml;',
'@"i' =>'&iuml;',
'@"o' =>'&ouml;',
'@"u' =>'&uuml;',
'@' . "'" . 'A' =>'&Aacute;', # acute accent
'@' . "'" . 'E' =>'&Eacute;',
'@' . "'" . 'I' =>'&Iacute;',
'@' . "'" . 'O' =>'&Oacute;',
'@' . "'" . 'U' =>'&Uacute;',
'@' . "'" . 'a' =>'&aacute;',
'@' . "'" . 'e' =>'&eacute;',
'@' . "'" . 'i' =>'&iacute;',
'@' . "'" . 'o' =>'&oacute;',
'@' . "'" . 'u' =>'&uacute;',
'@`A' =>'&Agrave;', # grave accent
'@`E' =>'&Egrave;',
'@`I' =>'&Igrave;',
'@`O' =>'&Ograve;',
'@`U' =>'&Ugrave;',
'@`a' =>'&agrave;',
'@`e' =>'&egrave;',
'@`i' =>'&igrave;',
'@`o' =>'&ograve;',
'@`u' =>'&ugrave;',
'@^A' =>'&Acirc;', # circumflex
'@^E' =>'&Ecirc;',
'@^I' =>'&Icirc;',
'@^O' =>'&Ocirc;',
'@^U' =>'&Ucirc;',
'@^a' =>'&acirc;',
'@^e' =>'&ecirc;',
'@^i' =>'&icirc;',
'@^o' =>'&ocirc;',
'@^u' =>'&ucirc;',
'@~A' =>'&Atilde;', # tilde
'@~N' =>'&Ntilde;',
'@~O' =>'&Otilde;',
'@~a' =>'&atilde;',
'@~n' =>'&ntilde;',
'@~o' =>'&otilde;',
'@=A' =>'A&macr;', # macron
'@=E' =>'E&macr;',
'@=I' =>'I&macr;',
'@=O' =>'O&macr;',
'@=U' =>'U&macr;',
'@=a' =>'a&macr;',
'@=e' =>'e&macr;',
'@=i' =>'i&macr;',
'@=o' =>'o&macr;',
'@=u' =>'u&macr;',
# other international characters and ligatures
#
'@L{}' =>'L', # Polish suppressed L/l
'@l{}' =>'l', # (currently unsupported)
'@AA{}' =>'&Aring;',
'@aa{}' =>'&aring;',
'@AE{}' =>'&AElig;', # ligatures
'@ae{}' =>'&aelig;',
'@OE{}' =>'&OElig;',
'@oe{}' =>'&oelig;',
'@O{}' =>'&Oslash;',
'@o{}' =>'&oslash;',
'@ss{}' =>'&szlig;',
'@error{}' =>'error-->', # glyphs used in examples
'@equiv{}' =>'&equiv;',
'@expansion{}' =>'==>',
'@point{}' =>'-!-',
'@print{}' =>'-|',
'@result{}' =>'=&gt;',
'@tab' =>'</entry><entry>',
);
# these are additional inline tags that always have {} following them.
# The replacement strings are triggers that they require some kind of
# programmatic control because they do strange things.
#
%specsubstitutions = (
'@anchor' =>'ANCHOR',
'@ref' =>'REF',
'@url' =>'UREF',
'@uref' =>'UREF',
'@xref' =>'XREF',
'@pxref' =>'PXREF',
'@inforef' =>'INFOREF',
'@kbd' =>'KBD',
);
# These are special block wrapping tags. When we hit one of these,
# we simply wrap whatever comes between the (always paired) tags
# with a set of DocBook tags.
#
# Their handling is subtly different than the %tags.
# We don't just replace them with the DocBook tags, because we
# want any <para> tags to go inside them, not outside.
#
# Any existing para is closed first, as these are always paragraphs.
#
%blocks = (
# '@quotation' =>'<blockquote><literallayout>',
# '@end quotation' =>'</literallayout></blockquote>',
'@quotation' =>'<blockquote>',
'@end quotation' =>'</blockquote>',
# '@format' =>'<literallayout>',
# '@end format' =>'</literallayout>',
);
# these are inline @-Commands which are replaced by a set of tags instead of one-for-one
# character substitution. Replacement is inline.
#
# there is also a special case, the "blank" tag, which does what you'd think.
# It removes the existing tag, but doesn't add the wrapper.
#
# Note that you can nest tags here by separating with commas.
#
# Examples: @code{foo} becomes <literal>foo</literal>.
# @footnote{foo} becomes <footnote><para>foo</para></footnote>.
#
%tags = (
'@acronym' =>'abbrev',
'@b' =>"emphasis role='bold'",
'@cite' =>'citetitle',
'@code' =>'literal',
'@command' =>'command',
'@dfn' =>"emphasis role='bold'",
'@dmn' =>'',
'@dotless' =>'', # not yet supported, just discarded
'@dotless' =>'',
'@key' =>'keycap',
'@email' =>'email',
'@emph' =>'emphasis',
'@env' =>'envar',
'@footnote' =>'footnote, para',
'@file' =>'filename',
'@i' =>'emphasis',
'@option' =>'option',
'@samp' =>'literal',
'@sc' =>'',
'@strong' =>"emphasis role='bold'",
'@t' =>'programlisting',
'@r' =>'',
'@var' =>'literal',
'@verb' =>'literal',
'@w' =>'',
);
# this is where definfoenclose definitions go, and they are processed last
#
# Note: Some built-in Texinfo accents can be seen as a kind of custom highlighting,
# since they cannot be displayed by a special glyph but are instead rendered
# by appending a character. Those glyphs are handled here.
#
# Note also that the "@" is not used here.
#
%definfos = (
'dotaccent' =>',.', # overdot accent
'H' =>',"', # Hungarian long umlaut
'ringaccent' =>',*', # ring accent
'tieaccent' =>',|', # tie-after accent
'u' =>',(', # breve accent
'ubaraccent' =>',_', # underbar accent
'udotaccent' =>'.,', # underdot accent
'v' =>',&lt;', # tie-after accent
);
# and this is where macros are stored
#
$inmacro = 0;
$macro = "";
%macros = {};
$macroargs = "";
%macroargs = ();
$macrotext = "";
%macrotext = ();
%aliases = ();
##############################################################################
##############################################################################
##############################################################################
while (1) {
if ($ARGV[0] eq '') {
last;
} elsif (($ARGV[0] eq '-i') or ($ARGV[0] eq '--include')) {
shift(@ARGV);
if (($ARGV[0] eq 'HTML') or ($ARGV[0] eq 'TEX') or ($ARGV[0] eq 'INFO')) {
$outputtype = $ARGV[0];
} else {
&raiseerror("invalid include format: $ARGV[0]");
exit(1);
}
} elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) {
shift(@ARGV);
$requestedfile = $ARGV[0];
} elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) {
shift(@ARGV);
$outputfile = $ARGV[0];
} elsif (($ARGV[0] eq '-L') or ($ARGV[0] eq '--runon')) {
shift(@ARGV);
$maxrunonlines = $ARGV[0];
} elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) {
shift(@ARGV);
$logfile = $ARGV[0];
} elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) {
shift(@ARGV);
$maxerrors = $ARGV[0];
} elsif (($ARGV[0] eq '-V') or ($ARGV[0] eq '--verbose')) {
$verbose++;
} elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--version')) {
&version;
exit(0);
} elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) {
&usage;
} else {
&raiseerror("unrecognized option: $ARGV[0]\n");
$error = 1;
&usage;
}
shift(@ARGV);
}
# be sure to open the log file before trying to write any messages to it!
#
if ($logfile) {
open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n";
}
$verbose = 3 if ($verbose > 3);
&message("verbose mode on.") if ($verbose == 1);
&message("debugging mode on.") if ($verbose ==2);
&message("insanity mode on.") if ($verbose ==3);
if ($outputtype eq 'HTML') {
$patterns{'@ifhtml'} = 'DROPLINE';
$patterns{'@end ifhtml'} = 'DROPLINE';
$patterns{'@ifinfo'} = 'SEEKEND';
$patterns{'@end ifinfo'} = 'DROPLINE';
$patterns{'@iftex'} = 'SEEKEND';
$patterns{'@end iftex'} = 'DROPLINE';
$patterns{'@ifnothtml'} = 'SEEKEND';
$patterns{'@end ifnothtml'} = 'DROPLINE';
$patterns{'@ifnotinfo'} = 'DROPLINE';
$patterns{'@end ifnotinfo'} = 'DROPLINE';
$patterns{'@ifnottex'} = 'DROPLINE';
$patterns{'@end ifnottex'} = 'DROPLINE';
&message("including HTML text") if ($verbose);
} elsif ($outputtype eq 'INFO') {
$patterns{'@ifhtml'} = 'SEEKEND';
$patterns{'@end ifhtml'} = 'DROPLINE';
$patterns{'@ifinfo'} = 'DROPLINE';
$patterns{'@end ifinfo'} = 'DROPLINE';
$patterns{'@iftex'} = 'SEEKEND';
$patterns{'@end iftex'} = 'DROPLINE';
$patterns{'@ifnothtml'} = 'DROPLINE';
$patterns{'@end ifnothtml'} = 'DROPLINE';
$patterns{'@ifnotinfo'} = 'SEEKEND';
$patterns{'@end ifnotinfo'} = 'DROPLINE';
$patterns{'@ifnottex'} = 'DROPLINE';
$patterns{'@end ifnottex'} = 'DROPLINE';
&message("including INFO text") if ($verbose);
} elsif ($outputtype eq 'TEX') {
$patterns{'@ifhtml'} = 'SEEKEND';
$patterns{'@end ifhtml'} = 'DROPLINE';
$patterns{'@ifinfo'} = 'SEEKEND';
$patterns{'@end ifinfo'} = 'DROPLINE';
$patterns{'@iftex'} = 'DROPLINE';
$patterns{'@end iftex'} = 'DROPLINE';
$patterns{'@ifnothtml'} = 'DROPLINE';
$patterns{'@end ifnothtml'} = 'DROPLINE';
$patterns{'@ifnotinfo'} = 'DROPLINE';
$patterns{'@end ifnotinfo'} = 'DROPLINE';
$patterns{'@ifnottex'} = 'SEEKEND';
$patterns{'@end ifnottex'} = 'DROPLINE';
&message("including TEX text") if ($verbose);
}
if ($outputfile) {
&message("output will go to $outputfile") if ($verbose);
$outfh = new FileHandle;
open($outfh, "> $outputfile") or die "Cannot write to $outputfile ($!)\n";
} else {
$outfh = STDOUT;
}
while (<DATA>) {
$template .= $_;
}
@substitutionkeys = sort {$b cmp $a} keys %substitutions;
&processfile ("$requestedfile");
&writefile;
close($outfh);
close($logfh) if ($logfile);
sub processfile {
my($filename,
$basename,
$path,
$ext,
$includefile,
$linenumber,
);
$macrolinecount = 0;
my $fh = new FileHandle;
$filename = @_[0];
$filename = '' unless ($filename);
($basename, $path, $ext) = fileparse($filename);
&message("processing $filename") if ($verbose);
$linenumber = 0;
if ($filename) {
open $fh, "<$filename" or raiseerror("cannot open $filename\n");
} else {
$fh = STDIN;
}
LINE: while ($line = <$fh>) {
chomp($line);
$linenumber++;
$currentfile = $filename;
$currentline = $linenumber;
if ($seekend) {
$seekend = '' if ($line =~ /$seekend/);
next LINE;
}
&message("LINE $currentline: $line") if ($verbose > 1);
&cleanline;
&markspecial; # hide @@, @{, @} to ease parsing
$originalline = $line; # must be cleaned but not trimmed!
$trimline;
# We have to handle some @-commands on the line when they appear,
# even if waiting for some wrapper command to complete:
#
# e.g., @footer{foo bar
# @cindex baz
# blah blah}
# and @comment, @c
#
if (($line =~ /\@c\b/) or ($line =~ /\@comment\b/)) {
&comment;
}
($pattern, $action) = &matchpattern();
if ($action eq 'DROPLINE') {
next LINE;
}
# keep reading until we have only complete tags
#
if ($saveline) {
$line = $saveline . ' ' . $line;
$saveline = '';
&message("line restored, line: $line") if ($verbose > 1);
}
unless (&bracketsmatch($line)) {
&message("bracket mismatch") if ($verbose > 2);
$saveline = $line;
next LINE;
}
if (scalar @literal) {
&message("checking for $literalendtag") if ($verbose > 2);
if ($line =~ /$literalendtag/) {
&message("End literal block") if ($verbose > 2);
&closeliteralblock;
next LINE;
}
}
if ($line =~ /^\@include/) {
$includefile = $line;
$includefile =~ s/^\@include\s+//;
$includefile = $path . $includefile;
$line = '';
if (-e $includefile) {
&message("including $includefile") if ($verbose);
&processfile ("$includefile");
} else {
&raiseerror("include file $includefile not found.");
}
} elsif ($inmacro) {
($pattern, $action) = &matchpattern();
if ($action eq 'SEEKEND') {
$seekend = $pattern;
$seekend =~ s/\@/\@end /;
next LINE;
} elsif ($action eq 'DROPLINE') {
next LINE;
} elsif ($action eq 'DROP2') {
$line =~ s/\@\w+\s+\w+\s*//;
}
if ($line =~ /\@end\s+macro/) {
#save macro
$macroargs{$macro} = $macroargs;
$macrotext{$macro} = $macrotext;
&message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2);
$inmacro = 0;
$macroargs = '';
$macrotext = '';
$macrolinecount = 0;
} elsif ($line =~ /\@quote-arg/) {
} else {
&message("macro line: $line") if ($verbose > 2);
$macrotext .= $line;
$macrolinecount++;
if ($macrolinecount >= 10) {
&raiseerror("runon macro");
}
}
} elsif ($inmenu) {
&message("inmenu: $line");
if ($line =~ /\@end menu/) {
&closemenu;
} else {
&menuitem;
}
} else {
if ($suppressconversion) {
$line = $originalline;
$pattern = '';
$action = '';
} else {
($pattern, $action) = &matchblock();
if ($action) {
&message("replacing block with $action") if ($verbose > 2);
&closepara;
$buf .= $action;
next LINE;
}
($pattern, $action) = &matchpattern();
}
if ($action eq 'DROPLINE') {
next LINE;
} else {
&doaction;
}
}
}
close($fh);
}
sub doaction {
if ($action eq 'DROP2') {
$line =~ s/\@\w+\s+\w+\s*//;
&writeconverted;
} elsif ($action eq 'SEEKEND') {
$seekend = $pattern;
$seekend =~ s/\@/\@end /;
} elsif ($action eq 'LITERALBLOCK') {
&literalblock;
} elsif ($action eq 'ALIAS') {
&alias;
} elsif ($action eq 'MACRO') {
&macro;
} elsif ($action eq 'META') {
&meta;
} elsif ($action eq 'NODE') {
&node;
} elsif ($action eq 'MENU') {
&menu;
} elsif ($action eq 'APPENDIX') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&appendix($line);
} elsif ($action eq 'SECT1') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&sect1($line);
} elsif ($action eq 'SECT2') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&sect2($line);
} elsif ($action eq 'SECT3') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&sect3($line);
} elsif ($action eq 'SECT4') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&sect4($line);
} elsif ($action eq 'CINDEX') {
$line =~ s/^\@\w+\s+//;
&convertinline;
&indexterm($line);
} elsif ($action eq 'PARA') {
&closeformalpara;
&para;
} elsif ($action eq 'DEFFN') {
&deffn;
} elsif ($action eq 'ORDEREDLIST') {
&orderedlist;
} elsif ($action eq 'ORDEREDLISTEND') {
&closeorderedlist;
} elsif ($action eq 'ITEMIZEDLIST') {
&itemizedlist;
} elsif ($action eq 'ITEMIZEDLISTEND') {
&closeitemizedlist;
} elsif ($action eq 'ITEM') {
&item;
&writeconverted;
} elsif ($action eq 'TABLE') {
&table;
$line =~ s/^\@\w+\s+//;
$tableformat[-1] = $line;
&message("table format: $tableformat[-1]") if ($verbose > 2);
$infirstcol[-1] = 0;
} elsif ($action eq 'MULTITABLE') {
&multitable;
} elsif ($action eq 'TABLEEND') {
&closetable;
} elsif ($action eq 'INDEX') {
&message("indexing not yet supported") if ($verbose > 1);
} elsif ($action eq 'DEFINFOENCLOSE') {
&definfoenclose;
} elsif ($action eq 'SET') {
&set;
} elsif ($action eq 'CLEAR') {
&clear;
} elsif ($action eq 'IFSET') {
$seekend = '\@end ifset' unless (&ifset());
} elsif ($action eq 'IFCLEAR') {
$seekend = '\@end ifclear' if (&ifset());
} elsif ($action eq 'BYE') {
&closesect1;
last;
} elsif ($action eq 'ERROR') {
&raiseerror("Hit ERROR tag with $pattern on $line");
} elsif ($action eq '') {
if ($nest[-1] eq 'table') { # multitables just write the line
&tableline;
}
if ($suppressconversion) {
&writeline;
} else {
&writeconverted;
}
} else {
&raiseerror("Unknown action: $action on command $pattern");
}
}
sub cleanline {
$line =~ s/&/&amp;/g; # do first, before writing new amps
$line =~ s/\</&lt;/g;
$line =~ s/\>/&gt;/g;
$line =~ s/\x0C//;
$line =~ s/\xA0/&nbsp;/;
$line =~ s/\xD7//;
$line =~ s/\xD8/&Ouml;/;
$line =~ s/\xDF/&szlig;/;
$line =~ s/\xF6/&ouml;/;
$line =~ s/\xE4/&auml;/;
}
sub writeabstract {
if ($inabstract) {
$inabstract = 0;
&message("copying meta-data to docbook") if ($verbose);
$template =~ s/%%TITLE/$title/;
$lang =~ s/\s+/q/g;
$template =~ s/%%LANG/$lang/;
if ($buf) {
$template =~ s/%%ABSTRACT/$buf/;
} else {
$template =~ s/\<abstract\>%%ABSTRACT\<\/abstract\>//s;
}
print $outfh $template . "\n";
$buf = "";
}
}
sub writefile {
&closeappendix;
print $outfh $buf;
print $outfh '</article>'. "\n";
}
####################
# PATTERN MATCHING #
####################
# this is for beginning-of-line @-commands
#
sub matchpattern {
$pattern = "";
foreach $key (keys %patterns) {
$pattern = quotemeta($key) . '\b';
if ($line =~ /^$pattern\b/) {
return ($pattern, $patterns{$key});
}
}
return ('', '');
}
# this is for special wrapped blocks, like blockquotes
#
sub matchblock {
foreach $key (keys %blocks) {
$pattern = quotemeta($key);
if ($line =~ /$pattern/) {
&message("matched block $pattern") if ($verbose > 2);
return ($pattern, $blocks{$key}) unless ($line =~ /$pattern\w/);
}
}
}
# inline processing
#
sub convertinline {
TAG: while ($line =~ /\@\w+\{[^\{]*?\}/) {
$command = $line;
$command =~ s/.*(\@\w+\{[^{]*?\}).*/\1/;
$tag = $command;
$tag =~ s/(.*)\{.*/\1/;
$tagplain = $tag;
$tagplain =~ s/\@//;
$contents = $command;
$contents =~ s/.*\{(.*)\}/\1/;
&message("CMDLINE: $line") if ($verbose > 2);
&message("CMD: $command, tag: $tag, tagplain: $tagplain, contents: $contents") if ($verbose > 2);
# If we're in a table's first column, and this column already contains
# the same tag, remove the internal tag.
# Otherwise, we wind up with nested literals, which DocBook disallows.
#
if (scalar @tableformat) {
if (($infirstcol[-1]) and ($tableformat[-1] eq $tag)) {
&message("not doubly wrapping tag $tag, removing the nested one") if ($verbose >2);
$contents =~ s/\<$tag\>//;
$contents =~ s/\<\/$tag\>//;
#$replacement = $contents;
#&replaceinline;
#next TAG;
}
}
# substitutions (we only want to catch @{} type tags here)
#
if (exists $substitutions{$tag . '{}'}) {
&message("matched character substitution $tag") if ($verbose >2);
$replacement = $substitutions{$tag . '{}'};
&replaceinline;
next TAG;
}
# macros
#
if (exists $macros{$tagplain}) {
&message("matched macro $tagplain") if ($verbose >2);
$macro = $macros{$tagplain};
$macroargs = "\\\\" . $macroargs{$tagplain} . "\\\\";
$macrotext = $macrotext{$tagplain};
$macroarg = $command;
$macroarg =~ s/^.*?\@$macro\{//;
$macroarg =~ s/\}.*?$//;
$macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/;
$macrotext =~ s/$macroargs/$macroarg/g;
$replacement = $macrotext;
&replaceinline;
next TAG;
}
# special character substitutions
#
if (exists $specsubstitutions{$tag}) {
&message("matched special substitution $tag") if ($verbose >2);
$replacement = $specsubstitutions{$tag};
&replacespecsubst;
&replaceinline;
next TAG;
}
# docbook wrapper tags
#
if (exists $tags{$tag}) {
&message("matched docbook wrapper tag $tag") if ($verbose >2);
@dbtags = split(',', $tags{$tag});
$dbopen = '';
$dbclose = '';
foreach $dbtag (@dbtags) {
$dbtag = &trim($dbtag);
# Texinfo allows nexted literals, DocBook does not.
#
if (($dbtag eq 'programlisting')
or ($dbtag eq 'literal')
or ($dbtag eq 'filename')
or ($dbtag eq 'option')) {
$contents =~ s/\<literal\>//g;
$contents =~ s/\<\/literal\>//g;
$contents =~ s/\<programlisting\>//g;
$contents =~ s/\<\/programlisting\>//g;
&message("removed inline literals: $contents") if ($verbose > 2);
}
($tag, $attributes) = split(/ /, $dbtag);
if ($attributes) {
$dbopen .= "\<$tag $attributes\>";
} else {
$dbopen .= "\<$tag\>";
}
$dbclose = "\<\/$tag\>" . $dbclose;
}
$replacement = $dbopen . $contents . $dbclose;
# Texinfo allows nexted literals, DocBook does not.
#
if (scalar @literal) {
$replacement =~ s/\<literal\>//g;
$replacement =~ s/\<\/literal\>//g;
$replacement =~ s/\<programlisting\>//g;
$replacement =~ s/\<\/programlisting\>//g;
&message("removed literal tags literals: $replacement") if ($verbose > 2);
}
&replaceinline;
next TAG;
}
if (exists $definfos{$tagplain}){
&message("matched definfo $tagplain") if ($verbose >2);
$action = $definfos{$tagplain};
($prefix, $suffix) = split(/,/,$action);
$replacement = $prefix . $contents . $suffix;
&message("definfo prefix: $prefix, suffix: $suffix, contents: $contents") if ($verbose > 1);
&replaceinline;
next TAG;
}
if (exists $setvalues{$contents}) {
&message("matched set value $contents") if ($verbose > 2);
$replacement = $setvalues{$contents};
&replaceinline;
next TAG;
}
if (exists $aliases{$tag}) {
&message("matched alias $tag") if ($verbose >2);
$replacement = $aliases{$tag} . '{' . $contents . '}';
&replaceinline;
next TAG;
}
&raiseerror("cannot resolve $command");
}
# character-level substitutions.
# must come last, so tags like @samp{} don't have the @s replaced
# with a space, etc.
#
# This uses a presorted array of keys in descending order,
# so we see @ss before @s, etc.
#
foreach $key (@substitutionkeys) {
$pattern = quotemeta($key);
if ($line =~ /$pattern/) {
&message("matched character code: $key") if ($verbose > 2);
$line =~ s/$pattern/$substitutions{$key}/g;
}
}
# unusual stuff to deal with
#
# @\n is sometimes entered when the user meant @(space).
# Texinfo handles it, so we have to.
#
$line =~ s/\@$/ /;
# Remove <> from inside <email></email> tags, since DocBook stylesheets
# generally add them back in when generating output.
#
$line =~ s/\<email\>&lt;(.*?)&gt;\<\/email\>/\<email\>$1\<\/email\>/g;
# We take care in case there's a matching block (it could ocurred
# because of the expansion of a macro or an alias)
($pattern, $action) = &matchpattern();
&doaction if ($action);
# We should have found *all* @-commands by now.
# If any are still hanging around, we have a problem.
#
if ($line =~ /\@/) {
&raiseerror("Unrecognized @-command in $line");
}
}
sub markspecial {
my $pattern1 = '';
my $pattern2 = '';
my $delimiter = '';
my $delimiterpattern = '';
my $contents = '';
my $contentspattern = '';
my $newcontents = '';
my $start = '';
my $end = '';
$line =~ s/\@\@/DCM_AT/g;
$line =~ s/\@\{/DCM_LB/g;
$line =~ s/\@\}/DCM_RB/g;
$line =~ s/\@\,{c\}/DCM_CD/g;
# special handling for the @verb{} command, which contains
# within it special characters which are NOT to be interpreted.
# rather than handle such a strange case, we make it simple by
# going the long way around and escaping everything
#
unless ($suppressconversion) {
while ($line =~ /\@verb\{/) {
$delimiter = $line;
$delimiter =~ s/.*?\@verb\{//;
$delimiter =~ s/(.).*/$1/;
&message("delimiter: $delimiter") if ($verbose > 2);
$delimiterpattern = quotemeta($delimiter);
&message("delimiterpattern: $delimiterpattern") if ($verbose > 2);
$contents = $line;
$pattern1 = '.*\@verb\{' . quotemeta($delimiter);
&message("pattern1: $pattern1") if ($verbose > 2);
$contents =~ s/$pattern1//;
$pattern2 = quotemeta($delimiter) . '\}.*';
&message("pattern2: $pattern2") if ($verbose > 2);
$contents =~ s/$pattern1//;
&message("contents: $contents") if ($verbose > 2);
$contents =~ s/$pattern2//;
&message("contents: $contents") if ($verbose > 2);
$newcontents = $contents;
$newcontents =~ s/\@/\@\@/g;
$newcontents =~ s/\{/\@\{/g;
$newcontents =~ s/\}/\@\}/g;
&message("newcontents: $newcontents") if ($verbose > 2);
$contentspattern = quotemeta($contents);
&message("contentspattern: $contentspattern") if ($verbose > 2);
$start = $line;
$start =~ s/$pattern2.*//;
$start =~ s/$delimiterpattern$contentspattern$//;
$start =~ s/\@verb/DCM_VERB/;
&message("start: $start") if ($verbose > 2);
$end = $line;
$end =~ s/.*$pattern1$contentspattern$delimiterpattern//;
&message("end: $end") if ($verbose > 2);
$line = $start . $newcontents . $end;
&message("line: $line") if ($verbose > 2);
}
$line =~ s/DCM_VERB/\@verb/g;
}
# We do this twice in case one of these was inside a @verb statement
#
$line =~ s/\@\@/DCM_AT/g;
$line =~ s/\@\{/DCM_LB/g;
$line =~ s/\@\}/DCM_RB/g;
$line =~ s/\@\,{c\}/DCM_CD/g;
}
sub unmarkspecial {
$_[0] =~ s/DCM_AT/\@\@/gi;
$_[0] =~ s/DCM_LB/\@\{/gi;
$_[0] =~ s/DCM_RB/\@\}/gi;
$_[0] =~ s/DCM_CD/\@\{c\}/gi;
}
sub normalizespecial {
$_[0] =~ s/DCM_AT/\@/gi;
$_[0] =~ s/DCM_LB/\{/gi;
$_[0] =~ s/DCM_RB/\}/gi;
$_[0] =~ s/DCM_CD/&cedil;/gi;
}
sub replaceinline {
&message("replacing $command with $replacement") if ($verbose > 1);
$command = quotemeta($command);
$line =~ s/$command/$replacement/;
}
sub replacespecsubst {
my $link = '';
my $linktitle = '';
my $keystring;
my @keycombos;
my @keycaps;
my $mykeys;
if ($replacement eq 'REF') {
$replacement = &crossref();
} elsif ($replacement eq 'PXREF') {
$replacement = "see " . &crossref();
} elsif ($replacement eq 'UREF') {
($link, $linktitle) = split(/,/, $contents);
$link = &trim($link);
$linktitle = &trim($linktitle);
$linktitle = $link unless ($linktitle);
$replacement = "\<ulink url='$link'\>$linktitle\<\/ulink\>";
&message("ulink: $link, linktitle: $linktitle") if ($verbose > 2);
} elsif ($replacement eq 'XREF') {
$replacement = "See " . &crossref();
} elsif ($replacement eq 'INFOREF') {
$replacement = "See " . &inforef();
} elsif ($replacement eq 'ANCHOR') {
$link = $contents;
$link = &anchorfix($link);
$replacement = "<anchor id='$link'\/\>";
} elsif ($replacement eq 'KBD') {
$keystring = $contents;
# nested literals not allowed in DocBook
#
$keystring =~ s/\<literal\>//g;
$keystring =~ s/\<\/literal\>//g;
# I found there is at least one case where <keycap> is already here:
# @kbd{gTop<keycap>RET</keycap>}, in the info manual.
# If we have those, throw away the keycap tags and hyphenate,
# so we can keycombo properly.
#
$keystring =~ s/\<keycap\>/-/g;
$keystring =~ s/\<\/keycap\>/-/g;
$keystring =~ s/^-+//g;
$keystring =~ s/-+$//g;
$keystring =~ s/-+\s+/-/g;
$keystring =~ s/\s+-+/-/g;
@keycombos = split(/\s+/, $keystring);
foreach $keycombo (@keycombos) {
&message("keycombo: $keycombo") if ($verbose > 2);
@keycaps = split(/-/, $keycombo);
if (scalar @keycaps > 1) {
&message("making keycombo") if ($verbose > 2);
$mykeys .= "\<keycombo action='simul'\>";
foreach $keycap (@keycaps) {
$mykeys .= "\<keycap\>$keycap\<\/keycap\>";
}
$mykeys .= '</keycombo>'
} else {
$mykeys .= "\<keycap\>$keycaps[0]\<\/keycap\>";
}
}
$replacement = $mykeys;
} else {
&raiseerror("Unrecognized special substitution code: $specsubstitutions{$tag}");
$replacement = $specsubstitutions{$tag};
}
}
######################
# TEXINFO STRUCTURES #
######################
sub crossref {
my $anchor;
my $link;
my $linkname;
my $linktitle;
my $infofile;
my $manualtitle;
my $title;
my $crossref = '';
($link, $linkname, $linktitle, $infofile, $manualtitle) = split(/,/, $contents);
$link = &trim($link);
$linktitle = &trim($linktitle);
$linkname = &trim($linkname);
$infofile = &trim($infofile);
$manualtitle = &trim($manualtitle);
# build the title that will display in the output
#
$title = $linktitle;
$title .= ': ' if ($title and ($linkname or $infofile));
$title .= "($infofile)" if ($infofile);
$title .= ' ' if (($title) and ($linkname));
$title .= $linkname if ($linkname);
# Try to pull in the node title if we've already seen it but it
# isn't specified in the cross reference.
#
# If not, we can only display the node name, which is not very good
# but the best we can do without writing a two-pass engine.
#
if (($link) and !($title)) {
$title = $nodetitlelookup{$link};
$title = $link unless ($title);
}
if ($infofile) {
&message("link to another info file, replacing with text") if ($verbose > 1);
$crossref = $title;
} else {
$anchor = &anchorfix($link);
$crossref = "<link linkend='$anchor'>$title</link>";
&message("made xref to tag $anchor") if ($verbose > 1);
}
return $crossref;
}
sub inforef {
my $anchor;
my $link;
my $linkname;
my $infofile;
my $title;
my $inforef = '';
($link, $linkname, $infofile) = split(/,/, $contents);
$link = &trim($link);
$linkname = &trim($linkname);
$infofile = &trim($infofile);
$title .= "($infofile)" if ($infofile);
$title .= $linkname if ($linkname);
$title = $link unless ($title);
if ($infofile) {
&message("link to another info file, replacing with text") if ($verbose > 1);
$crossref = $title;
} else {
$anchor = &anchorfix($link);
$crossref = "<link linkend='$anchor'>$title</link>";
&message("made xref to tag $anchor") if ($verbose > 1);
}
return $crossref;
}
sub anchorfix {
my $anchor = $_[0];
$anchor = lc(&trim($anchor));
$anchor = decode_entities($anchor);
$anchor =~ s/-/-dash-/g;
$anchor =~ s/&/-and-/g;
$anchor =~ s/;//g;
$anchor = encode_entities($anchor);
$anchor =~ s/&(\w)grave/\1/g;
$anchor =~ s/&(\w)acute/\1/g;
$anchor =~ s/&(\w)circ/\1/g;
$anchor =~ s/&(\w)uml/\1/g;
$anchor =~ s/&(\w)tilde/\1/g;
$anchor =~ s/&(\w)cedil/\1/g;
$anchor =~ s/&/-and-/g;
$anchor =~ s/;//g;
$anchor =~ s/\//-slash-/g;
$anchor =~ s/\\/-bslash-/g;
$anchor =~ s/\s+/-/g;
$anchor =~ s/'//g;
$anchor =~ s/`//g;
$anchor =~ s/,/-comma-/g;
$anchor =~ s/\./-dot-/g;
$anchor =~ s/!/-bang-/g;
$anchor =~ s/\?/-question-/g;
$anchor =~ s/\+/-plus-/g;
$anchor =~ s/\*/-x-/g;
$anchor =~ s/\(/-op-/g;
$anchor =~ s/\)/-cp-/g;
$anchor =~ s/\@/-at-/g;
$anchor =~ s/dcm_at/-at-/gi;
$anchor =~ s/\^/-hat-/g;
$anchor =~ s/=/-eq-/g;
$anchor =~ s/\$/S/;
$anchor =~ s/~/-tilde-/g;
$anchor =~ s/0/-zero-/g;
$anchor =~ s/1/-one-/g;
$anchor =~ s/2/-two-/g;
$anchor =~ s/3/-three-/g;
$anchor =~ s/4/-four-/g;
$anchor =~ s/5/-five-/g;
$anchor =~ s/6/-six-/g;
$anchor =~ s/7/-seven-/g;
$anchor =~ s/8/-eight-/g;
$anchor =~ s/9/-nine-/g;
$anchor =~ s/\|/-pipe-/g;
$anchor =~ s/\[/-lsqb-/g;
$anchor =~ s/\]/-rsqb-/g;
$anchor =~ s/^-+//;
$anchor =~ s/-+$//;
$anchor =~ s/--/-/g; # get rid of double, initial and trailing hyphens
return &trim($anchor);
}
sub deffn {
my $category;
my $name;
my $arguments;
my $argument;
my $indeffn = 0;
my $indefun = 0;
my $indefspec = 0;
my $indefmac = 0;
if ($line =~ /\@deffn/) {
$indeffn = 1;
$line =~ s/\@deffn\w*//;
&message("deffn found") if ($verbose > 1);
} elsif ($line =~ /\@defun/) {
$indefun = 1;
$line =~ s/\@defun\w*//;
&message("defun found") if ($verbose > 1);
} elsif ($line =~ /\@defspec/) {
$indefspec = 1;
$line =~ s/\@defspec\w*//;
&message("defspec found") if ($verbose > 1);
} elsif ($line =~ /\@defmac/) {
$indefmac = 1;
$line =~ s/\@defmac\w*//;
&message("defmac found") if ($verbose > 1);
} else {
&raiseerror("Unrecognized function definition");
}
&convertinline;
# DocBook does not allow nested literal tags
#
$line =~ s/\<literal\>//g;
$line =~ s/\<\/literal\>//g;
$line = &trim($line);
# function category names can have spaces. When they do, the name is enclosed in braces:
#
# @deffn {Interactive Command} isearch-forward
# @defun foo bar
# @defspec foovar (@var(var))
#
if ($line =~ /\{/) {
&message("function category name enclosed in braces") if ($verbose > 2);
($category, $line) = split(/}/, $line);
$category =~ s/\{//;
($name, $arguments) = split(/ /, $line, 2);
} else {
if ($indeffn) {
&message("function category name not enclosed in braces") if ($verbose > 2);
($category, $name, $arguments) = split(/\s/, $line, 3);
} elsif ($indefun) {
$category = 'Function';
($name, $arguments) = split(/ /, $line, 2);
} elsif ($indefspec) {
$category = 'Special Form';
($name, $arguments) = split(/ /, $line, 2);
} elsif ($indefmac) {
$category = 'Macro';
($name, $arguments) = split(/ /, $line, 2);
} else {
&raiseerror("Unrecognized function type");
}
}
&message("category: $category, name: $name, arguments: $arguments") if ($verbose > 1);
$line = $category . ': <funcsynopsis><funcprototype>';
&writeconverted;
$line = '<funcdef><function>' . $name . '</function>';
&writeline;
$line = '</funcdef>';
&writeline;
foreach $argument (split(' ', $arguments)) {
$line = '<paramdef><parameter>' . $argument . '</parameter></paramdef>';
&writeline;
}
$line = '</funcprototype></funcsynopsis>';
&writeline;
}
#############
# META-DATA #
#############
# this routine processes meta-data @-Commands
#
sub meta {
return if (&metaarg('\@setfilename', $sgmlfile));
return if (&metaarg('\@settitle', $title));
return if (&metaarg('\@documentlanguage',$lang));
return if (&metaarg('\@author', $authorname));
&raiseerror("Unknown meta-data command $line");
}
# Meta-data commands have arguments,
# so if the pattern matches, store the argument
# to the variable in $_[1]
#
sub metaarg {
my $pattern = $_[0];
if ($line =~ /$pattern/) {
$line =~ s/$pattern//;
&convertinline;
&normalizespecial($line);
$_[1] = $line;
&message("set $pattern to $line") if ($verbose > 2);
return 1;
} else {
return 0;
}
}
################
# WRITE OUTPUT #
################
sub writeconverted {
my $temp = &trim($line);
unless ($suppresspara) {
if ($temp eq '') {
&closeformalpara;
} else {
unless (($inpara) or ($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) {
if (($nodename) and ($nodeinit == 0) and !($inabstract)) {
&guessnodelevel;
}
&para;
}
}
}
unless ($suppressconversion) {
&convertinline;
}
&writeline;
}
sub writeline {
&normalizespecial($line);
&message("OUT: $line") if ($verbose > 1);
$buf .= $line . "\n";
if (($linenumber % 1000 == 0) and ($inabstract == 0)) {
print $outfh $buf;
$buf = '';
}
}
#########
# NODES #
#########
# this is for when the source never set a position for this node in the
# chapter-style hierarchy, which we use.
#
# We have to make a best guess based on the node's settings,
# so put it right under whoever it calls the "up" node.
#
sub guessnodelevel {
return if ($nodeinit);
&message("guessing level for node: $nodename, title: $nodetitle") if ($verbose > 2);
my $parentname = $nodeup;
my $parentlevel = $nodelevels{$parentname};
if ($parentlevel == "A") {
&sect1($nodetitle);
} elsif ($parentlevel == 1) {
&sect2($nodetitle);
} elsif ($parentlevel == 2) {
&sect3($nodetitle);
} elsif ($parentlevel == 3) {
&sect4($nodetitle);
} else {
&raiseerror("The parent node, $parentname, could not be found.");
}
}
# Sets up node variables.
#
# The node isn't actually created until we hit some text in it.
#
sub node {
$line =~ s/\@node\s+?//;
&convertinline;
&normalizespecial($line);
($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line);
$nodename = &trim($nodename);
$nodetitle = $nodename;
$nodeprev = &trim($nodeprev);
$nodenext = &trim($nodenext);
$nodeup = &trim($nodeup);
$nodeid = &anchorfix($nodename);
$nodeinit = 0;
&message("Node: $nodename") if ($verbose);
&closeformalpara;
}
# Creates the node
#
sub initnode {
$nodeid = 0 if (exists $nodeanchors{uc($nodename)});
if ($nodeinit) {
$nodeid = 0;
return;
}
&message("initiating node $nodename") if ($verbose > 2);
$nodenames{$nodename} = $nodename;
$nodenexts{$nodename} = $nodenext;
$nodeprevs{$nodename} = $nodeprev;
$nodeups{$nodename} = $nodeup;
$nodeanchors{uc($nodename)} = $nodeid if ($nodeid);
$nodetitlelookup{$nodeid} = $nodetitle;
$nodeinit = 1;
}
sub menu {
$inmenu = 1;
}
sub menuitem {
$line =~ s/^\*\s*//;
&message("menu line: $line");
}
sub closemenu {
$inmenu = 0;
}
####################
# DOCBOOK SECTIONS #
####################
sub appendix {
&writeabstract;
&message("appendix: $nodetitle") if ($verbose > 1);
&closeappendix;
if ($nodeinit) {
&sect($_[0]);
} else {
$nodetitle = $_[0];
}
&initnode;
&convertinline;
&normalizespecial($nodetitle);
if ($nodeid) {
$buf .= "\<appendix id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle<\/title\>\n";
} else {
$buf .= "\<appendix\>\<title\>$nodetitle\<\/title\>\n";
}
$inappendix = 1;
}
sub sect1 {
&writeabstract;
&message("sect1: $nodetitle") if ($verbose > 1);
&closeappendix;
if ($nodeinit) {
&sect($_[0]);
} else {
$nodetitle = $_[0];
}
&initnode;
&convertinline;
&normalizespecial($nodetitle);
if ($nodeid) {
$buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect1\>\<title\>$nodetitle\<\/title\>\n";
}
$insect1 = 1;
}
sub sect2 {
&writeabstract;
&message("sect1: $nodetitle") if ($verbose > 1);
&closesect2;
if ($nodeinit) {
&sect($_[0]);
} else {
$nodetitle = $_[0];
}
&initnode;
&convertinline;
&normalizespecial($nodetitle);
if ($inappendix) {
if ($nodeid) {
$buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect1\>\<title\>$nodetitle\<\/title\>\n";
}
$nodelevels{$nodename} = 1;
} else {
if ($nodeid) {
$buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect2\>\<title\>$nodetitle\<\/title\>\n";
}
$nodelevels{$nodename} = 2;
}
$insect2 = 1;
}
sub sect3 {
&writeabstract;
&message("sect1: $nodetitle") if ($verbose > 1);
&closesect3;
if ($nodeinit) {
&sect($_[0]);
} else {
$nodetitle = $_[0];
}
&initnode;
&convertinline;
&normalizespecial($nodetitle);
if ($inappendix) {
if ($nodeid) {
$buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect2\>\<title\>$nodetitle\<\/title\>\n";
}
$nodelevels{$nodename} = 2;
} else {
if ($nodeid) {
$buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect3\>\<title\>$nodetitle\<\/title\>\n";
}
$nodelevels{$nodename} = 3;
}
$insect3 = 1;
}
sub sect4 {
&writeabstract;
&message("sect4: $nodetitle") if ($verbose > 1);
&closesect4;
if ($nodeinit) {
&sect($_[0]);
} else {
$nodetitle = $_[0];
}
&initnode;
&convertinline;
&normalizespecial($nodetitle);
if ($inappendix) {
if ($nodeid) {
$buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect3\>\<title\>$nodetitlee\<\/title\>\n";
}
$nodelevels{$nodename} = 3;
} else {
if ($nodeid) {
$buf .= "\<sect4 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n";
} else {
$buf .= "\<sect4\>\<title\>$nodetitle\<\/title\>\n";
}
$nodelevels{$nodename} = 4444;
}
$insect4 = 1;
}
sub sect {
$nodetitle = $_[0];
&normalizespecial($nodetitle);
$nodename = $nodetitle;
$nodename = &trim($nodename);
$nodeprev = '';
$nodenext = '';
$nodeup = '';
$nodeid = &anchorfix($nodename);
$nodeid = 0 if (exists $nodeanchors{uc($nodename)});
$nodeanchors{uc($nodename)} = $nodeid if ($nodeid);
$nodeinit = 0;
&message("Section Node: $nodename") if ($verbose);
&closeformalpara;
}
sub multitable {
my @colwidths = ();
my $colcount = 0;
my $colspecs = '';
&closepara;
$line =~ s/\@\w+\s+//;
# fractional column widths are supported
# but prototypes are ignored, as DocBook will do the same
# thing automatically without them.
#
if ($line =~ /\@columnfractions/) {
&message("starting multicolumn table") if ($verbose > 1);
$line =~ s/\@columnfractions\s+//;
@colwidths = split(/\s+/, $line);
$colcount = scalar @colwidths;
foreach $colwidth (@colwidths) {
$colwidth =~ s/\.//;
$colwidth .= '0' if (length($colwidth) < 2);
$colspecs .= "\<colspec colwidth='$colwidth\*'\/\>\n";
}
} else {
$colcount = scalar split(/\{/, $line) - 1;
}
if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) {
&closetablecol;
$buf .= '<entrytbl cols=' . "'" . $colcount . "'" . "\>\n";
} else {
$buf .= '<informaltable><tgroup cols=' . "'" . $colcount . "'" . "\>\n";
}
$buf .= "$colspecs\n" if ($colspecs);
$buf .= '<tbody>' . "\n";
push @nest, 'multi';
push @incol, 0;
push @inrow, 0;
push @infirstcol, 0;
push @initem, 0;
push @tableformat, '';
}
sub table {
&closepara;
if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) {
&closetablecol;
$buf .= '<entrytbl cols=' . "'2'" . '><tbody>' . "\n";
} else {
$buf .= '<informaltable><tgroup cols=' . "'2'" . '><tbody>' . "\n";
}
push @nest, 'table';
push @incol, 0;
push @inrow, 0;
push @infirstcol, 0;
push @initem, 0;
push @tableformat, '';
}
sub tablerow {
&closetablerow;
$buf .= '<row>';
$inrow[-1] = 1;
}
sub tablecol {
&closetablecol;
$buf .= '<entry>';
$incol[-1] = 1;
}
sub tableline {
&message("table: $line") if ($verbose > 2);
if ($nest[-1] ne 'multi') {
if ($infirstcol[-1]) {
# $buf .= '</literallayout>';
&closetablecol;
&tablecol;
$infirstcol[-1] = 0;
}
}
}
sub orderedlist {
&closeformalpara;
$buf .= '<orderedlist>';
push @nest, 'ordered';
push @initem, 0;
# $suppresspara++;
}
sub itemizedlist {
&closeformalpara;
$buf .= '<itemizedlist>';
push @nest, 'itemized';
push @initem, 0;
# $suppresspara++;
}
sub item {
$line =~ s/\@item\w*\s*//;
if ($nest[-1] eq 'table') {
&message("table item: $line") if ($verbose > 2);
if ($infirstcol[-1]) {
$buf .= ', ';
}
unless ($infirstcol[-1]) {
&tablerow;
&tablecol;
$infirstcol[-1] = 1;
# $buf .= '<literallayout>';
}
$line = &trim($line);
$line = $tableformat[-1] . "\{$line\}" unless ($tableformat[-1] eq '@asis');
} elsif ($nest[-1] eq 'multi') {
&message("multitable item: $line") if ($verbose > 2);
&tablerow;
&tablecol;
# $line =~ s/^(.)/\<literallayout\>$1/;
# $line =~ s/\@tab/\<\/literallayout\>\@tab/;
$line =~ s/\s*\@tab\s*/\<\/entry\>\<entry\>/g;
} elsif (($nest[-1] eq 'ordered') or ($nest[-1] eq 'itemized')) {
&listitem;
} else {
&raiseerror("item tag found, but we're not in a list. Nest depth: " . scalar @nest);
}
}
sub listitem {
&closelistitem;
$buf .= '<listitem>';
# &para;
$initem[-1] = 1;
}
sub formalpara {
my $title = $_[0];
my $id = &anchorfix($title);
&closeformalpara;
#$buf .= "\<formalpara id='$id'\>\<title\>$title\<\/title\>\n\<para\>";
$informalpara = 1;
$inpara = 1;
}
sub para {
&closeformalpara;
$buf .= '<para>';
$inpara = 1;
}
sub literalblock {
if ($pattern =~ /\bformat\b/) {
&message("start programlisting") if ($verbose > 2);
$suppresspara++;
$literaltag = 'programlisting';
} elsif ($pattern =~ /\bexample\b/) {
&message("start programlisting") if ($verbose > 2);
$suppresspara++;
$literaltag = 'programlisting';
} elsif ($pattern =~ /\bsmallexample\b/) {
&message("start programlisting") if ($verbose > 2);
$suppresspara++;
$literaltag = 'programlisting';
} elsif ($pattern =~ /\bdisplay\b/) {
&message("start literallayout") if ($verbose > 2);
$suppresspara++;
$literaltag = 'literallayout';
} elsif ($pattern =~ /\blisp\b/) {
&message("start programlisting") if ($verbose > 2);
$suppressconversion++;
$suppresspara++;
$literaltag = 'programlisting';
} elsif ($pattern =~ /\bquotation\b/) {
&message("start quotation") if ($verbose > 2);
$literaltag = 'blockquote';
} elsif ($pattern =~ /\bverbatim\b/) {
&message("start programlisting") if ($verbose > 2);
$suppressconversion++;
$suppresspara++;
$literaltag = 'programlisting';
} else {
&raiseerror("Unrecognized literal: $pattern");
}
if (scalar @literal) {
&message("Not including tag $literaltag, due to nested literal blocks") if ($verbose > 2);
$literaltag = '';
}
$buf .= '<' . $literaltag . '>' . "\n" if ($literaltag);
$literalendtag = $pattern;
$literalendtag =~ s/\@/\@end /;
push @literal, $literaltag;
push @literalend, $literalendtag;
}
sub comment {
my $start = '';
my $comment = '';
if ($line =~ /\@c\b/) {
($start, $comment) = split(/\@c\b/, $line, 2);
} elsif ($line =~ /\@comment\b/) {
($start, $comment) = split(/\@comment\b/, $line, 2);
} else {
&raiseerror("error locating the comment");
return;
}
while ($comment =~ /--/) {
$comment =~ s/--/-/g;
}
$line = '<!-- ' . $comment . ' -->';
&writeline;
$line = $start;
}
sub indexterm {
$buf .= "\<indexterm\>\<primary\>$_[0]\</primary\>\</indexterm\>\n";
}
sub closeappendix {
&closesect1;
if ($inappendix) {
&message("closing appendix") if ($verbose > 2);
$buf .= '</appendix>' . "\n";
$inappendix = 0;
}
}
sub closesect1 {
&closesect2;
if ($insect1) {
&message("closing sect1") if ($verbose > 2);
$buf .= '</sect1>' . "\n\n";
$insect1 = 0;
}
}
sub closesect2 {
&closesect3;
if ($insect2) {
&message("closing sect2") if ($verbose > 2);
if ($inappendix) {
$buf .= '</sect1>' . "\n\n";
} else {
$buf .= '</sect2>' . "\n\n";
}
$insect2 = 0;
}
}
sub closesect3 {
&closesect4;
if ($insect3) {
&message("closing sect3") if ($verbose > 2);
if ($inappendix) {
$buf .= '</sect2>' . "\n\n";
} else {
$buf .= '</sect3>' . "\n\n";
}
$insect3 = 0;
}
}
sub closesect4 {
&closeformalpara;
&closenest;
if ($insect4) {
&message("closing sect4") if ($verbose > 2);
if ($inappendix) {
$buf .= '</sect3>' . "\n\n";
} else {
$buf .= '</sect4>' . "\n\n";
}
$insect4 = 0;
}
}
sub closenest {
my $runaway = 0;
while (scalar @nest) {
&closetable;
&closeorderedlist;
&closeitemizedlist;
$runaway++;
&raiseerror("Runaway nesting") if ($runaway >= 100);
}
}
sub closetable {
&closetablerow;
if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) {
&message("closing table") if ($verbose > 2);
if (($nest[-2] eq 'table') or ($nest[-2] eq 'multi')) {
$buf .= '</tbody></entrytbl>' . "\n";
} else {
$buf .= '</tbody></tgroup></informaltable>' . "\n";
}
pop @nest;
pop @incol;
pop @inrow;
pop @infirstcol;
pop @initem;
pop @tableformat;
}
}
sub closetablerow {
&closetablecol;
if ($inrow[-1]) {
&message("closing table row") if ($verbose > 2);
$buf .= '</row>' . "\n";
$inrow[-1] = 0;
}
}
sub closetablecol {
if ($incol[-1]) {
&message("closing table column") if ($verbose > 2);
$buf .= '</entry>' . "\n";
$incol[-1] = 0;
}
}
sub closeorderedlist {
&closelistitem;
if ($nest[-1] eq 'ordered') {
&message("closing ordered list") if ($verbose > 2);
$buf .= '</orderedlist>' . "\n";
pop @nest;
pop @initem;
# $suppresspara--;
}
}
sub closeitemizedlist {
&closelistitem;
if ($nest[-1] eq 'itemized') {
&message("closing itemized list") if ($verbose > 2);
$buf .= '</itemizedlist>' . "\n";
pop @nest;
pop @initem;
# $suppresspara--;
}
}
sub closelistitem {
&closeformalpara;
if ((($nest[-1] eq 'ordered') or (@nest[-1] eq 'itemized')) and (@initem[-1])) {
&message("closing list item") if ($verbose > 2);
#$buf .= '</para></listitem>' . "\n";
$buf .= '</listitem>' . "\n";
$initem[-1] = 0;
}
}
sub closeformalpara {
&closepara;
if ($informalpara) {
# $buf .= '</formalpara>';
$informalpara = 0;
}
}
sub closepara {
if ($inpara) {
$buf .= '</para>';
$inpara = 0;
}
}
sub closeliteralblock {
if (scalar @literal) {
foreach $tag (split(/,/, $literal[-1])) {
$buf .= '</' . &trim($tag) . '>' . "\n";
}
$literal = '';
$literalend = '';
$suppressconversion--;
$suppressconversion = 0 if ($suppressconversion < 0);
$suppresspara--;
&raiseerror("Literal block nesting error") if ($suppresspara < 0);
$suppresspara = 0 if ($suppresspara < 0);
pop @literal;
pop @literalend;
$literaltag = $literal[-1];
$literalendtag = $literalend[-1];
&message("literalendtag: $literalendtag") if ($verbose > 2);
&message("literal depth: " . scalar @literal . ", para: $suppresspara, conv: $suppressconversion") if ($verbose > 2);
}
}
################
# CONDITIONALS #
################
sub set {
my ($name,
$value);
($foo, $name, $value) = split(/\s+/, $line, 3);
$value = 1 unless ($value);
$setvalues{$name} = $value;
&message("set $name to $value") if ($verbose > 1);
$value = $setvalues{$name};
&message("read back $value") if ($verbose > 2);
}
sub clear {
my $name;
($foo, $name) = split(/\s+/, $line);
$setvalues{$name} = 0;
&message("cleared $name") if ($verbose > 1);
}
sub ifset {
my $name;
($foo, $name) = split(/\s+/, $line);
return $ifsets{$name};
}
#################
# META-LANGUAGE #
#################
sub alias {
my $alias;
($foo, $alias) = split(/\s/, $line);
($alias, $command) = split(/=/, $alias);
&message("alias: $alias, command: $command") if ($verbose > 2);
$aliases{'@' . $alias} = '@' . $command;
}
sub macro {
$line =~ s/\@macro\s+?(.*?)\{(.*?)\}/\1,\2/;
($macro, $macroargs) = split(/,/, $line);
$macro = &trim($macro);
$macros{$macro} = $macro;
$macrotext = '';
$inmacro = 1;
&message("adding macro $macro") if ($verbose > 2);
}
# load custom @-commands
#
sub definfoenclose {
my ($name,
$prefix,
$suffix,
$key,
$replacement);
$line =~ s/\@definfoenclose\s+//;
($name, $prefix, $suffix) = split(/,/, $line);
$replacement = $prefix. ',' . $suffix;
$definfos{$name} = $replacement;
&message("custom definfoenclosure: $name, $replacement") if ($verbose > 2);
}
####################
# SUPPORT ROUTINES #
####################
sub bracketsmatch {
my $bline = ' ' . $_[0] . ' ';
$bline =~ s/\@\{//g;
$bline =~ s/\@\}//g;
my $left = scalar split(/\{/, $bline) - 1;
my $right = scalar split(/\}/, $bline) - 1;
if ($right == $left) {
$badbracketlines = 0;
$badbracketstartline = $currentline + 1
} else {
$badbracketlines++;
&message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1);
if ($badbracketlines >= $maxrunonlines) {
&raiseerror("Mismatched brackets starting on line $badbracketstartline: $line");
}
}
return ($right == $left);
}
sub trimline {
$line = &trim($line);
}
sub trim {
my $temp = $_[0];
$temp =~ s/^\s+//g;
$temp =~ s/\s+$//g;
return $temp;
}
sub raiseerror {
my $errmsg = "ERROR: $currentfile $currentline $_[0]\n";
print STDERR $errmsg;
$errors++;
if (($maxerrors) and ($errors >= $maxerrors)) {
print STDERR "aborting after $errors errors.\n";
&writefile;
exit(1);
}
}
sub message {
my $message;
$message = $_[0];
if ($logfile) {
print $logfh "$message\n";
} else {
print "$message\n";
}
}
sub version {
print "texi2db version $VERSION\n";
print "Copyright (c) 2002, 2003 David Merrill \<david\@lupercalia.net\>.\n";
print "\n";
print "Converts a Texinfo file into DocBook XML.\n";
print "\n";
print "This is free software; see the source for copying conditions. There is no\n";
print "warranty; not even for merchantability or fitness for a particular purpose.\n\n";
}
sub usage {
&version;
print "Usage: texi2db [OPTIONS]... [FILE]\n";
print "\n";
print "Options:\n";
print "-i, --include HTML|INFO|TEX\n";
print " include the specified text.\n";
print "-f, --file FILE read from file rather than STDIN.\n";
print "-o, --output-to FILE write to file rather than STDOUT.\n";
print "-l, --log-to FILE write status messages to file rather than STDOUT.\n";
print "-L, --runon set runon line limit (default=20)\n";
print "-v, --verbose show diagnostic output.\n";
print " use twice for lots of detail.\n";
print " use thrice for insane amounts of detail.\n";
print "-e, --max-errors MAX abort after this many errors. Default is 1.\n";
print " use '--max-errors 0' to disable.\n";
print "-h, --help show this usage message and exit.\n";
print "-V, --version show the program version and exit.\n";
exit($error);
}
__END__
<?xml version='1.0'?>
<!DOCTYPE article PUBLIC '-//OASIS//DTD DocBook XML V4.1.2//EN'
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
<!ENTITY hellip "&#8230;">
<!ENTITY Oslash "&#216;">
]>
<article lang="%%LANG">
<articleinfo>
<title>%%TITLE</title>
<abstract>%%ABSTRACT</abstract>
</articleinfo>