#!/usr/bin/perl # # this utility converts a Texinfo file into DocBook XML format. # use File::Basename; use FileHandle; use HTML::Entities; $VERSION = "0.6-cvs"; $errors = 0; $error = 0; # runtime options # $maxerrors = 1; $verbose = 0; $outputtype = "HTML"; $maxrunonlines = 20; # files # $requestedfile = ""; $outputfile = ""; $outfh = new FileHandle; $logfile = ""; $logfh = new FileHandle; # the following are flags to maintain state # # docbook structure flags # $inabstract = 1; $inappendix = 0; $insect1 = 0; $insect2 = 0; $insect3 = 0; $insect4 = 0; @nest = (); # stack, tracks nested structures 'table', 'multi', 'itemized', 'ordered' @inrow = (); @incol = (); @infirstcol = (); @tableformat = (); @initem = (); $informalpara = 0; $inpara = 0; $inmenu = 0; $lang = "en"; # Used for keeping the language of the document # these maintain internal program state # $line = ""; $originalline = ""; $currentfile = ""; $currentline = ""; $saveline = ""; $badbracketlines = 0; $badbracketstartline = 0; $seekend = ""; $literaltag = ''; # inline literal tag we're inside of $literalendtag = ''; # tag that will end the block @literal = (); # stacks for above tags @literalend = (); $suppressconversion = 0; # causes no conversion of @-commands $suppresspara = 0; # causes no insertion of tags # here, we hold onto meta-data that we will need to build # the article or book header structures. # $sgmlfile = ""; $title = ""; $authorname = ""; $buf = ""; # these are used when parsing the line for commands # $command; $tag; $tagplain; $contents; # these hold information about the node we are currently in, if any # $nodename = ''; $nodetitle = ''; $nodenext = ''; $nodeprev = ''; $nodeup = ''; $nodeid = ''; $nodeinit = 0; # remember this hierarchical information for every node # we use it if the node doesn't specify a level. # %nodenames = (); %nodeanchors = (); %nodenexts = (); %nodeprevs = (); %nodeups = (); %nodelevels = (); # holds user variables # %setvalues = (); %ifsets = (); # this is used to build a list of index entries to be written when requested # %indexes = (); # These are used for keeping state on the most recent search of any of the following hashes # $pattern = ""; $action = ""; $replacement = ""; # In the following hash, the perl expressions are used to search # each line of the input file. If matched, the Action field defines # what to do with it. # # Note that @-Commands of this type must be at the beginning of the line. # # TODO: put these in a .conf file so it can be tweaked or customized # # Actions: # # ALIAS Create an alias to another command # APPENDIX Begin an appentix # BYE Stop processing the file # CLEAR Clear a variable # COMMENT Insert a comment # DEFFN Define a function # DEFINFOENCLOSE Load a customized highlighting pattern # DROPBLOCK Drop the whole block on the floor # DROPLINE Drop it on the floor # IFCLEAR Test a variable # IFSET Test a variable # INDEX An index entry # ITEM An item in a list or table # LITERALBLOCK Literal layout block # MACRO Record a program macro # META Meta-Data # MULTITABLE Begin a multi column table # NODE A Texinfo node # ORDEREDLIST Numbered (enumerated) list # ORDEREDLISTEND End an enumerated list # SECT? One of the sectioning commands # SEEKEND Skip everything until you find the corresponding @end tag # SET Set a variable # TABLE Begin a table # TABLEEND End a table # # @-Command Action %patterns = ( '\input' =>'DROPLINE', 'o\input' =>'DROPLINE', '@alias' =>'ALIAS', '@author' =>'META', '@bye' =>'BYE', '@cartouche' =>'DROPLINE', '@end cartouche' =>'DROPLINE', '@clear' =>'CLEAR', # '@c' =>'COMMENT', # handled specially # '@comment' =>'COMMENT', '@contents' =>'DROPLINE', '@defcodeindex' =>'DROPLINE', # TODO '@deffn' =>'DEFFN', '@end deffn' =>'DROPLINE', '@deffnx' =>'DEFFN', '@end deffnx' =>'DROPLINE', '@defmac' =>'DEFFN', '@end defmac' =>'DROPLINE', '@defmacx' =>'DEFFN', '@end defmacx' =>'DROPLINE', '@defun' =>'DEFFN', '@end defun' =>'DROPLINE', '@defunx' =>'DEFFN', '@end defunx' =>'DROPLINE', '@defindex' =>'DROPLINE', # TODO '@definfoenclose' =>'DEFINFOENCLOSE', '@defspec' =>'DEFFN', '@end defspec' =>'DROPLINE', '@dircategory' =>'DROPLINE', '@direntry' =>'SEEKEND', '@display' =>'LITERALBLOCK', '@documentlanguage' =>'META', # '@end display' =>'DROPLINE', '@enumerate' =>'ORDEREDLIST', '@end enumerate' =>'ORDEREDLISTEND', '@example' =>'LITERALBLOCK', # '@end example' =>'DROPLINE', '@exdent' =>'DROPLINE', '@finalout' =>'DROPLINE', '@footnotestyle' =>'DROPLINE', '@format' =>'LITERALBLOCK', # '@end format' =>'DROPLINE', '@flushleft' =>'DROPLINE', '@end flushleft' =>'DROPLINE', '@flushright' =>'DROPLINE', '@end flushright' =>'DROPLINE', '@headings' =>'DROPLINE', '@html' =>'SEEKEND', '@end html' =>'DROPLINE', '@ifhtml' =>'SEEKEND', '@end ifhtml' =>'DROPLINE', '@ifinfo' =>'SEEKEND', '@end ifinfo' =>'DROPLINE', '@ifnothtml' =>'SEEKEND', '@end ifnothtml' =>'DROPLINE', '@ifnotinfo' =>'SEEKEND', '@end ifnotinfo' =>'DROPLINE', '@ifnottex' =>'SEEKEND', '@end ifnottex' =>'DROPLINE', '@ifset' =>'IFSET', # handled like SEEKEND '@end ifset' =>'DROPLINE', # or ignored, so ignore this too '@ifclear' =>'IFCLEAR', '@end ifclear' =>'DROPLINE', '@iftex' =>'DROPLINE', '@end iftex' =>'DROPLINE', '@ignore' =>'SEEKEND', '@itemize' =>'ITEMIZEDLIST', '@end itemize' =>'ITEMIZEDLISTEND', '@item' =>'ITEM', '@itemx' =>'ITEM', '@lisp' =>'LITERALBLOCK', # '@end lisp' =>'DROPLINE', '@macro' =>'MACRO', '@majorheading' =>'SECT1', '@menu' =>'SEEKEND', '@opindex' =>'DROPLINE', # TODO '@need' =>'DROP2', '@node' =>'NODE', '@page' =>'DROPLINE', '@paragraphindent' =>'DROPLINE', '@printindex' =>'DROPLINE', # TODO '@set' =>'SET', '@setchapternewpage' =>'DROPLINE', '@setfilename' =>'META', '@settitle' =>'META', '@shorttitlepage' =>'DROPLINE', '@smallbook' =>'DROPLINE', '@smallexample' =>'LITERALBLOCK', # '@end smallexample' =>'DROPLINE', '@sp' =>'DROPLINE', '@synindex' =>'DROPLINE', '@ftable' =>'TABLE', '@end ftable' =>'TABLEEND', '@table' =>'TABLE', '@end table' =>'TABLEEND', '@multitable' =>'MULTITABLE', '@end multitable' =>'TABLEEND', '@summarycontents' =>'DROPLINE', '@syncodeindex' =>'DROPLINE', # TODO '@tex' =>'SEEKEND', '@titlepage' =>'SEEKEND', '@verbatim' =>'LITERALBLOCK', # '@end verbatim' =>'DROPLINE', '@vtable' =>'TABLE', '@end vtable' =>'TABLEEND', '@top' =>'SECT1', # sectioning commands '@chapter' =>'SECT1', '@section' =>'SECT2', '@subsection' =>'SECT3', '@subsubsection' =>'SECT4', '@centerchap' =>'SECT1', '@unnumbered' =>'SECT1', '@unnumberedsec' =>'SECT2', '@unnumberedsubsec' =>'SECT3', '@unnumberedsubsubsec' =>'SECT4', '@chapheading' =>'SECT1', '@heading' =>'SECT2', '@subheading' =>'SECT3', '@subsubheading' =>'SECT4', '@appendix' =>'APPENDIX', '@appendixsec' =>'SECT1', '@appendixsubsec' =>'SECT2', '@appendixsubsubsec' =>'SECT3', '@cindex' =>'CINDEX', # TODO (temporary placement) '@cmindex' =>'DROPLINE', '@cvindex' =>'DROPLINE', '@findex' =>'CINDEX', '@kindex' =>'DROPLINE', '@pindex' =>'DROPLINE', '@tindex' =>'DROPLINE', '@trindex' =>'DROPLINE', '@vindex' =>'DROPLINE', ); # These are one-for-one string substitutions # # Many of them are replaced by ISO-8879 codes for international characters, # used by HTML as well as XML. See that spec for more information. # (One version is at http://www.w3.org/TR/html4/sgml/entities.html) # # There are special cases, e.g. "@@", which is the escaped form of @. # We do not handle @@ here, because it has to come after all other # @-commands have been processed. # # These commands can have {} following them, but if so the whole # construction must be listed, e.g., @AA{}. # # It's fine to replace these with nothing if you want them to be # simply dropped on the floor. # # Neither side needs to be escaped. # # The array is for sorting keys. # They are sorted in descending order and then iterated, to make sure # @ss comes before @s, so we replace them correctly later. # @substitutionkeys = (); %substitutions = ( # '@@' =>'@', # handled specially to simplify parsing # '@\{' =>'{', # '@\}' =>'}', # '@,{c}' =>'¸', '@ ' =>' ', # punctuation and typographic characters '@-' =>'¯', '@.' =>'.', '@!' =>'!', '@?' =>'?', '@"' =>'"', '@exclamdown{}' =>'¡', '@questiondown{}' =>'¿', '@pounds{}' =>'£', '@bullet{}' =>'*', '@copyright{}' =>'©', '@dots{}' =>'…', '@minus{}' =>'-', '@TeX{}' =>'TeX', '@enddots{}' =>'…', '@:' =>'', # text layout, has no meaning in DocBook '@=' =>'', # which is a semantic language. '@*' =>'', # Layout issues are handled in XSLT. '@s' =>' ', '@center' =>'', '@group' =>'', '@end group' =>'', '@noindent' =>'', '@refill' =>'', # accented characters # # Note that glyphs we cannot support in DocBook are represented by # an appended character. Since this is so very similar to the # @definfoenclose style of custom highlighting, they are handled # in that section. # '@"A' =>'Ä', # umlaut '@"E' =>'Ë', '@"I' =>'Ï', '@"O' =>'Ö', '@"U' =>'Ü', '@"a' =>'ä', '@"e' =>'ë', '@"i' =>'ï', '@"o' =>'ö', '@"u' =>'ü', '@' . "'" . 'A' =>'Á', # acute accent '@' . "'" . 'E' =>'É', '@' . "'" . 'I' =>'Í', '@' . "'" . 'O' =>'Ó', '@' . "'" . 'U' =>'Ú', '@' . "'" . 'a' =>'á', '@' . "'" . 'e' =>'é', '@' . "'" . 'i' =>'í', '@' . "'" . 'o' =>'ó', '@' . "'" . 'u' =>'ú', '@`A' =>'À', # grave accent '@`E' =>'È', '@`I' =>'Ì', '@`O' =>'Ò', '@`U' =>'Ù', '@`a' =>'à', '@`e' =>'è', '@`i' =>'ì', '@`o' =>'ò', '@`u' =>'ù', '@^A' =>'Â', # circumflex '@^E' =>'Ê', '@^I' =>'Î', '@^O' =>'Ô', '@^U' =>'Û', '@^a' =>'â', '@^e' =>'ê', '@^i' =>'î', '@^o' =>'ô', '@^u' =>'û', '@~A' =>'Ã', # tilde '@~N' =>'Ñ', '@~O' =>'Õ', '@~a' =>'ã', '@~n' =>'ñ', '@~o' =>'õ', '@=A' =>'A¯', # macron '@=E' =>'E¯', '@=I' =>'I¯', '@=O' =>'O¯', '@=U' =>'U¯', '@=a' =>'a¯', '@=e' =>'e¯', '@=i' =>'i¯', '@=o' =>'o¯', '@=u' =>'u¯', # other international characters and ligatures # '@L{}' =>'L', # Polish suppressed L/l '@l{}' =>'l', # (currently unsupported) '@AA{}' =>'Å', '@aa{}' =>'å', '@AE{}' =>'Æ', # ligatures '@ae{}' =>'æ', '@OE{}' =>'Œ', '@oe{}' =>'œ', '@O{}' =>'Ø', '@o{}' =>'ø', '@ss{}' =>'ß', '@error{}' =>'error-->', # glyphs used in examples '@equiv{}' =>'≡', '@expansion{}' =>'==>', '@point{}' =>'-!-', '@print{}' =>'-|', '@result{}' =>'=>', '@tab' =>'', ); # these are additional inline tags that always have {} following them. # The replacement strings are triggers that they require some kind of # programmatic control because they do strange things. # %specsubstitutions = ( '@anchor' =>'ANCHOR', '@ref' =>'REF', '@url' =>'UREF', '@uref' =>'UREF', '@xref' =>'XREF', '@pxref' =>'PXREF', '@inforef' =>'INFOREF', '@kbd' =>'KBD', ); # These are special block wrapping tags. When we hit one of these, # we simply wrap whatever comes between the (always paired) tags # with a set of DocBook tags. # # Their handling is subtly different than the %tags. # We don't just replace them with the DocBook tags, because we # want any tags to go inside them, not outside. # # Any existing para is closed first, as these are always paragraphs. # %blocks = ( # '@quotation' =>'
', # '@end quotation' =>'
', '@quotation' =>'
', '@end quotation' =>'
', # '@format' =>'', # '@end format' =>'', ); # these are inline @-Commands which are replaced by a set of tags instead of one-for-one # character substitution. Replacement is inline. # # there is also a special case, the "blank" tag, which does what you'd think. # It removes the existing tag, but doesn't add the wrapper. # # Note that you can nest tags here by separating with commas. # # Examples: @code{foo} becomes foo. # @footnote{foo} becomes foo. # %tags = ( '@acronym' =>'abbrev', '@b' =>"emphasis role='bold'", '@cite' =>'citetitle', '@code' =>'literal', '@command' =>'command', '@dfn' =>"emphasis role='bold'", '@dmn' =>'', '@dotless' =>'', # not yet supported, just discarded '@dotless' =>'', '@key' =>'keycap', '@email' =>'email', '@emph' =>'emphasis', '@env' =>'envar', '@footnote' =>'footnote, para', '@file' =>'filename', '@i' =>'emphasis', '@option' =>'option', '@samp' =>'literal', '@sc' =>'', '@strong' =>"emphasis role='bold'", '@t' =>'programlisting', '@r' =>'', '@var' =>'literal', '@verb' =>'literal', '@w' =>'', ); # this is where definfoenclose definitions go, and they are processed last # # Note: Some built-in Texinfo accents can be seen as a kind of custom highlighting, # since they cannot be displayed by a special glyph but are instead rendered # by appending a character. Those glyphs are handled here. # # Note also that the "@" is not used here. # %definfos = ( 'dotaccent' =>',.', # overdot accent 'H' =>',"', # Hungarian long umlaut 'ringaccent' =>',*', # ring accent 'tieaccent' =>',|', # tie-after accent 'u' =>',(', # breve accent 'ubaraccent' =>',_', # underbar accent 'udotaccent' =>'.,', # underdot accent 'v' =>',<', # tie-after accent ); # and this is where macros are stored # $inmacro = 0; $macro = ""; %macros = {}; $macroargs = ""; %macroargs = (); $macrotext = ""; %macrotext = (); %aliases = (); ############################################################################## ############################################################################## ############################################################################## while (1) { if ($ARGV[0] eq '') { last; } elsif (($ARGV[0] eq '-i') or ($ARGV[0] eq '--include')) { shift(@ARGV); if (($ARGV[0] eq 'HTML') or ($ARGV[0] eq 'TEX') or ($ARGV[0] eq 'INFO')) { $outputtype = $ARGV[0]; } else { &raiseerror("invalid include format: $ARGV[0]"); exit(1); } } elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) { shift(@ARGV); $requestedfile = $ARGV[0]; } elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) { shift(@ARGV); $outputfile = $ARGV[0]; } elsif (($ARGV[0] eq '-L') or ($ARGV[0] eq '--runon')) { shift(@ARGV); $maxrunonlines = $ARGV[0]; } elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) { shift(@ARGV); $logfile = $ARGV[0]; } elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) { shift(@ARGV); $maxerrors = $ARGV[0]; } elsif (($ARGV[0] eq '-V') or ($ARGV[0] eq '--verbose')) { $verbose++; } elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--version')) { &version; exit(0); } elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) { &usage; } else { &raiseerror("unrecognized option: $ARGV[0]\n"); $error = 1; &usage; } shift(@ARGV); } # be sure to open the log file before trying to write any messages to it! # if ($logfile) { open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n"; } $verbose = 3 if ($verbose > 3); &message("verbose mode on.") if ($verbose == 1); &message("debugging mode on.") if ($verbose ==2); &message("insanity mode on.") if ($verbose ==3); if ($outputtype eq 'HTML') { $patterns{'@ifhtml'} = 'DROPLINE'; $patterns{'@end ifhtml'} = 'DROPLINE'; $patterns{'@ifinfo'} = 'SEEKEND'; $patterns{'@end ifinfo'} = 'DROPLINE'; $patterns{'@iftex'} = 'SEEKEND'; $patterns{'@end iftex'} = 'DROPLINE'; $patterns{'@ifnothtml'} = 'SEEKEND'; $patterns{'@end ifnothtml'} = 'DROPLINE'; $patterns{'@ifnotinfo'} = 'DROPLINE'; $patterns{'@end ifnotinfo'} = 'DROPLINE'; $patterns{'@ifnottex'} = 'DROPLINE'; $patterns{'@end ifnottex'} = 'DROPLINE'; &message("including HTML text") if ($verbose); } elsif ($outputtype eq 'INFO') { $patterns{'@ifhtml'} = 'SEEKEND'; $patterns{'@end ifhtml'} = 'DROPLINE'; $patterns{'@ifinfo'} = 'DROPLINE'; $patterns{'@end ifinfo'} = 'DROPLINE'; $patterns{'@iftex'} = 'SEEKEND'; $patterns{'@end iftex'} = 'DROPLINE'; $patterns{'@ifnothtml'} = 'DROPLINE'; $patterns{'@end ifnothtml'} = 'DROPLINE'; $patterns{'@ifnotinfo'} = 'SEEKEND'; $patterns{'@end ifnotinfo'} = 'DROPLINE'; $patterns{'@ifnottex'} = 'DROPLINE'; $patterns{'@end ifnottex'} = 'DROPLINE'; &message("including INFO text") if ($verbose); } elsif ($outputtype eq 'TEX') { $patterns{'@ifhtml'} = 'SEEKEND'; $patterns{'@end ifhtml'} = 'DROPLINE'; $patterns{'@ifinfo'} = 'SEEKEND'; $patterns{'@end ifinfo'} = 'DROPLINE'; $patterns{'@iftex'} = 'DROPLINE'; $patterns{'@end iftex'} = 'DROPLINE'; $patterns{'@ifnothtml'} = 'DROPLINE'; $patterns{'@end ifnothtml'} = 'DROPLINE'; $patterns{'@ifnotinfo'} = 'DROPLINE'; $patterns{'@end ifnotinfo'} = 'DROPLINE'; $patterns{'@ifnottex'} = 'SEEKEND'; $patterns{'@end ifnottex'} = 'DROPLINE'; &message("including TEX text") if ($verbose); } if ($outputfile) { &message("output will go to $outputfile") if ($verbose); $outfh = new FileHandle; open($outfh, "> $outputfile") or die "Cannot write to $outputfile ($!)\n"; } else { $outfh = STDOUT; } while () { $template .= $_; } @substitutionkeys = sort {$b cmp $a} keys %substitutions; &processfile ("$requestedfile"); &writefile; close($outfh); close($logfh) if ($logfile); sub processfile { my($filename, $basename, $path, $ext, $includefile, $linenumber, ); $macrolinecount = 0; my $fh = new FileHandle; $filename = @_[0]; $filename = '' unless ($filename); ($basename, $path, $ext) = fileparse($filename); &message("processing $filename") if ($verbose); $linenumber = 0; if ($filename) { open $fh, "<$filename" or raiseerror("cannot open $filename\n"); } else { $fh = STDIN; } LINE: while ($line = <$fh>) { chomp($line); $linenumber++; $currentfile = $filename; $currentline = $linenumber; if ($seekend) { $seekend = '' if ($line =~ /$seekend/); next LINE; } &message("LINE $currentline: $line") if ($verbose > 1); &cleanline; &markspecial; # hide @@, @{, @} to ease parsing $originalline = $line; # must be cleaned but not trimmed! $trimline; # We have to handle some @-commands on the line when they appear, # even if waiting for some wrapper command to complete: # # e.g., @footer{foo bar # @cindex baz # blah blah} # and @comment, @c # if (($line =~ /\@c\b/) or ($line =~ /\@comment\b/)) { &comment; } ($pattern, $action) = &matchpattern(); if ($action eq 'DROPLINE') { next LINE; } # keep reading until we have only complete tags # if ($saveline) { $line = $saveline . ' ' . $line; $saveline = ''; &message("line restored, line: $line") if ($verbose > 1); } unless (&bracketsmatch($line)) { &message("bracket mismatch") if ($verbose > 2); $saveline = $line; next LINE; } if (scalar @literal) { &message("checking for $literalendtag") if ($verbose > 2); if ($line =~ /$literalendtag/) { &message("End literal block") if ($verbose > 2); &closeliteralblock; next LINE; } } if ($line =~ /^\@include/) { $includefile = $line; $includefile =~ s/^\@include\s+//; $includefile = $path . $includefile; $line = ''; if (-e $includefile) { &message("including $includefile") if ($verbose); &processfile ("$includefile"); } else { &raiseerror("include file $includefile not found."); } } elsif ($inmacro) { ($pattern, $action) = &matchpattern(); if ($action eq 'SEEKEND') { $seekend = $pattern; $seekend =~ s/\@/\@end /; next LINE; } elsif ($action eq 'DROPLINE') { next LINE; } elsif ($action eq 'DROP2') { $line =~ s/\@\w+\s+\w+\s*//; } if ($line =~ /\@end\s+macro/) { #save macro $macroargs{$macro} = $macroargs; $macrotext{$macro} = $macrotext; &message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2); $inmacro = 0; $macroargs = ''; $macrotext = ''; $macrolinecount = 0; } elsif ($line =~ /\@quote-arg/) { } else { &message("macro line: $line") if ($verbose > 2); $macrotext .= $line; $macrolinecount++; if ($macrolinecount >= 10) { &raiseerror("runon macro"); } } } elsif ($inmenu) { &message("inmenu: $line"); if ($line =~ /\@end menu/) { &closemenu; } else { &menuitem; } } else { if ($suppressconversion) { $line = $originalline; $pattern = ''; $action = ''; } else { ($pattern, $action) = &matchblock(); if ($action) { &message("replacing block with $action") if ($verbose > 2); &closepara; $buf .= $action; next LINE; } ($pattern, $action) = &matchpattern(); } if ($action eq 'DROPLINE') { next LINE; } else { &doaction; } } } close($fh); } sub doaction { if ($action eq 'DROP2') { $line =~ s/\@\w+\s+\w+\s*//; &writeconverted; } elsif ($action eq 'SEEKEND') { $seekend = $pattern; $seekend =~ s/\@/\@end /; } elsif ($action eq 'LITERALBLOCK') { &literalblock; } elsif ($action eq 'ALIAS') { &alias; } elsif ($action eq 'MACRO') { ¯o; } elsif ($action eq 'META') { &meta; } elsif ($action eq 'NODE') { &node; } elsif ($action eq 'MENU') { &menu; } elsif ($action eq 'APPENDIX') { $line =~ s/^\@\w+\s+//; &convertinline; &appendix($line); } elsif ($action eq 'SECT1') { $line =~ s/^\@\w+\s+//; &convertinline; §1($line); } elsif ($action eq 'SECT2') { $line =~ s/^\@\w+\s+//; &convertinline; §2($line); } elsif ($action eq 'SECT3') { $line =~ s/^\@\w+\s+//; &convertinline; §3($line); } elsif ($action eq 'SECT4') { $line =~ s/^\@\w+\s+//; &convertinline; §4($line); } elsif ($action eq 'CINDEX') { $line =~ s/^\@\w+\s+//; &convertinline; &indexterm($line); } elsif ($action eq 'PARA') { &closeformalpara; ¶ } elsif ($action eq 'DEFFN') { &deffn; } elsif ($action eq 'ORDEREDLIST') { &orderedlist; } elsif ($action eq 'ORDEREDLISTEND') { &closeorderedlist; } elsif ($action eq 'ITEMIZEDLIST') { &itemizedlist; } elsif ($action eq 'ITEMIZEDLISTEND') { &closeitemizedlist; } elsif ($action eq 'ITEM') { &item; &writeconverted; } elsif ($action eq 'TABLE') { &table; $line =~ s/^\@\w+\s+//; $tableformat[-1] = $line; &message("table format: $tableformat[-1]") if ($verbose > 2); $infirstcol[-1] = 0; } elsif ($action eq 'MULTITABLE') { &multitable; } elsif ($action eq 'TABLEEND') { &closetable; } elsif ($action eq 'INDEX') { &message("indexing not yet supported") if ($verbose > 1); } elsif ($action eq 'DEFINFOENCLOSE') { &definfoenclose; } elsif ($action eq 'SET') { &set; } elsif ($action eq 'CLEAR') { &clear; } elsif ($action eq 'IFSET') { $seekend = '\@end ifset' unless (&ifset()); } elsif ($action eq 'IFCLEAR') { $seekend = '\@end ifclear' if (&ifset()); } elsif ($action eq 'BYE') { &closesect1; last; } elsif ($action eq 'ERROR') { &raiseerror("Hit ERROR tag with $pattern on $line"); } elsif ($action eq '') { if ($nest[-1] eq 'table') { # multitables just write the line &tableline; } if ($suppressconversion) { &writeline; } else { &writeconverted; } } else { &raiseerror("Unknown action: $action on command $pattern"); } } sub cleanline { $line =~ s/&/&/g; # do first, before writing new amps $line =~ s/\/>/g; $line =~ s/\x0C//; $line =~ s/\xA0/ /; $line =~ s/\xD7//; $line =~ s/\xD8/Ö/; $line =~ s/\xDF/ß/; $line =~ s/\xF6/ö/; $line =~ s/\xE4/ä/; } sub writeabstract { if ($inabstract) { $inabstract = 0; &message("copying meta-data to docbook") if ($verbose); $template =~ s/%%TITLE/$title/; $lang =~ s/\s+/q/g; $template =~ s/%%LANG/$lang/; if ($buf) { $template =~ s/%%ABSTRACT/$buf/; } else { $template =~ s/\%%ABSTRACT\<\/abstract\>//s; } print $outfh $template . "\n"; $buf = ""; } } sub writefile { &closeappendix; print $outfh $buf; print $outfh ''. "\n"; } #################### # PATTERN MATCHING # #################### # this is for beginning-of-line @-commands # sub matchpattern { $pattern = ""; foreach $key (keys %patterns) { $pattern = quotemeta($key) . '\b'; if ($line =~ /^$pattern\b/) { return ($pattern, $patterns{$key}); } } return ('', ''); } # this is for special wrapped blocks, like blockquotes # sub matchblock { foreach $key (keys %blocks) { $pattern = quotemeta($key); if ($line =~ /$pattern/) { &message("matched block $pattern") if ($verbose > 2); return ($pattern, $blocks{$key}) unless ($line =~ /$pattern\w/); } } } # inline processing # sub convertinline { TAG: while ($line =~ /\@\w+\{[^\{]*?\}/) { $command = $line; $command =~ s/.*(\@\w+\{[^{]*?\}).*/\1/; $tag = $command; $tag =~ s/(.*)\{.*/\1/; $tagplain = $tag; $tagplain =~ s/\@//; $contents = $command; $contents =~ s/.*\{(.*)\}/\1/; &message("CMDLINE: $line") if ($verbose > 2); &message("CMD: $command, tag: $tag, tagplain: $tagplain, contents: $contents") if ($verbose > 2); # If we're in a table's first column, and this column already contains # the same tag, remove the internal tag. # Otherwise, we wind up with nested literals, which DocBook disallows. # if (scalar @tableformat) { if (($infirstcol[-1]) and ($tableformat[-1] eq $tag)) { &message("not doubly wrapping tag $tag, removing the nested one") if ($verbose >2); $contents =~ s/\<$tag\>//; $contents =~ s/\<\/$tag\>//; #$replacement = $contents; #&replaceinline; #next TAG; } } # substitutions (we only want to catch @{} type tags here) # if (exists $substitutions{$tag . '{}'}) { &message("matched character substitution $tag") if ($verbose >2); $replacement = $substitutions{$tag . '{}'}; &replaceinline; next TAG; } # macros # if (exists $macros{$tagplain}) { &message("matched macro $tagplain") if ($verbose >2); $macro = $macros{$tagplain}; $macroargs = "\\\\" . $macroargs{$tagplain} . "\\\\"; $macrotext = $macrotext{$tagplain}; $macroarg = $command; $macroarg =~ s/^.*?\@$macro\{//; $macroarg =~ s/\}.*?$//; $macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/; $macrotext =~ s/$macroargs/$macroarg/g; $replacement = $macrotext; &replaceinline; next TAG; } # special character substitutions # if (exists $specsubstitutions{$tag}) { &message("matched special substitution $tag") if ($verbose >2); $replacement = $specsubstitutions{$tag}; &replacespecsubst; &replaceinline; next TAG; } # docbook wrapper tags # if (exists $tags{$tag}) { &message("matched docbook wrapper tag $tag") if ($verbose >2); @dbtags = split(',', $tags{$tag}); $dbopen = ''; $dbclose = ''; foreach $dbtag (@dbtags) { $dbtag = &trim($dbtag); # Texinfo allows nexted literals, DocBook does not. # if (($dbtag eq 'programlisting') or ($dbtag eq 'literal') or ($dbtag eq 'filename') or ($dbtag eq 'option')) { $contents =~ s/\//g; $contents =~ s/\<\/literal\>//g; $contents =~ s/\//g; $contents =~ s/\<\/programlisting\>//g; &message("removed inline literals: $contents") if ($verbose > 2); } ($tag, $attributes) = split(/ /, $dbtag); if ($attributes) { $dbopen .= "\<$tag $attributes\>"; } else { $dbopen .= "\<$tag\>"; } $dbclose = "\<\/$tag\>" . $dbclose; } $replacement = $dbopen . $contents . $dbclose; # Texinfo allows nexted literals, DocBook does not. # if (scalar @literal) { $replacement =~ s/\//g; $replacement =~ s/\<\/literal\>//g; $replacement =~ s/\//g; $replacement =~ s/\<\/programlisting\>//g; &message("removed literal tags literals: $replacement") if ($verbose > 2); } &replaceinline; next TAG; } if (exists $definfos{$tagplain}){ &message("matched definfo $tagplain") if ($verbose >2); $action = $definfos{$tagplain}; ($prefix, $suffix) = split(/,/,$action); $replacement = $prefix . $contents . $suffix; &message("definfo prefix: $prefix, suffix: $suffix, contents: $contents") if ($verbose > 1); &replaceinline; next TAG; } if (exists $setvalues{$contents}) { &message("matched set value $contents") if ($verbose > 2); $replacement = $setvalues{$contents}; &replaceinline; next TAG; } if (exists $aliases{$tag}) { &message("matched alias $tag") if ($verbose >2); $replacement = $aliases{$tag} . '{' . $contents . '}'; &replaceinline; next TAG; } &raiseerror("cannot resolve $command"); } # character-level substitutions. # must come last, so tags like @samp{} don't have the @s replaced # with a space, etc. # # This uses a presorted array of keys in descending order, # so we see @ss before @s, etc. # foreach $key (@substitutionkeys) { $pattern = quotemeta($key); if ($line =~ /$pattern/) { &message("matched character code: $key") if ($verbose > 2); $line =~ s/$pattern/$substitutions{$key}/g; } } # unusual stuff to deal with # # @\n is sometimes entered when the user meant @(space). # Texinfo handles it, so we have to. # $line =~ s/\@$/ /; # Remove <> from inside tags, since DocBook stylesheets # generally add them back in when generating output. # $line =~ s/\<(.*?)>\<\/email\>/\$1\<\/email\>/g; # We take care in case there's a matching block (it could ocurred # because of the expansion of a macro or an alias) ($pattern, $action) = &matchpattern(); &doaction if ($action); # We should have found *all* @-commands by now. # If any are still hanging around, we have a problem. # if ($line =~ /\@/) { &raiseerror("Unrecognized @-command in $line"); } } sub markspecial { my $pattern1 = ''; my $pattern2 = ''; my $delimiter = ''; my $delimiterpattern = ''; my $contents = ''; my $contentspattern = ''; my $newcontents = ''; my $start = ''; my $end = ''; $line =~ s/\@\@/DCM_AT/g; $line =~ s/\@\{/DCM_LB/g; $line =~ s/\@\}/DCM_RB/g; $line =~ s/\@\,{c\}/DCM_CD/g; # special handling for the @verb{} command, which contains # within it special characters which are NOT to be interpreted. # rather than handle such a strange case, we make it simple by # going the long way around and escaping everything # unless ($suppressconversion) { while ($line =~ /\@verb\{/) { $delimiter = $line; $delimiter =~ s/.*?\@verb\{//; $delimiter =~ s/(.).*/$1/; &message("delimiter: $delimiter") if ($verbose > 2); $delimiterpattern = quotemeta($delimiter); &message("delimiterpattern: $delimiterpattern") if ($verbose > 2); $contents = $line; $pattern1 = '.*\@verb\{' . quotemeta($delimiter); &message("pattern1: $pattern1") if ($verbose > 2); $contents =~ s/$pattern1//; $pattern2 = quotemeta($delimiter) . '\}.*'; &message("pattern2: $pattern2") if ($verbose > 2); $contents =~ s/$pattern1//; &message("contents: $contents") if ($verbose > 2); $contents =~ s/$pattern2//; &message("contents: $contents") if ($verbose > 2); $newcontents = $contents; $newcontents =~ s/\@/\@\@/g; $newcontents =~ s/\{/\@\{/g; $newcontents =~ s/\}/\@\}/g; &message("newcontents: $newcontents") if ($verbose > 2); $contentspattern = quotemeta($contents); &message("contentspattern: $contentspattern") if ($verbose > 2); $start = $line; $start =~ s/$pattern2.*//; $start =~ s/$delimiterpattern$contentspattern$//; $start =~ s/\@verb/DCM_VERB/; &message("start: $start") if ($verbose > 2); $end = $line; $end =~ s/.*$pattern1$contentspattern$delimiterpattern//; &message("end: $end") if ($verbose > 2); $line = $start . $newcontents . $end; &message("line: $line") if ($verbose > 2); } $line =~ s/DCM_VERB/\@verb/g; } # We do this twice in case one of these was inside a @verb statement # $line =~ s/\@\@/DCM_AT/g; $line =~ s/\@\{/DCM_LB/g; $line =~ s/\@\}/DCM_RB/g; $line =~ s/\@\,{c\}/DCM_CD/g; } sub unmarkspecial { $_[0] =~ s/DCM_AT/\@\@/gi; $_[0] =~ s/DCM_LB/\@\{/gi; $_[0] =~ s/DCM_RB/\@\}/gi; $_[0] =~ s/DCM_CD/\@\{c\}/gi; } sub normalizespecial { $_[0] =~ s/DCM_AT/\@/gi; $_[0] =~ s/DCM_LB/\{/gi; $_[0] =~ s/DCM_RB/\}/gi; $_[0] =~ s/DCM_CD/¸/gi; } sub replaceinline { &message("replacing $command with $replacement") if ($verbose > 1); $command = quotemeta($command); $line =~ s/$command/$replacement/; } sub replacespecsubst { my $link = ''; my $linktitle = ''; my $keystring; my @keycombos; my @keycaps; my $mykeys; if ($replacement eq 'REF') { $replacement = &crossref(); } elsif ($replacement eq 'PXREF') { $replacement = "see " . &crossref(); } elsif ($replacement eq 'UREF') { ($link, $linktitle) = split(/,/, $contents); $link = &trim($link); $linktitle = &trim($linktitle); $linktitle = $link unless ($linktitle); $replacement = "\$linktitle\<\/ulink\>"; &message("ulink: $link, linktitle: $linktitle") if ($verbose > 2); } elsif ($replacement eq 'XREF') { $replacement = "See " . &crossref(); } elsif ($replacement eq 'INFOREF') { $replacement = "See " . &inforef(); } elsif ($replacement eq 'ANCHOR') { $link = $contents; $link = &anchorfix($link); $replacement = ""; } elsif ($replacement eq 'KBD') { $keystring = $contents; # nested literals not allowed in DocBook # $keystring =~ s/\//g; $keystring =~ s/\<\/literal\>//g; # I found there is at least one case where is already here: # @kbd{gTopRET}, in the info manual. # If we have those, throw away the keycap tags and hyphenate, # so we can keycombo properly. # $keystring =~ s/\/-/g; $keystring =~ s/\<\/keycap\>/-/g; $keystring =~ s/^-+//g; $keystring =~ s/-+$//g; $keystring =~ s/-+\s+/-/g; $keystring =~ s/\s+-+/-/g; @keycombos = split(/\s+/, $keystring); foreach $keycombo (@keycombos) { &message("keycombo: $keycombo") if ($verbose > 2); @keycaps = split(/-/, $keycombo); if (scalar @keycaps > 1) { &message("making keycombo") if ($verbose > 2); $mykeys .= "\"; foreach $keycap (@keycaps) { $mykeys .= "\$keycap\<\/keycap\>"; } $mykeys .= '' } else { $mykeys .= "\$keycaps[0]\<\/keycap\>"; } } $replacement = $mykeys; } else { &raiseerror("Unrecognized special substitution code: $specsubstitutions{$tag}"); $replacement = $specsubstitutions{$tag}; } } ###################### # TEXINFO STRUCTURES # ###################### sub crossref { my $anchor; my $link; my $linkname; my $linktitle; my $infofile; my $manualtitle; my $title; my $crossref = ''; ($link, $linkname, $linktitle, $infofile, $manualtitle) = split(/,/, $contents); $link = &trim($link); $linktitle = &trim($linktitle); $linkname = &trim($linkname); $infofile = &trim($infofile); $manualtitle = &trim($manualtitle); # build the title that will display in the output # $title = $linktitle; $title .= ': ' if ($title and ($linkname or $infofile)); $title .= "($infofile)" if ($infofile); $title .= ' ' if (($title) and ($linkname)); $title .= $linkname if ($linkname); # Try to pull in the node title if we've already seen it but it # isn't specified in the cross reference. # # If not, we can only display the node name, which is not very good # but the best we can do without writing a two-pass engine. # if (($link) and !($title)) { $title = $nodetitlelookup{$link}; $title = $link unless ($title); } if ($infofile) { &message("link to another info file, replacing with text") if ($verbose > 1); $crossref = $title; } else { $anchor = &anchorfix($link); $crossref = "$title"; &message("made xref to tag $anchor") if ($verbose > 1); } return $crossref; } sub inforef { my $anchor; my $link; my $linkname; my $infofile; my $title; my $inforef = ''; ($link, $linkname, $infofile) = split(/,/, $contents); $link = &trim($link); $linkname = &trim($linkname); $infofile = &trim($infofile); $title .= "($infofile)" if ($infofile); $title .= $linkname if ($linkname); $title = $link unless ($title); if ($infofile) { &message("link to another info file, replacing with text") if ($verbose > 1); $crossref = $title; } else { $anchor = &anchorfix($link); $crossref = "$title"; &message("made xref to tag $anchor") if ($verbose > 1); } return $crossref; } sub anchorfix { my $anchor = $_[0]; $anchor = lc(&trim($anchor)); $anchor = decode_entities($anchor); $anchor =~ s/-/-dash-/g; $anchor =~ s/&/-and-/g; $anchor =~ s/;//g; $anchor = encode_entities($anchor); $anchor =~ s/&(\w)grave/\1/g; $anchor =~ s/&(\w)acute/\1/g; $anchor =~ s/&(\w)circ/\1/g; $anchor =~ s/&(\w)uml/\1/g; $anchor =~ s/&(\w)tilde/\1/g; $anchor =~ s/&(\w)cedil/\1/g; $anchor =~ s/&/-and-/g; $anchor =~ s/;//g; $anchor =~ s/\//-slash-/g; $anchor =~ s/\\/-bslash-/g; $anchor =~ s/\s+/-/g; $anchor =~ s/'//g; $anchor =~ s/`//g; $anchor =~ s/,/-comma-/g; $anchor =~ s/\./-dot-/g; $anchor =~ s/!/-bang-/g; $anchor =~ s/\?/-question-/g; $anchor =~ s/\+/-plus-/g; $anchor =~ s/\*/-x-/g; $anchor =~ s/\(/-op-/g; $anchor =~ s/\)/-cp-/g; $anchor =~ s/\@/-at-/g; $anchor =~ s/dcm_at/-at-/gi; $anchor =~ s/\^/-hat-/g; $anchor =~ s/=/-eq-/g; $anchor =~ s/\$/S/; $anchor =~ s/~/-tilde-/g; $anchor =~ s/0/-zero-/g; $anchor =~ s/1/-one-/g; $anchor =~ s/2/-two-/g; $anchor =~ s/3/-three-/g; $anchor =~ s/4/-four-/g; $anchor =~ s/5/-five-/g; $anchor =~ s/6/-six-/g; $anchor =~ s/7/-seven-/g; $anchor =~ s/8/-eight-/g; $anchor =~ s/9/-nine-/g; $anchor =~ s/\|/-pipe-/g; $anchor =~ s/\[/-lsqb-/g; $anchor =~ s/\]/-rsqb-/g; $anchor =~ s/^-+//; $anchor =~ s/-+$//; $anchor =~ s/--/-/g; # get rid of double, initial and trailing hyphens return &trim($anchor); } sub deffn { my $category; my $name; my $arguments; my $argument; my $indeffn = 0; my $indefun = 0; my $indefspec = 0; my $indefmac = 0; if ($line =~ /\@deffn/) { $indeffn = 1; $line =~ s/\@deffn\w*//; &message("deffn found") if ($verbose > 1); } elsif ($line =~ /\@defun/) { $indefun = 1; $line =~ s/\@defun\w*//; &message("defun found") if ($verbose > 1); } elsif ($line =~ /\@defspec/) { $indefspec = 1; $line =~ s/\@defspec\w*//; &message("defspec found") if ($verbose > 1); } elsif ($line =~ /\@defmac/) { $indefmac = 1; $line =~ s/\@defmac\w*//; &message("defmac found") if ($verbose > 1); } else { &raiseerror("Unrecognized function definition"); } &convertinline; # DocBook does not allow nested literal tags # $line =~ s/\//g; $line =~ s/\<\/literal\>//g; $line = &trim($line); # function category names can have spaces. When they do, the name is enclosed in braces: # # @deffn {Interactive Command} isearch-forward # @defun foo bar # @defspec foovar (@var(var)) # if ($line =~ /\{/) { &message("function category name enclosed in braces") if ($verbose > 2); ($category, $line) = split(/}/, $line); $category =~ s/\{//; ($name, $arguments) = split(/ /, $line, 2); } else { if ($indeffn) { &message("function category name not enclosed in braces") if ($verbose > 2); ($category, $name, $arguments) = split(/\s/, $line, 3); } elsif ($indefun) { $category = 'Function'; ($name, $arguments) = split(/ /, $line, 2); } elsif ($indefspec) { $category = 'Special Form'; ($name, $arguments) = split(/ /, $line, 2); } elsif ($indefmac) { $category = 'Macro'; ($name, $arguments) = split(/ /, $line, 2); } else { &raiseerror("Unrecognized function type"); } } &message("category: $category, name: $name, arguments: $arguments") if ($verbose > 1); $line = $category . ': '; &writeconverted; $line = '' . $name . ''; &writeline; $line = ''; &writeline; foreach $argument (split(' ', $arguments)) { $line = '' . $argument . ''; &writeline; } $line = ''; &writeline; } ############# # META-DATA # ############# # this routine processes meta-data @-Commands # sub meta { return if (&metaarg('\@setfilename', $sgmlfile)); return if (&metaarg('\@settitle', $title)); return if (&metaarg('\@documentlanguage',$lang)); return if (&metaarg('\@author', $authorname)); &raiseerror("Unknown meta-data command $line"); } # Meta-data commands have arguments, # so if the pattern matches, store the argument # to the variable in $_[1] # sub metaarg { my $pattern = $_[0]; if ($line =~ /$pattern/) { $line =~ s/$pattern//; &convertinline; &normalizespecial($line); $_[1] = $line; &message("set $pattern to $line") if ($verbose > 2); return 1; } else { return 0; } } ################ # WRITE OUTPUT # ################ sub writeconverted { my $temp = &trim($line); unless ($suppresspara) { if ($temp eq '') { &closeformalpara; } else { unless (($inpara) or ($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) { if (($nodename) and ($nodeinit == 0) and !($inabstract)) { &guessnodelevel; } ¶ } } } unless ($suppressconversion) { &convertinline; } &writeline; } sub writeline { &normalizespecial($line); &message("OUT: $line") if ($verbose > 1); $buf .= $line . "\n"; if (($linenumber % 1000 == 0) and ($inabstract == 0)) { print $outfh $buf; $buf = ''; } } ######### # NODES # ######### # this is for when the source never set a position for this node in the # chapter-style hierarchy, which we use. # # We have to make a best guess based on the node's settings, # so put it right under whoever it calls the "up" node. # sub guessnodelevel { return if ($nodeinit); &message("guessing level for node: $nodename, title: $nodetitle") if ($verbose > 2); my $parentname = $nodeup; my $parentlevel = $nodelevels{$parentname}; if ($parentlevel == "A") { §1($nodetitle); } elsif ($parentlevel == 1) { §2($nodetitle); } elsif ($parentlevel == 2) { §3($nodetitle); } elsif ($parentlevel == 3) { §4($nodetitle); } else { &raiseerror("The parent node, $parentname, could not be found."); } } # Sets up node variables. # # The node isn't actually created until we hit some text in it. # sub node { $line =~ s/\@node\s+?//; &convertinline; &normalizespecial($line); ($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line); $nodename = &trim($nodename); $nodetitle = $nodename; $nodeprev = &trim($nodeprev); $nodenext = &trim($nodenext); $nodeup = &trim($nodeup); $nodeid = &anchorfix($nodename); $nodeinit = 0; &message("Node: $nodename") if ($verbose); &closeformalpara; } # Creates the node # sub initnode { $nodeid = 0 if (exists $nodeanchors{uc($nodename)}); if ($nodeinit) { $nodeid = 0; return; } &message("initiating node $nodename") if ($verbose > 2); $nodenames{$nodename} = $nodename; $nodenexts{$nodename} = $nodenext; $nodeprevs{$nodename} = $nodeprev; $nodeups{$nodename} = $nodeup; $nodeanchors{uc($nodename)} = $nodeid if ($nodeid); $nodetitlelookup{$nodeid} = $nodetitle; $nodeinit = 1; } sub menu { $inmenu = 1; } sub menuitem { $line =~ s/^\*\s*//; &message("menu line: $line"); } sub closemenu { $inmenu = 0; } #################### # DOCBOOK SECTIONS # #################### sub appendix { &writeabstract; &message("appendix: $nodetitle") if ($verbose > 1); &closeappendix; if ($nodeinit) { §($_[0]); } else { $nodetitle = $_[0]; } &initnode; &convertinline; &normalizespecial($nodetitle); if ($nodeid) { $buf .= "\\$nodetitle<\/title\>\n"; } else { $buf .= "\<appendix\>\<title\>$nodetitle\<\/title\>\n"; } $inappendix = 1; } sub sect1 { &writeabstract; &message("sect1: $nodetitle") if ($verbose > 1); &closeappendix; if ($nodeinit) { §($_[0]); } else { $nodetitle = $_[0]; } &initnode; &convertinline; &normalizespecial($nodetitle); if ($nodeid) { $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect1\>\<title\>$nodetitle\<\/title\>\n"; } $insect1 = 1; } sub sect2 { &writeabstract; &message("sect1: $nodetitle") if ($verbose > 1); &closesect2; if ($nodeinit) { §($_[0]); } else { $nodetitle = $_[0]; } &initnode; &convertinline; &normalizespecial($nodetitle); if ($inappendix) { if ($nodeid) { $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect1\>\<title\>$nodetitle\<\/title\>\n"; } $nodelevels{$nodename} = 1; } else { if ($nodeid) { $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect2\>\<title\>$nodetitle\<\/title\>\n"; } $nodelevels{$nodename} = 2; } $insect2 = 1; } sub sect3 { &writeabstract; &message("sect1: $nodetitle") if ($verbose > 1); &closesect3; if ($nodeinit) { §($_[0]); } else { $nodetitle = $_[0]; } &initnode; &convertinline; &normalizespecial($nodetitle); if ($inappendix) { if ($nodeid) { $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect2\>\<title\>$nodetitle\<\/title\>\n"; } $nodelevels{$nodename} = 2; } else { if ($nodeid) { $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect3\>\<title\>$nodetitle\<\/title\>\n"; } $nodelevels{$nodename} = 3; } $insect3 = 1; } sub sect4 { &writeabstract; &message("sect4: $nodetitle") if ($verbose > 1); &closesect4; if ($nodeinit) { §($_[0]); } else { $nodetitle = $_[0]; } &initnode; &convertinline; &normalizespecial($nodetitle); if ($inappendix) { if ($nodeid) { $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect3\>\<title\>$nodetitlee\<\/title\>\n"; } $nodelevels{$nodename} = 3; } else { if ($nodeid) { $buf .= "\<sect4 id='$nodeid'\>\<title id='$nodeid-title'\>$nodetitle\<\/title\>\n"; } else { $buf .= "\<sect4\>\<title\>$nodetitle\<\/title\>\n"; } $nodelevels{$nodename} = 4444; } $insect4 = 1; } sub sect { $nodetitle = $_[0]; &normalizespecial($nodetitle); $nodename = $nodetitle; $nodename = &trim($nodename); $nodeprev = ''; $nodenext = ''; $nodeup = ''; $nodeid = &anchorfix($nodename); $nodeid = 0 if (exists $nodeanchors{uc($nodename)}); $nodeanchors{uc($nodename)} = $nodeid if ($nodeid); $nodeinit = 0; &message("Section Node: $nodename") if ($verbose); &closeformalpara; } sub multitable { my @colwidths = (); my $colcount = 0; my $colspecs = ''; &closepara; $line =~ s/\@\w+\s+//; # fractional column widths are supported # but prototypes are ignored, as DocBook will do the same # thing automatically without them. # if ($line =~ /\@columnfractions/) { &message("starting multicolumn table") if ($verbose > 1); $line =~ s/\@columnfractions\s+//; @colwidths = split(/\s+/, $line); $colcount = scalar @colwidths; foreach $colwidth (@colwidths) { $colwidth =~ s/\.//; $colwidth .= '0' if (length($colwidth) < 2); $colspecs .= "\<colspec colwidth='$colwidth\*'\/\>\n"; } } else { $colcount = scalar split(/\{/, $line) - 1; } if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) { &closetablecol; $buf .= '<entrytbl cols=' . "'" . $colcount . "'" . "\>\n"; } else { $buf .= '<informaltable><tgroup cols=' . "'" . $colcount . "'" . "\>\n"; } $buf .= "$colspecs\n" if ($colspecs); $buf .= '<tbody>' . "\n"; push @nest, 'multi'; push @incol, 0; push @inrow, 0; push @infirstcol, 0; push @initem, 0; push @tableformat, ''; } sub table { &closepara; if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) { &closetablecol; $buf .= '<entrytbl cols=' . "'2'" . '><tbody>' . "\n"; } else { $buf .= '<informaltable><tgroup cols=' . "'2'" . '><tbody>' . "\n"; } push @nest, 'table'; push @incol, 0; push @inrow, 0; push @infirstcol, 0; push @initem, 0; push @tableformat, ''; } sub tablerow { &closetablerow; $buf .= '<row>'; $inrow[-1] = 1; } sub tablecol { &closetablecol; $buf .= '<entry>'; $incol[-1] = 1; } sub tableline { &message("table: $line") if ($verbose > 2); if ($nest[-1] ne 'multi') { if ($infirstcol[-1]) { # $buf .= '</literallayout>'; &closetablecol; &tablecol; $infirstcol[-1] = 0; } } } sub orderedlist { &closeformalpara; $buf .= '<orderedlist>'; push @nest, 'ordered'; push @initem, 0; # $suppresspara++; } sub itemizedlist { &closeformalpara; $buf .= '<itemizedlist>'; push @nest, 'itemized'; push @initem, 0; # $suppresspara++; } sub item { $line =~ s/\@item\w*\s*//; if ($nest[-1] eq 'table') { &message("table item: $line") if ($verbose > 2); if ($infirstcol[-1]) { $buf .= ', '; } unless ($infirstcol[-1]) { &tablerow; &tablecol; $infirstcol[-1] = 1; # $buf .= '<literallayout>'; } $line = &trim($line); $line = $tableformat[-1] . "\{$line\}" unless ($tableformat[-1] eq '@asis'); } elsif ($nest[-1] eq 'multi') { &message("multitable item: $line") if ($verbose > 2); &tablerow; &tablecol; # $line =~ s/^(.)/\<literallayout\>$1/; # $line =~ s/\@tab/\<\/literallayout\>\@tab/; $line =~ s/\s*\@tab\s*/\<\/entry\>\<entry\>/g; } elsif (($nest[-1] eq 'ordered') or ($nest[-1] eq 'itemized')) { &listitem; } else { &raiseerror("item tag found, but we're not in a list. Nest depth: " . scalar @nest); } } sub listitem { &closelistitem; $buf .= '<listitem>'; # ¶ $initem[-1] = 1; } sub formalpara { my $title = $_[0]; my $id = &anchorfix($title); &closeformalpara; #$buf .= "\<formalpara id='$id'\>\<title\>$title\<\/title\>\n\<para\>"; $informalpara = 1; $inpara = 1; } sub para { &closeformalpara; $buf .= '<para>'; $inpara = 1; } sub literalblock { if ($pattern =~ /\bformat\b/) { &message("start programlisting") if ($verbose > 2); $suppresspara++; $literaltag = 'programlisting'; } elsif ($pattern =~ /\bexample\b/) { &message("start programlisting") if ($verbose > 2); $suppresspara++; $literaltag = 'programlisting'; } elsif ($pattern =~ /\bsmallexample\b/) { &message("start programlisting") if ($verbose > 2); $suppresspara++; $literaltag = 'programlisting'; } elsif ($pattern =~ /\bdisplay\b/) { &message("start literallayout") if ($verbose > 2); $suppresspara++; $literaltag = 'literallayout'; } elsif ($pattern =~ /\blisp\b/) { &message("start programlisting") if ($verbose > 2); $suppressconversion++; $suppresspara++; $literaltag = 'programlisting'; } elsif ($pattern =~ /\bquotation\b/) { &message("start quotation") if ($verbose > 2); $literaltag = 'blockquote'; } elsif ($pattern =~ /\bverbatim\b/) { &message("start programlisting") if ($verbose > 2); $suppressconversion++; $suppresspara++; $literaltag = 'programlisting'; } else { &raiseerror("Unrecognized literal: $pattern"); } if (scalar @literal) { &message("Not including tag $literaltag, due to nested literal blocks") if ($verbose > 2); $literaltag = ''; } $buf .= '<' . $literaltag . '>' . "\n" if ($literaltag); $literalendtag = $pattern; $literalendtag =~ s/\@/\@end /; push @literal, $literaltag; push @literalend, $literalendtag; } sub comment { my $start = ''; my $comment = ''; if ($line =~ /\@c\b/) { ($start, $comment) = split(/\@c\b/, $line, 2); } elsif ($line =~ /\@comment\b/) { ($start, $comment) = split(/\@comment\b/, $line, 2); } else { &raiseerror("error locating the comment"); return; } while ($comment =~ /--/) { $comment =~ s/--/-/g; } $line = '<!-- ' . $comment . ' -->'; &writeline; $line = $start; } sub indexterm { $buf .= "\<indexterm\>\<primary\>$_[0]\</primary\>\</indexterm\>\n"; } sub closeappendix { &closesect1; if ($inappendix) { &message("closing appendix") if ($verbose > 2); $buf .= '</appendix>' . "\n"; $inappendix = 0; } } sub closesect1 { &closesect2; if ($insect1) { &message("closing sect1") if ($verbose > 2); $buf .= '</sect1>' . "\n\n"; $insect1 = 0; } } sub closesect2 { &closesect3; if ($insect2) { &message("closing sect2") if ($verbose > 2); if ($inappendix) { $buf .= '</sect1>' . "\n\n"; } else { $buf .= '</sect2>' . "\n\n"; } $insect2 = 0; } } sub closesect3 { &closesect4; if ($insect3) { &message("closing sect3") if ($verbose > 2); if ($inappendix) { $buf .= '</sect2>' . "\n\n"; } else { $buf .= '</sect3>' . "\n\n"; } $insect3 = 0; } } sub closesect4 { &closeformalpara; &closenest; if ($insect4) { &message("closing sect4") if ($verbose > 2); if ($inappendix) { $buf .= '</sect3>' . "\n\n"; } else { $buf .= '</sect4>' . "\n\n"; } $insect4 = 0; } } sub closenest { my $runaway = 0; while (scalar @nest) { &closetable; &closeorderedlist; &closeitemizedlist; $runaway++; &raiseerror("Runaway nesting") if ($runaway >= 100); } } sub closetable { &closetablerow; if (($nest[-1] eq 'table') or ($nest[-1] eq 'multi')) { &message("closing table") if ($verbose > 2); if (($nest[-2] eq 'table') or ($nest[-2] eq 'multi')) { $buf .= '</tbody></entrytbl>' . "\n"; } else { $buf .= '</tbody></tgroup></informaltable>' . "\n"; } pop @nest; pop @incol; pop @inrow; pop @infirstcol; pop @initem; pop @tableformat; } } sub closetablerow { &closetablecol; if ($inrow[-1]) { &message("closing table row") if ($verbose > 2); $buf .= '</row>' . "\n"; $inrow[-1] = 0; } } sub closetablecol { if ($incol[-1]) { &message("closing table column") if ($verbose > 2); $buf .= '</entry>' . "\n"; $incol[-1] = 0; } } sub closeorderedlist { &closelistitem; if ($nest[-1] eq 'ordered') { &message("closing ordered list") if ($verbose > 2); $buf .= '</orderedlist>' . "\n"; pop @nest; pop @initem; # $suppresspara--; } } sub closeitemizedlist { &closelistitem; if ($nest[-1] eq 'itemized') { &message("closing itemized list") if ($verbose > 2); $buf .= '</itemizedlist>' . "\n"; pop @nest; pop @initem; # $suppresspara--; } } sub closelistitem { &closeformalpara; if ((($nest[-1] eq 'ordered') or (@nest[-1] eq 'itemized')) and (@initem[-1])) { &message("closing list item") if ($verbose > 2); #$buf .= '</para></listitem>' . "\n"; $buf .= '</listitem>' . "\n"; $initem[-1] = 0; } } sub closeformalpara { &closepara; if ($informalpara) { # $buf .= '</formalpara>'; $informalpara = 0; } } sub closepara { if ($inpara) { $buf .= '</para>'; $inpara = 0; } } sub closeliteralblock { if (scalar @literal) { foreach $tag (split(/,/, $literal[-1])) { $buf .= '</' . &trim($tag) . '>' . "\n"; } $literal = ''; $literalend = ''; $suppressconversion--; $suppressconversion = 0 if ($suppressconversion < 0); $suppresspara--; &raiseerror("Literal block nesting error") if ($suppresspara < 0); $suppresspara = 0 if ($suppresspara < 0); pop @literal; pop @literalend; $literaltag = $literal[-1]; $literalendtag = $literalend[-1]; &message("literalendtag: $literalendtag") if ($verbose > 2); &message("literal depth: " . scalar @literal . ", para: $suppresspara, conv: $suppressconversion") if ($verbose > 2); } } ################ # CONDITIONALS # ################ sub set { my ($name, $value); ($foo, $name, $value) = split(/\s+/, $line, 3); $value = 1 unless ($value); $setvalues{$name} = $value; &message("set $name to $value") if ($verbose > 1); $value = $setvalues{$name}; &message("read back $value") if ($verbose > 2); } sub clear { my $name; ($foo, $name) = split(/\s+/, $line); $setvalues{$name} = 0; &message("cleared $name") if ($verbose > 1); } sub ifset { my $name; ($foo, $name) = split(/\s+/, $line); return $ifsets{$name}; } ################# # META-LANGUAGE # ################# sub alias { my $alias; ($foo, $alias) = split(/\s/, $line); ($alias, $command) = split(/=/, $alias); &message("alias: $alias, command: $command") if ($verbose > 2); $aliases{'@' . $alias} = '@' . $command; } sub macro { $line =~ s/\@macro\s+?(.*?)\{(.*?)\}/\1,\2/; ($macro, $macroargs) = split(/,/, $line); $macro = &trim($macro); $macros{$macro} = $macro; $macrotext = ''; $inmacro = 1; &message("adding macro $macro") if ($verbose > 2); } # load custom @-commands # sub definfoenclose { my ($name, $prefix, $suffix, $key, $replacement); $line =~ s/\@definfoenclose\s+//; ($name, $prefix, $suffix) = split(/,/, $line); $replacement = $prefix. ',' . $suffix; $definfos{$name} = $replacement; &message("custom definfoenclosure: $name, $replacement") if ($verbose > 2); } #################### # SUPPORT ROUTINES # #################### sub bracketsmatch { my $bline = ' ' . $_[0] . ' '; $bline =~ s/\@\{//g; $bline =~ s/\@\}//g; my $left = scalar split(/\{/, $bline) - 1; my $right = scalar split(/\}/, $bline) - 1; if ($right == $left) { $badbracketlines = 0; $badbracketstartline = $currentline + 1 } else { $badbracketlines++; &message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1); if ($badbracketlines >= $maxrunonlines) { &raiseerror("Mismatched brackets starting on line $badbracketstartline: $line"); } } return ($right == $left); } sub trimline { $line = &trim($line); } sub trim { my $temp = $_[0]; $temp =~ s/^\s+//g; $temp =~ s/\s+$//g; return $temp; } sub raiseerror { my $errmsg = "ERROR: $currentfile $currentline $_[0]\n"; print STDERR $errmsg; $errors++; if (($maxerrors) and ($errors >= $maxerrors)) { print STDERR "aborting after $errors errors.\n"; &writefile; exit(1); } } sub message { my $message; $message = $_[0]; if ($logfile) { print $logfh "$message\n"; } else { print "$message\n"; } } sub version { print "texi2db version $VERSION\n"; print "Copyright (c) 2002, 2003 David Merrill \<david\@lupercalia.net\>.\n"; print "\n"; print "Converts a Texinfo file into DocBook XML.\n"; print "\n"; print "This is free software; see the source for copying conditions. There is no\n"; print "warranty; not even for merchantability or fitness for a particular purpose.\n\n"; } sub usage { &version; print "Usage: texi2db [OPTIONS]... [FILE]\n"; print "\n"; print "Options:\n"; print "-i, --include HTML|INFO|TEX\n"; print " include the specified text.\n"; print "-f, --file FILE read from file rather than STDIN.\n"; print "-o, --output-to FILE write to file rather than STDOUT.\n"; print "-l, --log-to FILE write status messages to file rather than STDOUT.\n"; print "-L, --runon set runon line limit (default=20)\n"; print "-v, --verbose show diagnostic output.\n"; print " use twice for lots of detail.\n"; print " use thrice for insane amounts of detail.\n"; print "-e, --max-errors MAX abort after this many errors. Default is 1.\n"; print " use '--max-errors 0' to disable.\n"; print "-h, --help show this usage message and exit.\n"; print "-V, --version show the program version and exit.\n"; exit($error); } __END__ <?xml version='1.0'?> <!DOCTYPE article PUBLIC '-//OASIS//DTD DocBook XML V4.1.2//EN' "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [ <!ENTITY hellip "…"> <!ENTITY Oslash "Ø"> ]> <article lang="%%LANG"> <articleinfo> <title>%%TITLE %%ABSTRACT