#!/usr/bin/perl # # this utility converts a texinfo file into LDP WikiText format. # use File::Basename; use FileHandle; use HTML::Entities; $version = "0.1"; $errors = 0; $error = 0; # runtime options # $maxerrors = 10; $verbose = 0; # files # $requestedfile = ""; $outputfile = ""; $outfh = new FileHandle; $logfile = ""; $logfh = new FileHandle; # the following are flags to maintain state # # docbook structure flags # $inabstract = 1; $inappendix = 0; $insect1 = 0; $insect2 = 0; $insect3 = 0; $insect44 = 0; $intable = 0; $inrow = 0; $incol = 0; $tableformat = ""; $inorderedlist = 0; $initemizedlist = 0; $inlistitem = 0; $informalpara = 0; $inpara = 0; # these maintain internal program state # $line = ""; $originalline = ""; $currentfile = ""; $currentline = ""; $saveline = ""; $badbracketlines = 0; $badbracketstartline = 0; $seekend = ""; $literalend = ""; $literal = ""; # here, we hold onto meta-data that we will need to build # the article or book header structures. # $sgmlfile = ""; $title = ""; $authorname = ""; $abstract = ""; $buf = ""; # these hold information about the node we are currently in, if any # $nodename = ""; $nodenext = ""; $nodeprev = ""; $nodeup = ""; $nodeid = ""; $nodeinit = 0; # remember this hierarchical information for every node # we use it if the node doesn't specify a level. # %nodenexts = (); %nodeprevs = (); %nodeups = (); %nodelevels = (); # holds user variables # %setvalues = (); %ifsets = (); # this is used to build a list of index entries to be written when requested # %indexes = (); # These are used for keeping state on the most recent search of any of the following hashes # $pattern = ""; $action = ""; # In the following hash, the perl expressions are used to search # each line of the input file. If matched, the Action field defines # what to do with it. # # Note that @-Commands of this type must be at the beginning of the line. # # TODO: put these in a .conf file so it can be tweaked or customized # # Actions: # # DROPLINE Drop it on the floor # DROPBLOCK Drop the whole block on the floor # META Meta-Data # BYE Stop processing the file # INDEX An index entry # ORDEREDLIST Numbered (enumerated) list # ORDEREDLISTEND End an enumerated list # SEEKEND Skip everything until you find the corresponding @end tag # NODE A Texinfo node # SECT? One of the sectioning commands # APPENDIX? One of the appendix commands # DEFINFOENCLOSE Load a customized highlighting pattern # SET Set a variable # CLEAR Clear a variable # IFSET Test a variable # # @-Command Action %patterns = ( '\\input\b' =>'DROPLINE', # '\@\*' =>'PARA', '^\@alias' =>'ALIAS', '^\@author' =>'META', '^\@bye\b' =>'BYE', '^\@c\b' =>'DROPLINE', '^\@cindex\b' =>'INDEX', '^\@clear\b' =>'CLEAR', '^\@comment\b' =>'DROPLINE', '^\@contents\b' =>'DROPLINE', '^\@defindex\b' =>'DROPLINE', # TODO '^\@definfoenclose' =>'DEFINFOENCLOSE', '^\@display\b' =>'LITERALBLOCK', '^\@enumerate\b' =>'ORDEREDLIST', '^\@end enumerate\b' =>'ORDEREDLISTEND', '^\@example' =>'LITERALBLOCK', '^\@format\b' =>'LITERALBLOCK', '^\@headings\b' =>'DROPLINE', '^\@html\b' =>'SEEKEND', '^\@end html\b' =>'DROLINE', '^\@ifhtml\b' =>'DROPLINE', '^\@end ifhtml\b' =>'DROPLINE', '^\@ifinfo\b' =>'SEEKEND', '^\@end ifinfo\b' =>'DROPLINE', '^\@ifnottex\b' =>'DROPLINE', '^\@end ifnottex\b' =>'DROPLINE', '^\@ifset\b' =>'IFSET', # handled like SEEKEND '^\@end ifset\b' =>'DROPLINE', # or ignored, so ignore this too '^\@ifclear\b' =>'IFCLEAR', '^\@end ifclear\b' =>'DROPLINE', '^\@iftex\b' =>'SEEKEND', '^\@ignore\b' =>'SEEKEND', '^\@itemize\b' =>'ITEMIZEDLIST', '^\@end itemize\b' =>'ITEMIZEDLISTEND', '^\@item\b' =>'ITEM', '^\@itemx\b' =>'ITEM', '^\@macro\b' =>'MACRO', '^\@menu\b' =>'SEEKEND', # TODO '^\@node\b' =>'NODE', '^\@page\b' =>'DROPLINE', '^\@printindex\b' =>'DROPLINE', # TODO '^\@set\b' =>'SET', '^\@setchapternewpage\b' =>'DROPLINE', '^\@setfilename\b' =>'META', '^\@settitle\b' =>'META', '^\@smallbook\b' =>'DROPLINE', '^\@smallexample\b' =>'LITERALBLOCK', '^\@sp\b' =>'DROPLINE', '^\@table\b' =>'TABLE', '^\@end table\b' =>'TABLEEND', '^\@tex\b' =>'SEEKEND', '^\@titlepage\b' =>'SEEKEND', '^\@top\b' =>'SECT1', # sectioning commands '^\@chapter\b' =>'SECT1', '^\@section\b' =>'SECT2', '^\@subsection\b' =>'SECT3', '^\@subsubsection\b' =>'SECT4', '^\@centerchap\b' =>'SECT1', '^\@unnumbered\b' =>'SECT1', '^\@unnumberedsec\b' =>'SECT2', '^\@unnumberedsubsec\b' =>'SECT3', '^\@unnumberedsubsubsec\b' =>'SECT4', '^\@majorheading\b' =>'SECT1', '^\@chapheading\b' =>'SECT1', '^\@heading\b' =>'SECT2', '^\@subheading\b' =>'SECT3', '^\@subsubheading\b' =>'SECT4', '^\@appendix\b' =>'APPENDIX', '^\@appendixsec\b' =>'SECT1', '^\@appendixsubsec\b' =>'SECT2', '^\@appendixsubsubsec\b' =>'SECT3', ); # these are one-for-one string substitutions # # most of them are ISO-8879 codes for international characters, # used by HTML as well as XML, but there are exceptions. # There is one special case, "@@", which is the escaped form of @. # We do not handle @@ here, because it has to come after all other # @-commands have been processed. # # It's fine to replace these with nothing if you want them to be # simply dropped on the floor. # # I keep these in general alphabetical order, except that @end tags # are put right after their opening form. # # Note: Only the left side should be escaped! # # Example: @copyright{} becomes © # %substitutions = ( '\@!' =>'!', '\@\*' =>'', '\@\s' =>' ', '\@"A' =>'Ä', '\@"E' =>'Ë', '\@"I' =>'Ï', '\@"O' =>'Ö', '\@"U' =>'Ü', '\@"a' =>'ä', '\@"e' =>'ë', '\@"i' =>'ï', '\@"o' =>'ö', '\@"u' =>'ü', '\@' . "'" . 'A' =>'Á', '\@' . "'" . 'E' =>'É', '\@' . "'" . 'I' =>'Í', '\@' . "'" . 'O' =>'Ó', '\@' . "'" . 'U' =>'Ú', '\@' . "'" . 'a' =>'á', '\@' . "'" . 'e' =>'é', '\@' . "'" . 'i' =>'í', '\@' . "'" . 'o' =>'ó', '\@' . "'" . 'u' =>'ú', '\@,{c}' =>'¸', '\@-' =>'¯', '\@\.' =>'.', '\@:' =>'', '\@=' =>'', '\@\?' =>'?', '\@^A' =>'Â', '\@^E' =>'Ê', '\@^I' =>'Î', '\@^O' =>'Ô', '\@^U' =>'Û', '\@^a' =>'â', '\@^e' =>'ê', '\@^i' =>'î', '\@^o' =>'ô', '\@^u' =>'û', '\@`A' =>'À', '\@`E' =>'È', '\@`I' =>'Ì', '\@`O' =>'Ò', '\@`U' =>'Ù', '\@`a' =>'à', '\@`e' =>'è', '\@`i' =>'ì', '\@`o' =>'ò', '\@`u' =>'ù', # '\@\{' =>'{', # do these manually at the end so brackets match # '\@\}' =>'}', '\@~A' =>'Ã', '\@~E' =>'&Etilde;', '\@~I' =>'Ĩ', '\@~O' =>'Õ', '\@~U' =>'Ũ', '\@~a' =>'ã', '\@~e' =>'&etilde;', '\@~i' =>'ĩ', '\@~o' =>'õ', '\@~u' =>'ũ', '\@AA\{\}' =>'Å', '\@aa\{\}' =>'å', '\@AE\{\}' =>'Æ', '\@ae\{\}' =>'æ', '\@copyright\{\}' =>'©', '\@dots\{\}' =>'…', '\@center\b' =>'', '\@enddots\{\}' =>'…', '\@equiv' =>'≡', '\@group\b' =>'', '\@end group\b' =>'', '\@exclamdown' =>'¡', '\@noindent\b' =>'', '\@refill\b' =>'', '\@result\{\}' =>'=>', '\@TeX\{\}' =>'TeX', ); # these are inline tags that require some kind of programmatic control # because they do strange things. # %specsubstitutions = ( '\@uref\b' =>'UREF', '\@anchor\b' =>'ANCHOR', ); # These are special block wrapping tags. When we hit one of these, # we simply wrap whatever comes between the (always paired) tags # with a set of DocBook tags. # # Their handling is subtly different than the %tags. # We don't just replace them with the DocBook tags, because we # want any tags to go inside them, not outside. # %blocks = ( '\@quotation\b' =>'
', '\@end quotation' =>'
', ); # these are inline @-Commands which are replaced by a set of tags instead of one-for-one # character substitution. Replacement is inline. # # there is also a special case, the "blank" tag, which does what you'd think. # It removes the existing tag, but doesn't add the wrapper. # # Example: @code{foo} becomes foo. # %tags = ( '\@acronym\b' =>'abbrev', '\@b\b' =>"emphasis role='bold'", '\@cite\b' =>'citetitle', '\@code\b' =>'literal', '\@command\b' =>'command', '\@dfn\b' =>"emphasis role='bold'", '\@email\b' =>'email', '\@emph\b' =>'emphasis', '\@env\b' =>'envvar', '\@footnote\b' =>'footnote', '\@file\b' =>'filename', '\@i\b' =>'emphasis', '\@samp\b' =>'literal', '\@w\b' =>'', ); # this is where definfoenclose definitions go, and they are processed last # %definfos = (); # and this is where macros are stored # $inmacro = 0; $macro = ""; %macros = {}; $macroargs = ""; %macroargs = (); $macrotext = ""; %macrotext = (); ############################################################################## ############################################################################## ############################################################################## while (1) { if ($ARGV[0] eq '') { last; } elsif ($ARGV[0] eq '--version') { &version; exit(0); } elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) { shift(@ARGV); $requestedfile = $ARGV[0]; } elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) { shift(@ARGV); $outputfile = $ARGV[0]; } elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) { shift(@ARGV); $logfile = $ARGV[0]; } elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) { shift(@ARGV); $maxerrors = $ARGV[0]; } elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--verbose')) { $verbose++; } elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) { &usage; } else { &raiseerror("unrecognized option: $ARGV[0]\n"); $error = 1; &usage; } shift(@ARGV); } # be sure to open the log file before trying to write any messages to it! # if ($logfile) { open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n"; } $verbose = 3 if ($verbose > 3); &message("verbose mode on.") if ($verbose == 1); &message("debugging mode on.") if ($verbose ==2); &message("insanity mode on.") if ($verbose ==3); if ($outputfile) { &message("output will go to $outputfile") if (verbose); open($outfh, "> $outputfile"); } while () { $template .= $_; } &processfile ("$requestedfile"); &writefile; close($outfh) if ($outputfile); close($logfh) if ($logfile); sub processfile { my($filename, $basename, $path, $ext, $includefile, $linenumber, ); $macrolinecount = 0; my $fh = new FileHandle; $filename = @_[0]; $filename = 'STDIN' unless ($filename); ($basename, $path, $ext) = fileparse($filename); &message("processing $filename") if ($verbose); $linenumber = 0; open $fh, "<$filename" or raiseerror("cannot open $filename\n"); LINE: while ($line = <$fh>) { chomp($line); $linenumber++; $currentfile = $filename; $currentline = $linenumber; $originalline = $line; $trimline; if ($seekend) { $seekend = '' if ($line =~ /$seekend/); next LINE; } &message("LINE $currentline: $line") if ($verbose > 1); $line =~ s/&/&/g; # keep before the < > so we don't clobber them $line =~ s/\/>/g; $line =~ s/\@\@\}/DCM_AT\}/g; $line =~ s/\@\{/DCM_LB/g; $line =~ s/\@\}/DCM_RB/g; if ($saveline) { $line = $saveline . ' ' . $line; $saveline = ''; &message("line restored, line: $line") if ($verbose > 1); } unless (&bracketsmatch($line)) { &message("bracket mismatch") if ($verbose > 2); $saveline = $line; next LINE; } if ($line =~ /^\@include/) { $includefile = $line; $includefile =~ s/^\@include\s+//; $includefile = $path . $includefile; $line = ''; if (-e $includefile) { &message("including $includefile") if ($verbose); &processfile ("$includefile"); } else { &raiseerror("include file $includefile not found."); } } elsif ($literalend) { if ($line =~ /$literalend/) { $literalend = ''; &closeliteral; } else { &writeline; } } elsif ($inmacro) { ($pattern, $action) = &matchpattern(); if ($action eq 'SEEKEND') { $seekend = $pattern; $seekend =~ s/\@/\@end /; next LINE; } elsif ($action eq 'DROPLINE') { next LINE; } if ($line =~ /\@end\s+macro/) { #save macro $macroargs{$macro} = $macroargs; $macrotext{$macro} = $macrotext; &message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2); $inmacro = 0; $macroargs = ''; $macrotext = ''; $macrolinecount = 0; } elsif ($line =~ /\@quote-arg/) { } else { &message("macro line: $line") if ($verbose > 2); $macrotext .= $line; $macrolinecount++; if ($macrolinecount >= 10) { exit(1); } } } elsif ($intable) { &message("table: $line") if ($verbose > 2); ($pattern, $action) = &matchpattern(); if ($action eq 'TABLEEND') { &closetable; } elsif ($action eq 'ITEM') { &tablerow; &tablecol; $line =~ s/\@item\w*?\s+//; &message("cell: $line") if ($verbose > 2); $line = &trim($line); $line = $tableformat . "\{$line\}" unless ($tableformat eq '@asis'); writeline; &closetablecol; } else { &tablecol; &writeline; } } else { ($pattern, $action) = &matchblock(); if ($action) { &message("matched block $pattern") if ($verbose > 2); &closepara; $buf .= $action; next LINE; } ($pattern, $action) = &matchpattern(); if ($action eq 'DROPLINE') { next LINE; } elsif ($action eq 'SEEKEND') { $seekend = $pattern; $seekend =~ s/\@/\@end /; } elsif ($action eq 'LITERALBLOCK') { $literalend = $pattern; $literalend =~ s/\@/\@end /; &literal; } elsif ($action eq 'ALIAS') { &alias; } elsif ($action eq 'MACRO') { ¯o; } elsif ($action eq 'META') { &meta; } elsif ($action eq 'NODE') { &node; } elsif ($action eq 'SECT1') { §1; } elsif ($action eq 'SECT2') { §2; } elsif ($action eq 'SECT3') { §3; } elsif ($action eq 'SECT4') { §4; } elsif ($action eq 'APPENDIX') { &appendix; } elsif ($action eq 'PARA') { &closeformalpara; ¶ } elsif ($action eq 'ORDEREDLIST') { &orderedlist; } elsif ($action eq 'ORDEREDLISTEND') { &closeorderedlist; } elsif ($action eq 'ITEMIZEDLIST') { &itemizedlist; } elsif ($action eq 'ITEMIZEDLISTEND') { &closeitemizedlist; } elsif ($action eq 'ITEM') { &item; } elsif ($action eq 'TABLE') { $tableformat = $line; $tableformat =~ s/^\@\w+\s+//; &message("table format: $tableformat") if ($verbose > 2); &table; } elsif ($action eq 'TABLEEND') { &closetable; } elsif ($action eq 'INDEX') { &message("indexing not yet supported") if ($verbose); } elsif ($action eq 'DEFINFOENCLOSE') { &definfoenclose; } elsif ($action eq 'SET') { &set; } elsif ($action eq 'CLEAR') { &clear; } elsif ($action eq 'IFSET') { $seekend = '\@end ifset' unless (&ifset()); } elsif ($action eq 'IFCLEAR') { $seekend = '\@end ifclear' if (&ifset()); } elsif ($action eq 'BYE') { &closesect1; last; } elsif ($action eq 'ERROR') { &raiseerror("Hit ERROR tag with $pattern on $line"); } elsif ($action eq '') { &writeline; } else { &raiseerror("Unknown action: $action on command $pattern"); } } } close($fh); } sub writefile { &closeappendix; &message("copying meta-data to docbook") if ($verbose); $template =~ s/%%TITLE/$title/; $template =~ s/%%ABSTRACT/$abstract/; $template =~ s/%%BODY/$buf/; # &message("deleting empty tags") if ($verbose); # $i = 0; # while (1) { # $template =~ s/\<(.+?)\>\W*\<\/\1\>//gs; # delete empty tags # $i++; # last if ($i == 10); # } &message("writing $outputfile") if ($verbose); print $outfh $template . "\n"; print $outfh ''. "\n"; } #################### # PATTERN MATCHING # #################### # this is for beginning-of-line @-commands # sub matchpattern { foreach $key (keys %patterns) { if ($line =~ /$key/) { return ($key, $patterns{$key}); } } return ('', ''); } # this is for special wrapped blocks, like blockquotes # sub matchblock { foreach $key (keys %blocks) { if ($line =~ /$key/) { return ($key, $blocks{$key}); } } } # the rest are all inline processing # sub convertinline { &convertsubsts; # single characters, do immediately and after macros &message(" SUBSTS: $line") if ($verbose > 1); &convertmacros; # can insert new @{} commands, do first &message(" MACROS: $line") if ($verbose > 1); &convertsubsts; # single characters, do immediately and after macros &message(" SUBSTS: $line") if ($verbose > 1); &convertvalues; # insert variables, do immediately after macros &message(" VALUES: $line") if ($verbose > 1); &convertdefinfos; # custom highlighting, do after macros &message(" DINFOS: $line") if ($verbose > 1); &converttags; # @foo{bar} to bar do next to last &message(" DBTAGS: $line") if ($verbose > 1); &convertlinks; # @ref{} => , do last &message(" ULINKS: $line") if ($verbose > 1); &convertspecsubsts; # @anchor{} => , do last &message(" ANCHOR: $line") if ($verbose > 1); } # macros # sub convertmacros { while (1) { ($macro, $macroargs, $macrotext) = &matchmacro(); last unless ($macro); &message("matched macro $macro") if ($verbose > 1); if ($line =~ /\@$macro\{.*?\}/) { &replacemacro; } else { &raiseerror("unmatched macro brackets in line $line"); last; } &message("line is now $line") if ($verbose > 2); } } sub matchmacro { # &message("matchmacros $line"); foreach $macro (keys %macroargs) { if ($line =~ /\@$macro\{/) { return ($macro, $macroargs{$macro}, $macrotext{$macro}); } } } sub replacemacro { my $macroarg = $line; my $open; my $close; $macroargs = "\\\\" . $macroargs . "\\\\"; $macroarg =~ s/^.*?\@$macro\{//; $macroarg =~ s/\}.*?$//; $macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/; &message("replace pattern '$macroargs' with '$macroarg' in '$macrotext'") if ($verbose > 2); $macrotext =~ s/$macroargs/$macroarg/g; $line =~ s/\@$macro\{.*?\}/$macrotext/; } # character substitution # sub convertsubsts { while (1) { ($pattern, $action) = &matchsubst(); last unless ($pattern); &replacesubst; } } sub matchsubst { foreach $key (keys %substitutions) { if ($line =~ /$key/) { return ($key, $substitutions{$key}); } } return ('', ''); } sub replacesubst { &message("replacing '$pattern' with '$action' on line $line") if ($verbose > 1); $line =~ s/$pattern/$action/g; } # special character substitution # sub convertspecsubsts { while (1) { ($pattern, $action) = &matchspecsubst(); last unless ($pattern); &replacespecsubst; } } sub matchspecsubst { foreach $key (keys %specsubstitutions) { if ($line =~ /$key/) { return ($key, $specsubstitutions{$key}); } } return ('', ''); } sub replacespecsubst { my $link; my $linkname; &message("replacing 'special case' $pattern $action on line $line") if ($verbose > 2); if ($action eq 'UREF') { $link = $line; $link =~ s/^.*?\@uref\{(.*?)\}.*$/\1/; ($link, $linkname) = split(/,/, $link); $link = &trim($link); $linkname = &trim($linkname); $linkname = $link unless ($linkname); $line =~ s/\@uref\{(.*?)\}/\$linkname\<\/ulink\>/g; &message("ulink: $link, linkname: $linkname, line: $line"); # if ($verbose > 2); } elsif ($action eq 'ANCHOR') { $link = $line; $link =~ s/^.*?\@anchor\{(.*?)\}.*$/\1/; $link = &anchorfix($link); $line =~ s/\@anchor\{(.*?)\}//; # $line =~ s/\@anchor\{([^\}]*?)\}/\/; # $line =~ s/\@anchor\{([^\}]*?\{.*?\}.*?)\}/\/; # &message("anchor: $link on line: $line"); # if ($verbose > 2); } else { &raiseerror("Unrecognized 'special case' inline substitution code $pattern"); } } $loops = 0; # replace @-Commands with DocBook tags # sub converttags { $loops = 0; while (1) { ($pattern, $action) = &matchtag(); last unless ($pattern); if ($line =~ /$pattern.*?\}/) { $loops++; &replacetag; exit if ($loops >= 25); } else { &raiseerror("unmatched tag brackets in line $line"); last; } } } sub matchtag { foreach $key (keys %tags) { if ($line =~ /$key/) { return ($key, $tags{$key}); } } return ('', ''); } sub replacetag { my ($tag, $attributes, $part, $newline, $fragment, ); if ($action eq '') { &message("removing old tag, but not adding a new one") if ($verbose > 2); $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\1/g; $line =~ s/$pattern\{([^\}]*?)\}/\1/g; } else { &message("replacing $pattern with $action tags on line $line") if ($verbose > 1); ($tag, $attributes) = split(/ /, $action); if ($attributes) { $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g; $line =~ s/$pattern\{([^\}]*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g; } else { $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag\>\1\<\/$tag\>/g; $line =~ s/$pattern\{([^\}]*?)\}/\<$tag\>\1\<\/$tag\>/g; } } } # replace @-Commands with custom highlighting # sub convertdefinfos { while (1) { ($pattern, $action) = &matchdefinfo(); last unless ($pattern); &message("matched definfoenclose '$pattern'") if ($verbose >2); &replacedefinfo; } } sub matchdefinfo { foreach $key (keys %definfos) { if ($line =~ /$key\{.*?\}/) { return ($key, $definfos{$key}); } } return ('', ''); } sub replacedefinfo { ($prefix, $suffix) = split(/,/,$action); &message("pattern: $pattern, prefix: $prefix, suffix: $suffix") if ($verbose > 2); $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/$prefix\1$suffix/g; $line =~ s/$pattern\{([^\}]*?)\}/$prefix\1$suffix/g; $line =~ s/$pattern\{\}/$prefix$suffix/g; &message("definfoenclose replaced") if ($verbose >2); } # replace @value{} with values # sub convertvalues { my ($key, $value); foreach $key (keys %setvalues) { $value = $setvalues{$key}; $line =~ s/\@value\{$key\}/$value/; } } sub convertlinks { my $link, $anchor; while ($line =~ /\@ref\{/) { if ($line =~ /\@ref\{.*?\}/) { &message("making xref on line $line") if ($verbose > 1); $anchor = $line; $anchor =~ s/^.*?\@ref\{//; $anchor =~ s/\}.*$//; $anchortitle = &trim($anchor); $anchor = &anchorfix($anchor); &message("anchor: $anchor, title; $anchortitle") if ($verbose > 2); &raiseerror("NO ANCHOR in $line") unless ($anchor); # decide what kind of link to make based on whether it is # to a section or not. Sections have titles that can be # referenced using an xref. Anchors do not, and we have to # render a full tag. # if ($nodelevels{$anchor}) { $link = "\"; } else { $link = "\$anchortitle\<\/link\>"; } $line =~ s/\@ref\{.*?\}/$link/; &message("made xref to tag $anchor on line: $line") if ($verbose > 1); } else { &raiseerror("unmatched ref brackets in line $line"); last; } } } sub anchorfix { my $anchor = $_[0]; $anchor = lc(&trim($anchor)); $anchor = decode_entities($anchor); $anchor =~ s/&/-and-/g; $anchor =~ s/;//g; $anchor = encode_entities($anchor); $anchor =~ s/&(\w)grave/\1/g; $anchor =~ s/&(\w)acute/\1/g; $anchor =~ s/&(\w)circ/\1/g; $anchor =~ s/&(\w)uml/\1/g; $anchor =~ s/&(\w)tilde/\1/g; $anchor =~ s/&(\w)cedil/\1/g; $anchor =~ s/&/-and-/g; $anchor =~ s/;//g; $anchor =~ s/\//-/g; $anchor =~ s/\s+/-/g; $anchor =~ s/'//g; $anchor =~ s/!//g; $anchor =~ s/--/-/g; $anchor =~ s/\+/plus/g; $anchor =~ s/\*/x/g; $anchor =~ s/\(//g; $anchor =~ s/\)//g; $anchor =~ s/\@/at/g; $anchor =~ s/^-+//; $anchor =~ s/-+$//; $anchor =~ s/\$/S/; $anchor =~ s/\.//; return $anchor; } ############# # META-DATA # ############# # this routine processes meta-data @-Commands # sub meta { return if (&metaarg('\@setfilename', $sgmlfile)); return if (&metaarg('\@settitle', $title)); return if (&metaarg('\@author', $authorname)); &raiseerror("Unknown meta-data command $line"); } # Meta-data commands have arguments, # so if the pattern matches, store the argument # to the variable in $_[1] # sub metaarg { my $pattern = $_[0]; if ($line =~ /$pattern/) { $_[1] = $line; $_[1] =~ s/$pattern//; } } ################ # WRITE OUTPUT # ################ sub writeline { my $temp = &trim($line); &convertinline; unless (&bracketsmatch($line)) { &raiseerror("brackets do not match in line: $line"); last; } unless ($literal) { if ($temp eq '') { &closeformalpara; } else { unless (($inpara) or ($intable)) { if (($nodename) and ($nodeinit == 0)) { &initnode; } ¶ } } } $line =~ s/\@\@/\@/g; $line =~ s/DCM_AT/\@/g; $line =~ s/DCM_LB/\{/g; $line =~ s/DCM_RB/\}/g; $buf .= $line . "\n"; } ############################################## # STRUCTURAL TEXINFO -> DOCBOOK TRANSLATIONS # ############################################## # this is for when the source never set a position for this node in the # chapter-style hierarchy, which we use. # # We have to make a best guess based on the node's settings, # so put it right under whoever it calls the "up" node. # sub initnode { my $parentname = $nodeup; my $parentlevel = $nodelevels{$parentname}; if ($parentlevel == "A") { §1; } elsif ($parentlevel == 1) { §2; } elsif ($parentlevel == 2) { §3 } elsif ($parentlevel == 3) { §4; } else { &raiseerror("The parent node, $parentname, could not be found."); } } sub node { $line =~ s/\@node\b//; &convertinline; ($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line); $nodename = &trim($nodename); $nodeid = &anchorfix($nodename); $nodeprev = &trim($nodeprev); $nodenext = &trim($nodenext); $nodeup = &trim($nodeup); $nodenexts{$nodename} = $nodenext; $nodeprevs{$nodename} = $nodeprev; $nodeups{$nodename} = $nodeup; $nodeinit = 0; &message("Name: $nodename Next: $nodenext Previous: $nodeprev Up: $nodeup") if ($verbose > 1); &closeformalpara; if ($nodename eq 'Top') { $inabstract = 0; $abstract = $buf; $buf = ""; } } sub appendix { &closeappendix; $buf .= "\\$nodename\<\/title\>"; $inappendix = 1; $nodeinit = 1; $nodelevels{$nodename} = "A"; &message("processing node $nodename at level $nodelevels{$nodename}"); } sub sect1 { &closesect1; $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $insect1 = 1; $nodeinit = 1; $nodelevels{$nodename} = 1; &message("set node $nodename level to $nodelevels{$nodename}"); } sub sect2 { &closesect2; if ($inappendix) { $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 1; } else { $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 2; } $insect2 = 1; $nodeinit = 1; $nodelevels{$nodename} = 2; &message("set node $nodename level to $nodelevels{$nodename}"); } sub sect3 { &closesect3; if ($inappendix) { $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 2; } else { $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 3; } $insect3 = 1; $nodeinit = 1; $nodelevels{$nodename} = 3; &message("set node $nodename level to $nodelevels{$nodename}"); } sub sect4 { &closesect4; if ($inappendix) { $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 3; } else { $buf .= "\<sect4 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; $nodelevels{$nodename} = 4; } $insect4 = 1; $nodeinit = 1; $nodelevels{$nodename} = 4; &message("set node $nodename level to $nodelevels{$nodename}"); } sub table { &closetable; $buf .= '<informaltable><tgroup col=' . "'2'" . '><tbody>' . "\n"; $intable = 1; } sub tablerow { &closetablerow; $buf .= '<row>'; $inrow = 1; } sub tablecol { &closetablecol; $buf .= '<entry>'; $incol = 1; } sub orderedlist { &closeformalpara; $buf .= '<orderedlist>'; $inorderedlist = 1; } sub itemizedlist { &closeformalpara; $buf .= '<itemizedlist>'; $initemizedlist = 1; } sub item { if ($intable) { &tableitem; } else { &listitem; } } sub listitem { &closelistitem; $buf .= '<listitem>'; $inlistitem = 1; } sub formalpara { my $title = $_[0]; my $id = &anchorfix($title); &closeformalpara; #$buf .= "\<formalpara id='$id'\>\<title\>$title\<\/title\>\n\<para\>"; $informalpara = 1; $inpara = 1; $nodeinit = 1; } sub para { &closeformalpara; $buf .= '<para>'; $inpara = 1; } sub literal { if ($pattern =~ /\bformat\b/) { $literal = 'programlisting'; $buf .= '<programlisting>' . "\n"; } elsif ($pattern =~ /\bdisplay\b/) { $literal = 'screen'; $buf .= '<screen>'; } elsif ($pattern =~ /\bexample\b/) { $literal = 'programlisting'; $buf .= '<programlisting>'; } elsif ($pattern =~ /\bsmallexample\b/) { $literal = 'programlisting'; $buf .= '<programlisting>'; } } sub closeappendix { &closesect1; if ($inappendix) { $buf .= '</appendix>' . "\n"; $inappendix = 0; } } sub closesect1 { &closesect2; if ($insect1) { $buf .= '</sect1>' . "\n\n"; $insect1 = 0; } } sub closesect2 { &closesect3; if ($insect2) { if ($inappendix) { $buf .= '</sect1>' . "\n\n"; } else { $buf .= '</sect2>' . "\n\n"; } $insect2 = 0; } } sub closesect3 { &closesect4; if ($insect3) { if ($inappendix) { $buf .= '</sect2>' . "\n\n"; } else { $buf .= '</sect3>' . "\n\n"; } $insect3 = 0; } } sub closesect4 { &closeformalpara; if ($insect4) { if ($inappendix) { $buf .= '</sect3>' . "\n\n"; } else { $buf .= '</sect4>' . "\n\n"; } $insect4 = 0; } } sub closetable { &closetablerow; if ($intable) { $buf .= '</tbody></tgroup></informaltable>' . "\n"; $intable = 0; } } sub closetablerow { &closetablecol; if ($inrow) { $buf .= '</row>' . "\n"; $inrow = 0; } } sub closetablecol { if ($incol) { $buf .= '</entry>' . "\n"; $incol = 0; } } sub closeorderedlist { &closelistitem; if ($inorderedlist) { $buf .= '</orderedlist>' . "\n"; $inorderedlist = 0; } } sub closeitemizedlist { &closelistitem; if ($initemizedlist) { $buf .= '</itemizedlist>' . "\n"; $initemizedlist = 0; } } sub closelistitem { &closeformalpara; if ($inlistitem) { $buf .= '</listitem>' . "\n"; $inlistitem = 0; } } sub closeformalpara { &closepara; if ($informalpara) { # $buf .= '</formalpara>'; $informalpara = 0; } } sub closepara { if ($inpara) { $buf .= '</para>'; $inpara = 0; } } sub closeliteral { if ($literal) { $buf .= '</' . $literal . '>' . "\n"; $literal = ''; } } ################ # CONDITIONALS # ################ sub set { my ($name, $value); ($foo, $name, $value) = split(/\s+/, $line); $value = 1 unless ($value); $setvalues{$name} = $value; &message("set $name to $value") if ($verbose > 1); $value = $setvalues{$name}; &message("read back $value") if ($verbose > 2); } sub clear { my $name; ($foo, $name) = split(/\s+/, $line); $setvalues{$name} = 0; &message("cleared $name") if ($verbose > 1); } sub ifset { my $name; ($foo, $name) = split(/\s+/, $line); return $ifsets{$name}; } ################# # META-LANGUAGE # ################# # only the commands in %tags are currently supported # sub alias { my $alias; ($foo, $alias) = split(/\s/, $line); &message("alias: $alias") if ($verbose > 2); ($alias, $command) = split(/=/, $alias); &message("alias: $alias, command: $command") if ($verbose > 2); $alias = '\@' . $alias . '\b'; $command = '\@' . $command . '\b'; &message("alias: $alias, command: $command") if ($verbose > 2); $tag = $tags{$command}; &message("tag: $tag") if ($verbose > 2); $tags{$alias} = $tag; } sub macro { $line =~ s/\@macro\s*?(.*?)\{(.*?)\}/\1,\2/; ($macro, $macroargs) = split(/,/, $line); $macro = &trim($macro); $macros{$macro} = $macro; $macrotext = ''; $inmacro = 1; } # load custom @-commands # sub definfoenclose { my ($name, $prefix, $suffix, $key, $replacement); $line =~ s/\@definfoenclose\s+//; ($name, $prefix, $suffix) = split(/,/, $line); $key = '\@' . $name; $replacement = $prefix. ',' . $suffix; $definfos{$key} = $replacement; # &message("custom definfoenclosure: \[$name\] \[$prefix\] \[$suffix\] \/ \[$key\] \[$replacement\]") if ($verbose > 0); } ########### # INDEXES # ########### sub defindex { $line =~ s/^\@defindex\s+//; $indexes{$line} = 1; } #################### # SUPPORT ROUTINES # #################### sub bracketsmatch { my $bline = ' ' . $_[0] . ' '; my $left = scalar split(/\{/, $bline) - 1; my $right = scalar split(/\}/, $bline) - 1; if ($right == $left) { $badbracketlines = 0; $badbracketstartline = $currentline + 1 } else { $badbracketlines++; &message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1); if ($badbracketlines >= 20) { &raiseerror("Mismatched brackets starting on line $badbracketstartline: $line"); } } return ($right == $left); } sub trimline { $line = &trim($line); } sub trim { my $temp = $_[0]; $temp =~ s/^\s+//g; $temp =~ s/\s+$//g; return $temp; } sub raiseerror { my $errmsg = "ERROR: $currentfile $currentline $_[0]\n"; print STDERR $errmsg; $errors++; if (($maxerrors) and ($errors >= $maxerrors)) { print STDERR "aborting after $errors errors.\n"; &writefile; exit(1); } } sub message { my $message; $message = $_[0]; if ($logfile) { print $logfh "$message\n"; } else { print "$message\n"; } } sub version { print "texi2db version $version\n"; print "Copyright (c) 2002 David Merrill \<david\@lupercalia.net\>.\n"; print "\n"; print "Converts a Texinfo file into DocBook.\n"; print "\n"; print "This free software; see the source for copying conditions. There is no\n"; print "warranty; not even for merchantability or fitness for a particular purpose.\n"; } sub usage { &version; print "\n"; print "usage: texi2db [OPTIONS]... [FILE]\n"; print "-f, --file FILE read from file rather than STDIN.\n"; print "-o, --output-to FILE write to file rather than STDOUT.\n"; print "-l, --log-to FILE write status messages to file rather than STDOUT.\n"; print "-v, --verbose show diagnostic output.\n"; print " use twice for lots of detail.\n"; print " use thrice for insane amounts of detail.\n"; print "-e, --max-errors MAX show diagnostic output.\n"; print " use '--max-errors 0' to disable.\n"; print "-h, --help show this usage message.\n"; print " --version show the program version.\n"; exit($error); } __END__ <!DOCTYPE ARTICLE PUBLIC "-//OASIS//DTD DocBook V4.1.2//EN" > <article> <artheader> <title>%%TITLE %%ABSTRACT %%BODY