From 1872402c7f31db50946e6c7e5a512da066d5e275 Mon Sep 17 00:00:00 2001 From: david <> Date: Sat, 2 Feb 2002 04:55:52 +0000 Subject: [PATCH] added texi2db, texinfo to docbook converter nearing initial release --- LDP/README | 3 +- LDP/texi2db/texi2db | 1507 +++++++++++++++++++++++++++++++++++++++++ LDP/txt2db/README | 69 -- LDP/txt2db/sample.txt | 72 -- LDP/txt2db/txt2db.pl | 523 -------------- 5 files changed, 1509 insertions(+), 665 deletions(-) create mode 100755 LDP/texi2db/texi2db delete mode 100644 LDP/txt2db/README delete mode 100644 LDP/txt2db/sample.txt delete mode 100755 LDP/txt2db/txt2db.pl diff --git a/LDP/README b/LDP/README index 1ac12570..f4b9bce0 100644 --- a/LDP/README +++ b/LDP/README @@ -28,8 +28,9 @@ scrollserver/ python web application server front end to scrollkeeper stylesheets/ xsl stylesheets for xml -> html conversion www/ www.scrollserver.org website test/ to test your cvs without disturbing things, use this -txt2db/ utility to convert text files into docbook +texi2db/ utility to convert Texinfo files into docbook users/ individual users' areas +wt2db/ utility to convert WikiText files into docbook www/ websites db./ ldp database website cgi-bin/ perl scripts for the ldp database diff --git a/LDP/texi2db/texi2db b/LDP/texi2db/texi2db new file mode 100755 index 00000000..f0c2aa42 --- /dev/null +++ b/LDP/texi2db/texi2db @@ -0,0 +1,1507 @@ +#!/usr/bin/perl +# +# this utility converts a texinfo file into LDP WikiText format. +# +use File::Basename; +use FileHandle; +use HTML::Entities; + +$version = "0.1"; + +$errors = 0; +$error = 0; + +# runtime options +# +$maxerrors = 10; +$verbose = 0; + +# files +# +$requestedfile = ""; +$outputfile = ""; +$outfh = new FileHandle; +$logfile = ""; +$logfh = new FileHandle; + +# the following are flags to maintain state +# +# docbook structure flags +# +$inabstract = 1; +$inappendix = 0; +$insect1 = 0; +$insect2 = 0; +$insect3 = 0; +$insect44 = 0; +$intable = 0; +$inrow = 0; +$incol = 0; +$tableformat = ""; +$inorderedlist = 0; +$initemizedlist = 0; +$inlistitem = 0; +$informalpara = 0; +$inpara = 0; + +# these maintain internal program state +# +$line = ""; +$originalline = ""; +$currentfile = ""; +$currentline = ""; +$saveline = ""; +$badbracketlines = 0; +$badbracketstartline = 0; +$seekend = ""; +$literalend = ""; +$literal = ""; + +# here, we hold onto meta-data that we will need to build +# the article or book header structures. +# +$sgmlfile = ""; +$title = ""; +$authorname = ""; +$abstract = ""; +$buf = ""; + +# these hold information about the node we are currently in, if any +# +$nodename = ""; +$nodenext = ""; +$nodeprev = ""; +$nodeup = ""; +$nodeid = ""; +$nodeinit = 0; + +# remember this hierarchical information for every node +# we use it if the node doesn't specify a level. +# +%nodenexts = (); +%nodeprevs = (); +%nodeups = (); +%nodelevels = (); + +# holds user variables +# +%setvalues = (); +%ifsets = (); + +# this is used to build a list of index entries to be written when requested +# +%indexes = (); + +# These are used for keeping state on the most recent search of any of the following hashes +# +$pattern = ""; +$action = ""; + +# In the following hash, the perl expressions are used to search +# each line of the input file. If matched, the Action field defines +# what to do with it. +# +# Note that @-Commands of this type must be at the beginning of the line. +# +# TODO: put these in a .conf file so it can be tweaked or customized +# +# Actions: +# +# DROPLINE Drop it on the floor +# DROPBLOCK Drop the whole block on the floor +# META Meta-Data +# BYE Stop processing the file +# INDEX An index entry +# ORDEREDLIST Numbered (enumerated) list +# ORDEREDLISTEND End an enumerated list +# SEEKEND Skip everything until you find the corresponding @end tag +# NODE A Texinfo node +# SECT? One of the sectioning commands +# APPENDIX? One of the appendix commands +# DEFINFOENCLOSE Load a customized highlighting pattern +# SET Set a variable +# CLEAR Clear a variable +# IFSET Test a variable +# +# @-Command Action +%patterns = ( + '\\input\b' =>'DROPLINE', +# '\@\*' =>'PARA', + '^\@alias' =>'ALIAS', + '^\@author' =>'META', + '^\@bye\b' =>'BYE', + '^\@c\b' =>'DROPLINE', + '^\@cindex\b' =>'INDEX', + '^\@clear\b' =>'CLEAR', + '^\@comment\b' =>'DROPLINE', + '^\@contents\b' =>'DROPLINE', + '^\@defindex\b' =>'DROPLINE', # TODO + '^\@definfoenclose' =>'DEFINFOENCLOSE', + '^\@display\b' =>'LITERALBLOCK', + '^\@enumerate\b' =>'ORDEREDLIST', + '^\@end enumerate\b' =>'ORDEREDLISTEND', + '^\@example' =>'LITERALBLOCK', + '^\@format\b' =>'LITERALBLOCK', + '^\@headings\b' =>'DROPLINE', + '^\@html\b' =>'SEEKEND', + '^\@end html\b' =>'DROLINE', + '^\@ifhtml\b' =>'DROPLINE', + '^\@end ifhtml\b' =>'DROPLINE', + '^\@ifinfo\b' =>'SEEKEND', + '^\@end ifinfo\b' =>'DROPLINE', + '^\@ifnottex\b' =>'DROPLINE', + '^\@end ifnottex\b' =>'DROPLINE', + '^\@ifset\b' =>'IFSET', # handled like SEEKEND + '^\@end ifset\b' =>'DROPLINE', # or ignored, so ignore this too + '^\@ifclear\b' =>'IFCLEAR', + '^\@end ifclear\b' =>'DROPLINE', + '^\@iftex\b' =>'SEEKEND', + '^\@ignore\b' =>'SEEKEND', + '^\@itemize\b' =>'ITEMIZEDLIST', + '^\@end itemize\b' =>'ITEMIZEDLISTEND', + '^\@item\b' =>'ITEM', + '^\@itemx\b' =>'ITEM', + '^\@macro\b' =>'MACRO', + '^\@menu\b' =>'SEEKEND', # TODO + '^\@node\b' =>'NODE', + '^\@page\b' =>'DROPLINE', + '^\@printindex\b' =>'DROPLINE', # TODO + '^\@set\b' =>'SET', + '^\@setchapternewpage\b' =>'DROPLINE', + '^\@setfilename\b' =>'META', + '^\@settitle\b' =>'META', + '^\@smallbook\b' =>'DROPLINE', + '^\@smallexample\b' =>'LITERALBLOCK', + '^\@sp\b' =>'DROPLINE', + '^\@table\b' =>'TABLE', + '^\@end table\b' =>'TABLEEND', + '^\@tex\b' =>'SEEKEND', + '^\@titlepage\b' =>'SEEKEND', + + '^\@top\b' =>'SECT1', # sectioning commands + '^\@chapter\b' =>'SECT1', + '^\@section\b' =>'SECT2', + '^\@subsection\b' =>'SECT3', + '^\@subsubsection\b' =>'SECT4', + '^\@centerchap\b' =>'SECT1', + '^\@unnumbered\b' =>'SECT1', + '^\@unnumberedsec\b' =>'SECT2', + '^\@unnumberedsubsec\b' =>'SECT3', + '^\@unnumberedsubsubsec\b' =>'SECT4', + '^\@majorheading\b' =>'SECT1', + '^\@chapheading\b' =>'SECT1', + '^\@heading\b' =>'SECT2', + '^\@subheading\b' =>'SECT3', + '^\@subsubheading\b' =>'SECT4', + '^\@appendix\b' =>'APPENDIX', + '^\@appendixsec\b' =>'SECT1', + '^\@appendixsubsec\b' =>'SECT2', + '^\@appendixsubsubsec\b' =>'SECT3', +); + +# these are one-for-one string substitutions +# +# most of them are ISO-8879 codes for international characters, +# used by HTML as well as XML, but there are exceptions. +# There is one special case, "@@", which is the escaped form of @. +# We do not handle @@ here, because it has to come after all other +# @-commands have been processed. +# +# It's fine to replace these with nothing if you want them to be +# simply dropped on the floor. +# +# I keep these in general alphabetical order, except that @end tags +# are put right after their opening form. +# +# Note: Only the left side should be escaped! +# +# Example: @copyright{} becomes © +# +%substitutions = ( + '\@!' =>'!', + '\@\*' =>'', + '\@\s' =>' ', + '\@"A' =>'Ä', + '\@"E' =>'Ë', + '\@"I' =>'Ï', + '\@"O' =>'Ö', + '\@"U' =>'Ü', + '\@"a' =>'ä', + '\@"e' =>'ë', + '\@"i' =>'ï', + '\@"o' =>'ö', + '\@"u' =>'ü', + '\@' . "'" . 'A' =>'Á', + '\@' . "'" . 'E' =>'É', + '\@' . "'" . 'I' =>'Í', + '\@' . "'" . 'O' =>'Ó', + '\@' . "'" . 'U' =>'Ú', + '\@' . "'" . 'a' =>'á', + '\@' . "'" . 'e' =>'é', + '\@' . "'" . 'i' =>'í', + '\@' . "'" . 'o' =>'ó', + '\@' . "'" . 'u' =>'ú', + '\@,{c}' =>'¸', + '\@-' =>'¯', + '\@\.' =>'.', + '\@:' =>'', + '\@=' =>'', + '\@\?' =>'?', + '\@^A' =>'Â', + '\@^E' =>'Ê', + '\@^I' =>'Î', + '\@^O' =>'Ô', + '\@^U' =>'Û', + '\@^a' =>'â', + '\@^e' =>'ê', + '\@^i' =>'î', + '\@^o' =>'ô', + '\@^u' =>'û', + '\@`A' =>'À', + '\@`E' =>'È', + '\@`I' =>'Ì', + '\@`O' =>'Ò', + '\@`U' =>'Ù', + '\@`a' =>'à', + '\@`e' =>'è', + '\@`i' =>'ì', + '\@`o' =>'ò', + '\@`u' =>'ù', +# '\@\{' =>'{', # do these manually at the end so brackets match +# '\@\}' =>'}', + '\@~A' =>'Ã', + '\@~E' =>'&Etilde;', + '\@~I' =>'Ĩ', + '\@~O' =>'Õ', + '\@~U' =>'Ũ', + '\@~a' =>'ã', + '\@~e' =>'&etilde;', + '\@~i' =>'ĩ', + '\@~o' =>'õ', + '\@~u' =>'ũ', + '\@AA\{\}' =>'Å', + '\@aa\{\}' =>'å', + '\@AE\{\}' =>'Æ', + '\@ae\{\}' =>'æ', + '\@copyright\{\}' =>'©', + '\@dots\{\}' =>'…', + '\@center\b' =>'', + '\@enddots\{\}' =>'…', + '\@equiv' =>'≡', + '\@group\b' =>'', + '\@end group\b' =>'', + '\@exclamdown' =>'¡', + '\@noindent\b' =>'', + '\@refill\b' =>'', + '\@result\{\}' =>'=>', + '\@TeX\{\}' =>'TeX', +); + +# these are inline tags that require some kind of programmatic control +# because they do strange things. +# +%specsubstitutions = ( + '\@uref\b' =>'UREF', + '\@anchor\b' =>'ANCHOR', +); + +# These are special block wrapping tags. When we hit one of these, +# we simply wrap whatever comes between the (always paired) tags +# with a set of DocBook tags. +# +# Their handling is subtly different than the %tags. +# We don't just replace them with the DocBook tags, because we +# want any tags to go inside them, not outside. +# +%blocks = ( + '\@quotation\b' =>'
', + '\@end quotation' =>'
', +); + +# these are inline @-Commands which are replaced by a set of tags instead of one-for-one +# character substitution. Replacement is inline. +# +# there is also a special case, the "blank" tag, which does what you'd think. +# It removes the existing tag, but doesn't add the wrapper. +# +# Example: @code{foo} becomes foo. +# +%tags = ( + '\@acronym\b' =>'abbrev', + '\@b\b' =>"emphasis role='bold'", + '\@cite\b' =>'citetitle', + '\@code\b' =>'literal', + '\@command\b' =>'command', + '\@dfn\b' =>"emphasis role='bold'", + '\@email\b' =>'email', + '\@emph\b' =>'emphasis', + '\@env\b' =>'envvar', + '\@footnote\b' =>'footnote', + '\@file\b' =>'filename', + '\@i\b' =>'emphasis', + '\@samp\b' =>'literal', + '\@w\b' =>'', +); + +# this is where definfoenclose definitions go, and they are processed last +# +%definfos = (); + +# and this is where macros are stored +# +$inmacro = 0; +$macro = ""; +%macros = {}; +$macroargs = ""; +%macroargs = (); +$macrotext = ""; +%macrotext = (); + + +############################################################################## +############################################################################## +############################################################################## + + +while (1) { + if ($ARGV[0] eq '') { + last; + } elsif ($ARGV[0] eq '--version') { + &version; + exit(0); + } elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) { + shift(@ARGV); + $requestedfile = $ARGV[0]; + } elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) { + shift(@ARGV); + $outputfile = $ARGV[0]; + } elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) { + shift(@ARGV); + $logfile = $ARGV[0]; + } elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) { + shift(@ARGV); + $maxerrors = $ARGV[0]; + } elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--verbose')) { + $verbose++; + } elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) { + &usage; + } else { + &raiseerror("unrecognized option: $ARGV[0]\n"); + $error = 1; + &usage; + } + shift(@ARGV); +} + +# be sure to open the log file before trying to write any messages to it! +# +if ($logfile) { + open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n"; +} + +$verbose = 3 if ($verbose > 3); +&message("verbose mode on.") if ($verbose == 1); +&message("debugging mode on.") if ($verbose ==2); +&message("insanity mode on.") if ($verbose ==3); + +if ($outputfile) { + &message("output will go to $outputfile") if (verbose); + open($outfh, "> $outputfile"); +} + +while () { + $template .= $_; +} + +&processfile ("$requestedfile"); + +&writefile; +close($outfh) if ($outputfile); +close($logfh) if ($logfile); + +sub processfile { + my($filename, + $basename, + $path, + $ext, + $includefile, + $linenumber, + ); + + $macrolinecount = 0; + + my $fh = new FileHandle; + + $filename = @_[0]; + $filename = 'STDIN' unless ($filename); + + ($basename, $path, $ext) = fileparse($filename); + &message("processing $filename") if ($verbose); + $linenumber = 0; + + open $fh, "<$filename" or raiseerror("cannot open $filename\n"); +LINE: while ($line = <$fh>) { + chomp($line); + + $linenumber++; + $currentfile = $filename; + $currentline = $linenumber; + + $originalline = $line; + $trimline; + + if ($seekend) { + $seekend = '' if ($line =~ /$seekend/); + next LINE; + } + + &message("LINE $currentline: $line") if ($verbose > 1); + + $line =~ s/&/&/g; # keep before the < > so we don't clobber them + $line =~ s/\/>/g; + $line =~ s/\@\@\}/DCM_AT\}/g; + $line =~ s/\@\{/DCM_LB/g; + $line =~ s/\@\}/DCM_RB/g; + + if ($saveline) { + $line = $saveline . ' ' . $line; + $saveline = ''; + &message("line restored, line: $line") if ($verbose > 1); + } + unless (&bracketsmatch($line)) { + &message("bracket mismatch") if ($verbose > 2); + $saveline = $line; + next LINE; + } + + if ($line =~ /^\@include/) { + $includefile = $line; + $includefile =~ s/^\@include\s+//; + $includefile = $path . $includefile; + $line = ''; + if (-e $includefile) { + &message("including $includefile") if ($verbose); + &processfile ("$includefile"); + } else { + &raiseerror("include file $includefile not found."); + } + } elsif ($literalend) { + if ($line =~ /$literalend/) { + $literalend = ''; + &closeliteral; + } else { + &writeline; + } + } elsif ($inmacro) { + ($pattern, $action) = &matchpattern(); + if ($action eq 'SEEKEND') { + $seekend = $pattern; + $seekend =~ s/\@/\@end /; + next LINE; + } elsif ($action eq 'DROPLINE') { + next LINE; + } + if ($line =~ /\@end\s+macro/) { + #save macro + $macroargs{$macro} = $macroargs; + $macrotext{$macro} = $macrotext; + &message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2); + $inmacro = 0; + $macroargs = ''; + $macrotext = ''; + $macrolinecount = 0; + } elsif ($line =~ /\@quote-arg/) { + } else { + &message("macro line: $line") if ($verbose > 2); + $macrotext .= $line; + $macrolinecount++; + if ($macrolinecount >= 10) { + exit(1); + } + } + } elsif ($intable) { + &message("table: $line") if ($verbose > 2); + ($pattern, $action) = &matchpattern(); + if ($action eq 'TABLEEND') { + &closetable; + } elsif ($action eq 'ITEM') { + &tablerow; + &tablecol; + $line =~ s/\@item\w*?\s+//; + &message("cell: $line") if ($verbose > 2); + $line = &trim($line); + $line = $tableformat . "\{$line\}" unless ($tableformat eq '@asis'); + writeline; + &closetablecol; + } else { + &tablecol; + &writeline; + } + } else { + ($pattern, $action) = &matchblock(); + if ($action) { + &message("matched block $pattern") if ($verbose > 2); + &closepara; + $buf .= $action; + next LINE; + } + ($pattern, $action) = &matchpattern(); + if ($action eq 'DROPLINE') { + next LINE; + } elsif ($action eq 'SEEKEND') { + $seekend = $pattern; + $seekend =~ s/\@/\@end /; + } elsif ($action eq 'LITERALBLOCK') { + $literalend = $pattern; + $literalend =~ s/\@/\@end /; + &literal; + } elsif ($action eq 'ALIAS') { + &alias; + } elsif ($action eq 'MACRO') { + ¯o; + } elsif ($action eq 'META') { + &meta; + } elsif ($action eq 'NODE') { + &node; + } elsif ($action eq 'SECT1') { + §1; + } elsif ($action eq 'SECT2') { + §2; + } elsif ($action eq 'SECT3') { + §3; + } elsif ($action eq 'SECT4') { + §4; + } elsif ($action eq 'APPENDIX') { + &appendix; + } elsif ($action eq 'PARA') { + &closeformalpara; + ¶ + } elsif ($action eq 'ORDEREDLIST') { + &orderedlist; + } elsif ($action eq 'ORDEREDLISTEND') { + &closeorderedlist; + } elsif ($action eq 'ITEMIZEDLIST') { + &itemizedlist; + } elsif ($action eq 'ITEMIZEDLISTEND') { + &closeitemizedlist; + } elsif ($action eq 'ITEM') { + &item; + } elsif ($action eq 'TABLE') { + $tableformat = $line; + $tableformat =~ s/^\@\w+\s+//; + &message("table format: $tableformat") if ($verbose > 2); + &table; + } elsif ($action eq 'TABLEEND') { + &closetable; + } elsif ($action eq 'INDEX') { + &message("indexing not yet supported") if ($verbose); + } elsif ($action eq 'DEFINFOENCLOSE') { + &definfoenclose; + } elsif ($action eq 'SET') { + &set; + } elsif ($action eq 'CLEAR') { + &clear; + } elsif ($action eq 'IFSET') { + $seekend = '\@end ifset' unless (&ifset()); + } elsif ($action eq 'IFCLEAR') { + $seekend = '\@end ifclear' if (&ifset()); + } elsif ($action eq 'BYE') { + &closesect1; + last; + } elsif ($action eq 'ERROR') { + &raiseerror("Hit ERROR tag with $pattern on $line"); + } elsif ($action eq '') { + &writeline; + } else { + &raiseerror("Unknown action: $action on command $pattern"); + } + } + } + close($fh); +} + +sub writefile { + &closeappendix; + &message("copying meta-data to docbook") if ($verbose); + $template =~ s/%%TITLE/$title/; + $template =~ s/%%ABSTRACT/$abstract/; + $template =~ s/%%BODY/$buf/; +# &message("deleting empty tags") if ($verbose); +# $i = 0; +# while (1) { +# $template =~ s/\<(.+?)\>\W*\<\/\1\>//gs; # delete empty tags +# $i++; +# last if ($i == 10); +# } + &message("writing $outputfile") if ($verbose); + print $outfh $template . "\n"; + print $outfh ''. "\n"; +} + +#################### +# PATTERN MATCHING # +#################### + +# this is for beginning-of-line @-commands +# +sub matchpattern { + foreach $key (keys %patterns) { + if ($line =~ /$key/) { + return ($key, $patterns{$key}); + } + } + return ('', ''); +} + +# this is for special wrapped blocks, like blockquotes +# +sub matchblock { + foreach $key (keys %blocks) { + if ($line =~ /$key/) { + return ($key, $blocks{$key}); + } + } +} + +# the rest are all inline processing +# +sub convertinline { + &convertsubsts; # single characters, do immediately and after macros + &message(" SUBSTS: $line") if ($verbose > 1); + &convertmacros; # can insert new @{} commands, do first + &message(" MACROS: $line") if ($verbose > 1); + &convertsubsts; # single characters, do immediately and after macros + &message(" SUBSTS: $line") if ($verbose > 1); + &convertvalues; # insert variables, do immediately after macros + &message(" VALUES: $line") if ($verbose > 1); + &convertdefinfos; # custom highlighting, do after macros + &message(" DINFOS: $line") if ($verbose > 1); + &converttags; # @foo{bar} to bar do next to last + &message(" DBTAGS: $line") if ($verbose > 1); + &convertlinks; # @ref{} => , do last + &message(" ULINKS: $line") if ($verbose > 1); + &convertspecsubsts; # @anchor{} => , do last + &message(" ANCHOR: $line") if ($verbose > 1); +} + +# macros +# +sub convertmacros { + while (1) { + ($macro, $macroargs, $macrotext) = &matchmacro(); + last unless ($macro); + &message("matched macro $macro") if ($verbose > 1); + if ($line =~ /\@$macro\{.*?\}/) { + &replacemacro; + } else { + &raiseerror("unmatched macro brackets in line $line"); + last; + } + &message("line is now $line") if ($verbose > 2); + } +} + +sub matchmacro { +# &message("matchmacros $line"); + foreach $macro (keys %macroargs) { + if ($line =~ /\@$macro\{/) { + return ($macro, $macroargs{$macro}, $macrotext{$macro}); + } + } +} + +sub replacemacro { + my $macroarg = $line; + my $open; + my $close; + $macroargs = "\\\\" . $macroargs . "\\\\"; + $macroarg =~ s/^.*?\@$macro\{//; + $macroarg =~ s/\}.*?$//; + $macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/; + &message("replace pattern '$macroargs' with '$macroarg' in '$macrotext'") if ($verbose > 2); + $macrotext =~ s/$macroargs/$macroarg/g; + $line =~ s/\@$macro\{.*?\}/$macrotext/; +} + +# character substitution +# +sub convertsubsts { + while (1) { + ($pattern, $action) = &matchsubst(); + last unless ($pattern); + &replacesubst; + } +} + +sub matchsubst { + foreach $key (keys %substitutions) { + if ($line =~ /$key/) { + return ($key, $substitutions{$key}); + } + } + return ('', ''); +} + +sub replacesubst { + &message("replacing '$pattern' with '$action' on line $line") if ($verbose > 1); + $line =~ s/$pattern/$action/g; +} + +# special character substitution +# +sub convertspecsubsts { + while (1) { + ($pattern, $action) = &matchspecsubst(); + last unless ($pattern); + &replacespecsubst; + } +} + +sub matchspecsubst { + foreach $key (keys %specsubstitutions) { + if ($line =~ /$key/) { + return ($key, $specsubstitutions{$key}); + } + } + return ('', ''); +} + +sub replacespecsubst { + my $link; + my $linkname; + + &message("replacing 'special case' $pattern $action on line $line") if ($verbose > 2); + if ($action eq 'UREF') { + $link = $line; + $link =~ s/^.*?\@uref\{(.*?)\}.*$/\1/; + ($link, $linkname) = split(/,/, $link); + $link = &trim($link); + $linkname = &trim($linkname); + $linkname = $link unless ($linkname); + $line =~ s/\@uref\{(.*?)\}/\$linkname\<\/ulink\>/g; + &message("ulink: $link, linkname: $linkname, line: $line"); # if ($verbose > 2); + } elsif ($action eq 'ANCHOR') { + + $link = $line; + $link =~ s/^.*?\@anchor\{(.*?)\}.*$/\1/; + $link = &anchorfix($link); + $line =~ s/\@anchor\{(.*?)\}//; + +# $line =~ s/\@anchor\{([^\}]*?)\}/\/; +# $line =~ s/\@anchor\{([^\}]*?\{.*?\}.*?)\}/\/; +# &message("anchor: $link on line: $line"); # if ($verbose > 2); + } else { + &raiseerror("Unrecognized 'special case' inline substitution code $pattern"); + } +} + +$loops = 0; + +# replace @-Commands with DocBook tags +# +sub converttags { + $loops = 0; + while (1) { + ($pattern, $action) = &matchtag(); + last unless ($pattern); + if ($line =~ /$pattern.*?\}/) { + $loops++; + &replacetag; + exit if ($loops >= 25); + } else { + &raiseerror("unmatched tag brackets in line $line"); + last; + } + } +} + +sub matchtag { + foreach $key (keys %tags) { + if ($line =~ /$key/) { + return ($key, $tags{$key}); + } + } + return ('', ''); +} + +sub replacetag { + my ($tag, + $attributes, + $part, + $newline, + $fragment, + ); + + if ($action eq '') { + &message("removing old tag, but not adding a new one") if ($verbose > 2); + $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\1/g; + $line =~ s/$pattern\{([^\}]*?)\}/\1/g; + } else { + &message("replacing $pattern with $action tags on line $line") if ($verbose > 1); + ($tag, $attributes) = split(/ /, $action); + if ($attributes) { + $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g; + $line =~ s/$pattern\{([^\}]*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g; + } else { + $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag\>\1\<\/$tag\>/g; + $line =~ s/$pattern\{([^\}]*?)\}/\<$tag\>\1\<\/$tag\>/g; + } + } +} + +# replace @-Commands with custom highlighting +# +sub convertdefinfos { + while (1) { + ($pattern, $action) = &matchdefinfo(); + last unless ($pattern); + &message("matched definfoenclose '$pattern'") if ($verbose >2); + &replacedefinfo; + } +} + +sub matchdefinfo { + foreach $key (keys %definfos) { + if ($line =~ /$key\{.*?\}/) { + return ($key, $definfos{$key}); + } + } + return ('', ''); +} + +sub replacedefinfo { + ($prefix, $suffix) = split(/,/,$action); + &message("pattern: $pattern, prefix: $prefix, suffix: $suffix") if ($verbose > 2); + $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/$prefix\1$suffix/g; + $line =~ s/$pattern\{([^\}]*?)\}/$prefix\1$suffix/g; + $line =~ s/$pattern\{\}/$prefix$suffix/g; + &message("definfoenclose replaced") if ($verbose >2); +} + +# replace @value{} with values +# +sub convertvalues { + my ($key, + $value); + + foreach $key (keys %setvalues) { + $value = $setvalues{$key}; + $line =~ s/\@value\{$key\}/$value/; + } +} + +sub convertlinks { + my $link, + $anchor; + + while ($line =~ /\@ref\{/) { + if ($line =~ /\@ref\{.*?\}/) { + &message("making xref on line $line") if ($verbose > 1); + $anchor = $line; + $anchor =~ s/^.*?\@ref\{//; + $anchor =~ s/\}.*$//; + $anchortitle = &trim($anchor); + $anchor = &anchorfix($anchor); + &message("anchor: $anchor, title; $anchortitle") if ($verbose > 2); + &raiseerror("NO ANCHOR in $line") unless ($anchor); + + # decide what kind of link to make based on whether it is + # to a section or not. Sections have titles that can be + # referenced using an xref. Anchors do not, and we have to + # render a full tag. + # + if ($nodelevels{$anchor}) { + $link = "\"; + } else { + $link = "\$anchortitle\<\/link\>"; + } + $line =~ s/\@ref\{.*?\}/$link/; + &message("made xref to tag $anchor on line: $line") if ($verbose > 1); + } else { + &raiseerror("unmatched ref brackets in line $line"); + last; + } + } +} + +sub anchorfix { + my $anchor = $_[0]; + $anchor = lc(&trim($anchor)); + $anchor = decode_entities($anchor); + $anchor =~ s/&/-and-/g; + $anchor =~ s/;//g; + $anchor = encode_entities($anchor); + $anchor =~ s/&(\w)grave/\1/g; + $anchor =~ s/&(\w)acute/\1/g; + $anchor =~ s/&(\w)circ/\1/g; + $anchor =~ s/&(\w)uml/\1/g; + $anchor =~ s/&(\w)tilde/\1/g; + $anchor =~ s/&(\w)cedil/\1/g; + $anchor =~ s/&/-and-/g; + $anchor =~ s/;//g; + $anchor =~ s/\//-/g; + $anchor =~ s/\s+/-/g; + $anchor =~ s/'//g; + $anchor =~ s/!//g; + $anchor =~ s/--/-/g; + $anchor =~ s/\+/plus/g; + $anchor =~ s/\*/x/g; + $anchor =~ s/\(//g; + $anchor =~ s/\)//g; + $anchor =~ s/\@/at/g; + $anchor =~ s/^-+//; + $anchor =~ s/-+$//; + $anchor =~ s/\$/S/; + $anchor =~ s/\.//; + return $anchor; +} + +############# +# META-DATA # +############# + +# this routine processes meta-data @-Commands +# +sub meta { + return if (&metaarg('\@setfilename', $sgmlfile)); + return if (&metaarg('\@settitle', $title)); + return if (&metaarg('\@author', $authorname)); + &raiseerror("Unknown meta-data command $line"); +} + +# Meta-data commands have arguments, +# so if the pattern matches, store the argument +# to the variable in $_[1] +# +sub metaarg { + my $pattern = $_[0]; + if ($line =~ /$pattern/) { + $_[1] = $line; + $_[1] =~ s/$pattern//; + } +} + +################ +# WRITE OUTPUT # +################ + +sub writeline { + my $temp = &trim($line); + + &convertinline; + unless (&bracketsmatch($line)) { + &raiseerror("brackets do not match in line: $line"); + last; + } + + unless ($literal) { + if ($temp eq '') { + &closeformalpara; + } else { + unless (($inpara) or ($intable)) { + if (($nodename) and ($nodeinit == 0)) { + &initnode; + } + ¶ + } + } + } + $line =~ s/\@\@/\@/g; + $line =~ s/DCM_AT/\@/g; + $line =~ s/DCM_LB/\{/g; + $line =~ s/DCM_RB/\}/g; + $buf .= $line . "\n"; +} + +############################################## +# STRUCTURAL TEXINFO -> DOCBOOK TRANSLATIONS # +############################################## + +# this is for when the source never set a position for this node in the +# chapter-style hierarchy, which we use. +# +# We have to make a best guess based on the node's settings, +# so put it right under whoever it calls the "up" node. +# +sub initnode { + my $parentname = $nodeup; + my $parentlevel = $nodelevels{$parentname}; + if ($parentlevel == "A") { + §1; + } elsif ($parentlevel == 1) { + §2; + } elsif ($parentlevel == 2) { + §3 + } elsif ($parentlevel == 3) { + §4; + } else { + &raiseerror("The parent node, $parentname, could not be found."); + } +} + +sub node { + $line =~ s/\@node\b//; + &convertinline; + ($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line); + $nodename = &trim($nodename); + $nodeid = &anchorfix($nodename); + $nodeprev = &trim($nodeprev); + $nodenext = &trim($nodenext); + $nodeup = &trim($nodeup); + $nodenexts{$nodename} = $nodenext; + $nodeprevs{$nodename} = $nodeprev; + $nodeups{$nodename} = $nodeup; + $nodeinit = 0; + &message("Name: $nodename Next: $nodenext Previous: $nodeprev Up: $nodeup") if ($verbose > 1); + &closeformalpara; + if ($nodename eq 'Top') { + $inabstract = 0; + $abstract = $buf; + $buf = ""; + } +} + +sub appendix { + &closeappendix; + $buf .= "\\$nodename\<\/title\>"; + $inappendix = 1; + $nodeinit = 1; + $nodelevels{$nodename} = "A"; + &message("processing node $nodename at level $nodelevels{$nodename}"); +} + +sub sect1 { + &closesect1; + $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $insect1 = 1; + $nodeinit = 1; + $nodelevels{$nodename} = 1; + &message("set node $nodename level to $nodelevels{$nodename}"); +} + +sub sect2 { + &closesect2; + if ($inappendix) { + $buf .= "\<sect1 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 1; + } else { + $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 2; + } + $insect2 = 1; + $nodeinit = 1; + $nodelevels{$nodename} = 2; + &message("set node $nodename level to $nodelevels{$nodename}"); +} + +sub sect3 { + &closesect3; + if ($inappendix) { + $buf .= "\<sect2 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 2; + } else { + $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 3; + } + $insect3 = 1; + $nodeinit = 1; + $nodelevels{$nodename} = 3; + &message("set node $nodename level to $nodelevels{$nodename}"); +} + +sub sect4 { + &closesect4; + if ($inappendix) { + $buf .= "\<sect3 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 3; + } else { + $buf .= "\<sect4 id='$nodeid'\>\<title id='$nodeid-title'\>$nodename\<\/title\>"; + $nodelevels{$nodename} = 4; + } + $insect4 = 1; + $nodeinit = 1; + $nodelevels{$nodename} = 4; + &message("set node $nodename level to $nodelevels{$nodename}"); +} + +sub table { + &closetable; + $buf .= '<informaltable><tgroup col=' . "'2'" . '><tbody>' . "\n"; + $intable = 1; +} + +sub tablerow { + &closetablerow; + $buf .= '<row>'; + $inrow = 1; +} + +sub tablecol { + &closetablecol; + $buf .= '<entry>'; + $incol = 1; +} + +sub orderedlist { + &closeformalpara; + $buf .= '<orderedlist>'; + $inorderedlist = 1; +} + +sub itemizedlist { + &closeformalpara; + $buf .= '<itemizedlist>'; + $initemizedlist = 1; +} + +sub item { + if ($intable) { + &tableitem; + } else { + &listitem; + } +} + +sub listitem { + &closelistitem; + $buf .= '<listitem>'; + $inlistitem = 1; +} + +sub formalpara { + my $title = $_[0]; + my $id = &anchorfix($title); + + &closeformalpara; + #$buf .= "\<formalpara id='$id'\>\<title\>$title\<\/title\>\n\<para\>"; + $informalpara = 1; + $inpara = 1; + $nodeinit = 1; +} + +sub para { + &closeformalpara; + $buf .= '<para>'; + $inpara = 1; +} + +sub literal { + if ($pattern =~ /\bformat\b/) { + $literal = 'programlisting'; + $buf .= '<programlisting>' . "\n"; + } elsif ($pattern =~ /\bdisplay\b/) { + $literal = 'screen'; + $buf .= '<screen>'; + } elsif ($pattern =~ /\bexample\b/) { + $literal = 'programlisting'; + $buf .= '<programlisting>'; + } elsif ($pattern =~ /\bsmallexample\b/) { + $literal = 'programlisting'; + $buf .= '<programlisting>'; + } +} + +sub closeappendix { + &closesect1; + if ($inappendix) { + $buf .= '</appendix>' . "\n"; + $inappendix = 0; + } +} + +sub closesect1 { + &closesect2; + if ($insect1) { + $buf .= '</sect1>' . "\n\n"; + $insect1 = 0; + } +} + +sub closesect2 { + &closesect3; + if ($insect2) { + if ($inappendix) { + $buf .= '</sect1>' . "\n\n"; + } else { + $buf .= '</sect2>' . "\n\n"; + } + $insect2 = 0; + } +} + +sub closesect3 { + &closesect4; + if ($insect3) { + if ($inappendix) { + $buf .= '</sect2>' . "\n\n"; + } else { + $buf .= '</sect3>' . "\n\n"; + } + $insect3 = 0; + } +} + +sub closesect4 { + &closeformalpara; + if ($insect4) { + if ($inappendix) { + $buf .= '</sect3>' . "\n\n"; + } else { + $buf .= '</sect4>' . "\n\n"; + } + $insect4 = 0; + } +} + +sub closetable { + &closetablerow; + if ($intable) { + $buf .= '</tbody></tgroup></informaltable>' . "\n"; + $intable = 0; + } +} + +sub closetablerow { + &closetablecol; + if ($inrow) { + $buf .= '</row>' . "\n"; + $inrow = 0; + } +} + +sub closetablecol { + if ($incol) { + $buf .= '</entry>' . "\n"; + $incol = 0; + } +} + +sub closeorderedlist { + &closelistitem; + if ($inorderedlist) { + $buf .= '</orderedlist>' . "\n"; + $inorderedlist = 0; + } +} + +sub closeitemizedlist { + &closelistitem; + if ($initemizedlist) { + $buf .= '</itemizedlist>' . "\n"; + $initemizedlist = 0; + } +} + +sub closelistitem { + &closeformalpara; + if ($inlistitem) { + $buf .= '</listitem>' . "\n"; + $inlistitem = 0; + } +} + +sub closeformalpara { + &closepara; + if ($informalpara) { +# $buf .= '</formalpara>'; + $informalpara = 0; + } +} + +sub closepara { + if ($inpara) { + $buf .= '</para>'; + $inpara = 0; + } +} + +sub closeliteral { + if ($literal) { + $buf .= '</' . $literal . '>' . "\n"; + $literal = ''; + } +} + + +################ +# CONDITIONALS # +################ + +sub set { + my ($name, + $value); + + ($foo, $name, $value) = split(/\s+/, $line); + $value = 1 unless ($value); + $setvalues{$name} = $value; + &message("set $name to $value") if ($verbose > 1); + $value = $setvalues{$name}; + &message("read back $value") if ($verbose > 2); +} + +sub clear { + my $name; + + ($foo, $name) = split(/\s+/, $line); + $setvalues{$name} = 0; + &message("cleared $name") if ($verbose > 1); +} + +sub ifset { + my $name; + + ($foo, $name) = split(/\s+/, $line); + return $ifsets{$name}; +} + +################# +# META-LANGUAGE # +################# + +# only the commands in %tags are currently supported +# +sub alias { + my $alias; + + ($foo, $alias) = split(/\s/, $line); + &message("alias: $alias") if ($verbose > 2); + ($alias, $command) = split(/=/, $alias); + &message("alias: $alias, command: $command") if ($verbose > 2); + $alias = '\@' . $alias . '\b'; + $command = '\@' . $command . '\b'; + &message("alias: $alias, command: $command") if ($verbose > 2); + $tag = $tags{$command}; + &message("tag: $tag") if ($verbose > 2); + $tags{$alias} = $tag; +} + +sub macro { + $line =~ s/\@macro\s*?(.*?)\{(.*?)\}/\1,\2/; + ($macro, $macroargs) = split(/,/, $line); + $macro = &trim($macro); + $macros{$macro} = $macro; + $macrotext = ''; + $inmacro = 1; +} + +# load custom @-commands +# +sub definfoenclose { + my ($name, + $prefix, + $suffix, + $key, + $replacement); + + $line =~ s/\@definfoenclose\s+//; + ($name, $prefix, $suffix) = split(/,/, $line); + $key = '\@' . $name; + $replacement = $prefix. ',' . $suffix; + $definfos{$key} = $replacement; +# &message("custom definfoenclosure: \[$name\] \[$prefix\] \[$suffix\] \/ \[$key\] \[$replacement\]") if ($verbose > 0); +} + +########### +# INDEXES # +########### + +sub defindex { + $line =~ s/^\@defindex\s+//; + $indexes{$line} = 1; +} + +#################### +# SUPPORT ROUTINES # +#################### + +sub bracketsmatch { + my $bline = ' ' . $_[0] . ' '; + my $left = scalar split(/\{/, $bline) - 1; + my $right = scalar split(/\}/, $bline) - 1; + if ($right == $left) { + $badbracketlines = 0; + $badbracketstartline = $currentline + 1 + } else { + $badbracketlines++; + &message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1); + if ($badbracketlines >= 20) { + &raiseerror("Mismatched brackets starting on line $badbracketstartline: $line"); + } + } + return ($right == $left); +} + +sub trimline { + $line = &trim($line); +} + +sub trim { + my $temp = $_[0]; + + $temp =~ s/^\s+//g; + $temp =~ s/\s+$//g; + return $temp; +} + +sub raiseerror { + my $errmsg = "ERROR: $currentfile $currentline $_[0]\n"; + print STDERR $errmsg; + $errors++; + if (($maxerrors) and ($errors >= $maxerrors)) { + print STDERR "aborting after $errors errors.\n"; + &writefile; + exit(1); + } +} + +sub message { + my $message; + $message = $_[0]; + + if ($logfile) { + print $logfh "$message\n"; + } else { + print "$message\n"; + } +} + +sub version { + print "texi2db version $version\n"; + print "Copyright (c) 2002 David Merrill \<david\@lupercalia.net\>.\n"; + print "\n"; + print "Converts a Texinfo file into DocBook.\n"; + print "\n"; + print "This free software; see the source for copying conditions. There is no\n"; + print "warranty; not even for merchantability or fitness for a particular purpose.\n"; +} + +sub usage { + &version; + print "\n"; + print "usage: texi2db [OPTIONS]... [FILE]\n"; + print "-f, --file FILE read from file rather than STDIN.\n"; + print "-o, --output-to FILE write to file rather than STDOUT.\n"; + print "-l, --log-to FILE write status messages to file rather than STDOUT.\n"; + print "-v, --verbose show diagnostic output.\n"; + print " use twice for lots of detail.\n"; + print " use thrice for insane amounts of detail.\n"; + print "-e, --max-errors MAX show diagnostic output.\n"; + print " use '--max-errors 0' to disable.\n"; + print "-h, --help show this usage message.\n"; + print " --version show the program version.\n"; + exit($error); +} + +__END__ +<!DOCTYPE ARTICLE PUBLIC "-//OASIS//DTD DocBook V4.1.2//EN" > + +<article> + <artheader> + <title>%%TITLE + + %%ABSTRACT + + +%%BODY + diff --git a/LDP/txt2db/README b/LDP/txt2db/README deleted file mode 100644 index 120edf37..00000000 --- a/LDP/txt2db/README +++ /dev/null @@ -1,69 +0,0 @@ -This is a utility to convert text files in a specific format into valid -DocBook. Just pass it the input filename on the commmand line and you'll -get a .sgml file out. It won't be a complete valid document, as it will -have no header information or dtd specification. It's just a DocBook -fragment, not a complete document. - -The following constructs are currently supported. If you need support for -an addition construct, write discuss@linuxdoc.org if you're subscribed, -or feedback@linuxdoc.org if you're not. - -Or just add it in the cvs. :-) - -Foo Foo - -=Title= Title - - -=Title|id= Title - - - works for other sect levels as well, and many other - tags. It is either the "id" value, or the "title" - value, depending on the semantics of the particular - tag. Usage should be obvious in context. - -==Title== Title - - -===Title=== Title - - - -#Foo -#Bar Foo -#Baz Bar -/# Baz - - -*Foo -*Bar Foo -*Baz Bar -/* Baz - - -[[http://foo.org]] - http://foo.org - - -[[http://foo.org Foo]] - Foo - - -[http://foo.org|Foo]] You can also delimit with the pipe character "|". - This works on any of these [[]] tags. - -[[file:Foo]] Foo - -'''Foo''' Foo - -A few DocBook structures do not have tags wrapped around them. They -are itself (duh!), and . If you insert anything -using these tags, no tags will be wrapped around it or inserted into it. -So if you want fine control over your tags, insert them yourself. - -These tags include: - - - - diff --git a/LDP/txt2db/sample.txt b/LDP/txt2db/sample.txt deleted file mode 100644 index 1a018924..00000000 --- a/LDP/txt2db/sample.txt +++ /dev/null @@ -1,72 +0,0 @@ -=Introduction|intro-to-the-program= - -The following list should be rendered as a qandaset: - -Q: Why?|why-id -A: Why -not? - -Multiple questions and multiple answers: - -Q: Why? -A: Why not? -A: Why not2? - -Q: Why? -A: Why not? - -Simple List - -*item -*item -*item -/* - -This tests arbitrary DocBook. It should be passed right on to the output file -with no changes. It can be nested arbitrarily deep. - -test -test some more - -This is the second level! - - - -This document is from the [[http://www.linuxdoc.org Linux Documentation Project]]. - -Numbered List - -This is an '''important''' [file]. - -#item -#item - -#item -/# - -Another to make sure the numbers restart at one. - -#item -#item -#item -/# - -=Bar= - -Just another section. - -==Level 2|level2== - -===Level 3|level3=== - -=Conclusion|conclusion= - -All previous sections should be properly closed. - - -=test again= - -[[ldp:INFO-SHEET]] -[[ldp:Distributions-HOWTO]] - - diff --git a/LDP/txt2db/txt2db.pl b/LDP/txt2db/txt2db.pl deleted file mode 100755 index 064ee394..00000000 --- a/LDP/txt2db/txt2db.pl +++ /dev/null @@ -1,523 +0,0 @@ -#!/usr/bin/perl -# -#Converts txt files into docbook. -# -# Requirements: -# -# If you use the "ldp:" namespace, you must have wget installed. -# Wget is used to request an xml record from the LDP # database, -# http://db.linuxdoc.org. -# - -use File::Basename; -use HTML::Entities; - -my($txtfile, $dbfile) = ''; - -#These keep track of which constructs we're in the middle of -my($level1, - $level2, - $level3, - $orderedlist, - $listitem, - $itemizedlist, - $para, - $qandaset, - $qandaentry, - $answer); - -my($line); -my($id, $title); - -my($verbose); - -my($error); -$error = 0; - -# read in cmd-line arguments -# -while (1) { - if($ARGV[0] eq "-o" or $ARGV[0] eq "--output-to") { - shift(@ARGV); - $dbfile = $ARGV[0]; - shift(@ARGV); - } elsif($ARGV[0] eq "-h" or $ARGV[0] eq "--help") { - &usage; - } elsif($ARGV[0] eq "-v" or $ARGV[0] eq "--verbose") { - $verbose++; - shift(@ARGV); - } else { - $txtfile = $ARGV[0]; - shift(@ARGV); - } - - if ($ARGV[0] eq '') { - last; - } -} - -# abort if no input file given -# -if($txtfile eq '') { - print "txt2db: ERROR text file not specified.\n\n"; - $error = 1; - &usage(); -} elsif( !(-r $txtfile) ) { - print "txt2db: ERROR cannot read $f ($!)\n\n"; - $error = 1; - &usage(); -} - -unless ($dbfile) { - ($basename, $path, $ext) = fileparse($txtfile); - $dbfile = $basename; - $dbfile =~ s/\..*?$/\.sgml/; -} - -$buf = ''; - -&proc_txt($txtfile); - -open(DB, "> $dbfile") || die "txt2db: cannot write to $dbfile ($!)\n"; -print DB $buf, "\n"; -close(DB); - -exit(0); - -# ----------------------------------------------------------- - -sub proc_txt { - my($f) = @_; - - my($linenumber); - $linenumber = 0; - - my ($noparatag, - $noparadepth); - $noparadepth = 0; - $noparaline = 0; - - # read in the text file - # - open(TXT, "$f") || die "txt2db: cannot open $f ($!)\n"; - while ($originalline = ) { - $line = $originalline; - $linenumber++; - - &trimline; - - # blank lines - if ($line eq '') { - if ($noparadepth == 0) { - &closenonsect; - next; - } - } - - # capitalize hints that can be entered in lowercase - # - $line =~ s/^q:/Q:/; - $line =~ s/^a:/A:/; - - # encode entities - # -# while ($line =~ //) { -# } -# decode_entities($line); - encode_entities($line); - - # inline docbook - # - # ulink - # - while ($line =~ /\[\[/) { - unless ($line =~ /\]\]/) { - $buf .= "ERROR unterminated '[[' tag on line $linenumber.\n"; - } - - # separate link url from link name - # - $link = $line; - $link=~ s/\n//g; - $link =~ s/.*?\[\[//; - $link =~ s/\]\].*?$//; - if ($link =~ /\|/) { - $linkname = $link; - $link =~ s/\|.+$//; - $linkname =~ s/^\S+\|//; - } else { - $linkname = $link; - } - - # kill quotes, they mess us up - # - $link =~ s/'/%27/g; - - # namespaces are handled differently - # - print "$link\n" if ($verbose); - if ($link =~ /^http:/) { - $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/; - } elsif ($link =~ /^mailto:/) { - $linkname =~ s/^mailto://; - $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/; - } elsif ($link =~ /^wiki:/) { - $linkname =~ s/^wiki://; - $link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/; - $link =~ s/\ /+/; - $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/; - } elsif ($link =~ /^ldp:/) { - $linkname =~ s/^ldp://; - $link =~ s/^ldp://; - $tempfile = "/tmp/txt2db-" . $rand; - $cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile"; - system("$cmd"); - open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n"; - $link = ""; - while ($url_line = ) { - $url_line =~ s/\n//; - if ($url_line =~ /identifier/) { - $link .= $url_line; - } - } - close(URL); - unlink $tempfile; - $link =~ s/^.*?//; - $link =~ s/<\/identifier>.*?$//; - if ($link eq '') { - $linkname = "ERROR: LDP namespace resolution failure on $linkname"; - } - $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/; - } elsif ($link =~ /^file:/) { - $linkname =~ s/^file://; - $line =~ s/\[\[.*?\]\]/$linkname<\/filename>/; - } else { - $line =~ s/\[\[.*?\]\]/$linkname<\/filename>/; - } - } - - # emphasis - # - while ($line =~ /'''.*'''/) { - $line =~ s/'''//; - $line =~ s/'''/<\/emphasis>/; - } - - # this block defines DocBook structures that won't be broken up with - # paragraphs when we hit empty lines: - # - # - # - # - # - # - # - - # forget about nopara - if ($noparadepth == 0) { - $noparatag = ""; - } - - # start a new nopara section - # - if ((($line =~ /^/) or - ($line =~ /^/) or - ($line =~ /^/) or - ($line =~ /^/)) and - ($noparadepth == 0)) { - &closepara; - $noparatag = $line; - $noparatag =~ s/^.*?.*?$//; - $noparaline = $linenumber; - if ($line =~ /^/) { - unless ($para) { - $line = "" . $line; - $para = 1; - } - } - } - - # count noparadepth - # - if ($noparatag ne '') { - $temp = $line; - while ($temp =~ /<$noparatag>/) { - $temp =~ s///; - $noparadepth ++; - } - while ($temp =~ /<\/$noparatag>/) { - $temp =~ s///; - $noparadepth --; - if ($noparadepth == 0) { - $noparaline == 0; - } - } - - # runon protection - # - if ($linenumber >= ($noparaline + 100)) { - $buf .= "ERROR: runon block starting on line $noparaline\n"; - last; - } - - # recover original line -- no whitespace modifiers - # - $line = $originalline; - - # sect3 - # - } elsif ($line =~ /^===/) { - &close3; - &splittitle; - if ($id eq '') { - $line = "$title\n"; - } else { - $line = "$title\n"; - } - $level3 = 1; - - # sect2 - # - } elsif ($line =~ /^==/) { - &close2; - &splittitle; - if ($id eq '') { - $line = "$title\n"; - } else { - $line = "$title\n"; - } - $level2 = 1; - - # sect1 - # - } elsif ($line =~ /^=/) { - &close1; - &splittitle; - if ($id eq '') { - $line = "$title\n"; - } else { - $line = "$title\n"; - } - $level1 = 1; - - # orderedlist - # - } elsif ($line =~ /^#/) { - &closeitemizedlist; - if ($orderedlist == 0) { - $buf .= "\n\n"; - $orderedlist = 1; - } - &closelistitem; - $line =~ s/^#//; - &trimline; - $line =~ s/^//; - $listitem = 1; - $para = 1; - } elsif ($line =~ /^\/#/) { - $line =~ s/^\/#//; - &trimline; - &closeorderedlist; - - # itemizedlist - # - } elsif ($line =~ /^\*/) { - &closeorderedlist; - if ($itemizedlist == 0) { - $buf .= "\n\n"; - $itemizedlist = 1; - } - &closelistitem; - $line =~ s/^\*//; - &trimline; - $line =~ s/^//; - $listitem = 1; - $para = 1; - } elsif ($line =~ /\/\*/) { - $line =~ s/^\/\*//; - &trimline; - &closeitemizedlist; - - # question - # - } elsif ($line =~ /^Q:/) { - &closelists; - &closeqandaentry; - $line =~ s/^Q://; - &trimline; - &splittitle; - if ($id eq '') { - $line = "" . $title . "\n"; - } else { - $line = "" . $title . "\n"; - } - unless ($qandaentry == 1) { - $line = "\n" . $line; - $qandaentry = 1; - } - if ($qandaset == 0) { - $line = "\n". $line; - $qandaset = 1; - } - - # answer - # - } elsif ($line =~ /^A:/) { - $line =~ s/^A://; - &trimline; - &closeanswer; - $line = "" . $line; - $answer = 1; - $para = 1; - - } elsif ($line =~ /^\s*----\s*$/) { - $line = ''; - - # para - # - } else { - if (($para == 0) and ($noparatag eq '')) { - $line = "" . $line; - $para = 1; - } else { - $line .= " "; - } - } - - $buf .= "$line "; - } - # close nesting - # - &close1; - - if ($noparadepth > 0) { - $buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n"; - } -} - -sub close1 { - &close2; - if ($level1 == 1) { - $buf .= "\n"; - $level1 = 0; - } -} - -sub close2 { - &close3; - if ($level2 == 1) { - $buf .= "\n"; - $level2 = 0; - } -} - -sub close3 { - &closeorderedlist; - &closeitemizedlist; - &closepara; - &closeqandaset; - if ($level3 == 1) { - $buf .= "\n"; - $level3 = 0; - } -} - -sub closenonsect { - &closepara; -# &closeorderedlist; -# &closeitemizedlist; -} - -sub closelistitem { - &closepara; - if ($listitem == 1 ) { - $buf .= "\n"; - $listitem = 0; - } -} - -sub closeorderedlist { - &closepara; - &closelistitem; - if ($orderedlist == 1 ) { - $buf .= "\n"; - $orderedlist = 0; - } -} - -sub closeitemizedlist { - &closepara; - &closelistitem; - if ($itemizedlist == 1 ) { - $buf .= "\n"; - $itemizedlist = 0; - } -} - -sub closelists { - &closeitemizedlist; - &closeorderedlist; -} - -sub closeanswer { - &closepara; - if ($answer == 1) { - $buf .= "\n"; - $answer = 0; - } -} - -sub closeqandaentry { - &closeanswer; - if ($qandaentry == 1) { - $buf .= "\n"; - $qandaentry = 0; - } -} - -sub closeqandaset { - &closeqandaentry; - if ($qandaset == 1) { - $buf .= "\n"; - $qandaset = 0; - } -} - -sub closepara { - if ($para == 1) { - $buf .= "\n"; - $para = 0; - } -} - -sub trimline { - $line =~ s/\s+$//; - $line =~ s/^\s+//; -} - -sub splittitle { - $line =~ s/^=+//; - $line =~ s/=+$//; - $title = $line; - $id = ""; - if ($line =~ /\|/) { - $title =~ s/\|.+//; - $id = $line; - $id =~ s/^.+\|//; - } - $title =~ s/\s+$//; - $title =~ s/^\s+//; - $id =~ s/\s+$//; - $id =~ s/^\s+//; -} - -sub usage { - print "Usage: txt2db [-v] [-h|-o ] \n"; - print "-o, --output-to write to the specified file.\n"; - print "-v, --verbose show diagnostic output.\n"; - print "-h, --help show this usage message.\n"; - exit($error); -} -