From 1872402c7f31db50946e6c7e5a512da066d5e275 Mon Sep 17 00:00:00 2001
From: david <>
Date: Sat, 2 Feb 2002 04:55:52 +0000
Subject: [PATCH] added texi2db, texinfo to docbook converter nearing initial
release
---
LDP/README | 3 +-
LDP/texi2db/texi2db | 1507 +++++++++++++++++++++++++++++++++++++++++
LDP/txt2db/README | 69 --
LDP/txt2db/sample.txt | 72 --
LDP/txt2db/txt2db.pl | 523 --------------
5 files changed, 1509 insertions(+), 665 deletions(-)
create mode 100755 LDP/texi2db/texi2db
delete mode 100644 LDP/txt2db/README
delete mode 100644 LDP/txt2db/sample.txt
delete mode 100755 LDP/txt2db/txt2db.pl
diff --git a/LDP/README b/LDP/README
index 1ac12570..f4b9bce0 100644
--- a/LDP/README
+++ b/LDP/README
@@ -28,8 +28,9 @@ scrollserver/ python web application server front end to scrollkeeper
stylesheets/ xsl stylesheets for xml -> html conversion
www/ www.scrollserver.org website
test/ to test your cvs without disturbing things, use this
-txt2db/ utility to convert text files into docbook
+texi2db/ utility to convert Texinfo files into docbook
users/ individual users' areas
+wt2db/ utility to convert WikiText files into docbook
www/ websites
db./ ldp database website
cgi-bin/ perl scripts for the ldp database
diff --git a/LDP/texi2db/texi2db b/LDP/texi2db/texi2db
new file mode 100755
index 00000000..f0c2aa42
--- /dev/null
+++ b/LDP/texi2db/texi2db
@@ -0,0 +1,1507 @@
+#!/usr/bin/perl
+#
+# this utility converts a texinfo file into LDP WikiText format.
+#
+use File::Basename;
+use FileHandle;
+use HTML::Entities;
+
+$version = "0.1";
+
+$errors = 0;
+$error = 0;
+
+# runtime options
+#
+$maxerrors = 10;
+$verbose = 0;
+
+# files
+#
+$requestedfile = "";
+$outputfile = "";
+$outfh = new FileHandle;
+$logfile = "";
+$logfh = new FileHandle;
+
+# the following are flags to maintain state
+#
+# docbook structure flags
+#
+$inabstract = 1;
+$inappendix = 0;
+$insect1 = 0;
+$insect2 = 0;
+$insect3 = 0;
+$insect44 = 0;
+$intable = 0;
+$inrow = 0;
+$incol = 0;
+$tableformat = "";
+$inorderedlist = 0;
+$initemizedlist = 0;
+$inlistitem = 0;
+$informalpara = 0;
+$inpara = 0;
+
+# these maintain internal program state
+#
+$line = "";
+$originalline = "";
+$currentfile = "";
+$currentline = "";
+$saveline = "";
+$badbracketlines = 0;
+$badbracketstartline = 0;
+$seekend = "";
+$literalend = "";
+$literal = "";
+
+# here, we hold onto meta-data that we will need to build
+# the article or book header structures.
+#
+$sgmlfile = "";
+$title = "";
+$authorname = "";
+$abstract = "";
+$buf = "";
+
+# these hold information about the node we are currently in, if any
+#
+$nodename = "";
+$nodenext = "";
+$nodeprev = "";
+$nodeup = "";
+$nodeid = "";
+$nodeinit = 0;
+
+# remember this hierarchical information for every node
+# we use it if the node doesn't specify a level.
+#
+%nodenexts = ();
+%nodeprevs = ();
+%nodeups = ();
+%nodelevels = ();
+
+# holds user variables
+#
+%setvalues = ();
+%ifsets = ();
+
+# this is used to build a list of index entries to be written when requested
+#
+%indexes = ();
+
+# These are used for keeping state on the most recent search of any of the following hashes
+#
+$pattern = "";
+$action = "";
+
+# In the following hash, the perl expressions are used to search
+# each line of the input file. If matched, the Action field defines
+# what to do with it.
+#
+# Note that @-Commands of this type must be at the beginning of the line.
+#
+# TODO: put these in a .conf file so it can be tweaked or customized
+#
+# Actions:
+#
+# DROPLINE Drop it on the floor
+# DROPBLOCK Drop the whole block on the floor
+# META Meta-Data
+# BYE Stop processing the file
+# INDEX An index entry
+# ORDEREDLIST Numbered (enumerated) list
+# ORDEREDLISTEND End an enumerated list
+# SEEKEND Skip everything until you find the corresponding @end tag
+# NODE A Texinfo node
+# SECT? One of the sectioning commands
+# APPENDIX? One of the appendix commands
+# DEFINFOENCLOSE Load a customized highlighting pattern
+# SET Set a variable
+# CLEAR Clear a variable
+# IFSET Test a variable
+#
+# @-Command Action
+%patterns = (
+ '\\input\b' =>'DROPLINE',
+# '\@\*' =>'PARA',
+ '^\@alias' =>'ALIAS',
+ '^\@author' =>'META',
+ '^\@bye\b' =>'BYE',
+ '^\@c\b' =>'DROPLINE',
+ '^\@cindex\b' =>'INDEX',
+ '^\@clear\b' =>'CLEAR',
+ '^\@comment\b' =>'DROPLINE',
+ '^\@contents\b' =>'DROPLINE',
+ '^\@defindex\b' =>'DROPLINE', # TODO
+ '^\@definfoenclose' =>'DEFINFOENCLOSE',
+ '^\@display\b' =>'LITERALBLOCK',
+ '^\@enumerate\b' =>'ORDEREDLIST',
+ '^\@end enumerate\b' =>'ORDEREDLISTEND',
+ '^\@example' =>'LITERALBLOCK',
+ '^\@format\b' =>'LITERALBLOCK',
+ '^\@headings\b' =>'DROPLINE',
+ '^\@html\b' =>'SEEKEND',
+ '^\@end html\b' =>'DROLINE',
+ '^\@ifhtml\b' =>'DROPLINE',
+ '^\@end ifhtml\b' =>'DROPLINE',
+ '^\@ifinfo\b' =>'SEEKEND',
+ '^\@end ifinfo\b' =>'DROPLINE',
+ '^\@ifnottex\b' =>'DROPLINE',
+ '^\@end ifnottex\b' =>'DROPLINE',
+ '^\@ifset\b' =>'IFSET', # handled like SEEKEND
+ '^\@end ifset\b' =>'DROPLINE', # or ignored, so ignore this too
+ '^\@ifclear\b' =>'IFCLEAR',
+ '^\@end ifclear\b' =>'DROPLINE',
+ '^\@iftex\b' =>'SEEKEND',
+ '^\@ignore\b' =>'SEEKEND',
+ '^\@itemize\b' =>'ITEMIZEDLIST',
+ '^\@end itemize\b' =>'ITEMIZEDLISTEND',
+ '^\@item\b' =>'ITEM',
+ '^\@itemx\b' =>'ITEM',
+ '^\@macro\b' =>'MACRO',
+ '^\@menu\b' =>'SEEKEND', # TODO
+ '^\@node\b' =>'NODE',
+ '^\@page\b' =>'DROPLINE',
+ '^\@printindex\b' =>'DROPLINE', # TODO
+ '^\@set\b' =>'SET',
+ '^\@setchapternewpage\b' =>'DROPLINE',
+ '^\@setfilename\b' =>'META',
+ '^\@settitle\b' =>'META',
+ '^\@smallbook\b' =>'DROPLINE',
+ '^\@smallexample\b' =>'LITERALBLOCK',
+ '^\@sp\b' =>'DROPLINE',
+ '^\@table\b' =>'TABLE',
+ '^\@end table\b' =>'TABLEEND',
+ '^\@tex\b' =>'SEEKEND',
+ '^\@titlepage\b' =>'SEEKEND',
+
+ '^\@top\b' =>'SECT1', # sectioning commands
+ '^\@chapter\b' =>'SECT1',
+ '^\@section\b' =>'SECT2',
+ '^\@subsection\b' =>'SECT3',
+ '^\@subsubsection\b' =>'SECT4',
+ '^\@centerchap\b' =>'SECT1',
+ '^\@unnumbered\b' =>'SECT1',
+ '^\@unnumberedsec\b' =>'SECT2',
+ '^\@unnumberedsubsec\b' =>'SECT3',
+ '^\@unnumberedsubsubsec\b' =>'SECT4',
+ '^\@majorheading\b' =>'SECT1',
+ '^\@chapheading\b' =>'SECT1',
+ '^\@heading\b' =>'SECT2',
+ '^\@subheading\b' =>'SECT3',
+ '^\@subsubheading\b' =>'SECT4',
+ '^\@appendix\b' =>'APPENDIX',
+ '^\@appendixsec\b' =>'SECT1',
+ '^\@appendixsubsec\b' =>'SECT2',
+ '^\@appendixsubsubsec\b' =>'SECT3',
+);
+
+# these are one-for-one string substitutions
+#
+# most of them are ISO-8879 codes for international characters,
+# used by HTML as well as XML, but there are exceptions.
+# There is one special case, "@@", which is the escaped form of @.
+# We do not handle @@ here, because it has to come after all other
+# @-commands have been processed.
+#
+# It's fine to replace these with nothing if you want them to be
+# simply dropped on the floor.
+#
+# I keep these in general alphabetical order, except that @end tags
+# are put right after their opening form.
+#
+# Note: Only the left side should be escaped!
+#
+# Example: @copyright{} becomes ©
+#
+%substitutions = (
+ '\@!' =>'!',
+ '\@\*' =>'',
+ '\@\s' =>' ',
+ '\@"A' =>'Ä',
+ '\@"E' =>'Ë',
+ '\@"I' =>'Ï',
+ '\@"O' =>'Ö',
+ '\@"U' =>'Ü',
+ '\@"a' =>'ä',
+ '\@"e' =>'ë',
+ '\@"i' =>'ï',
+ '\@"o' =>'ö',
+ '\@"u' =>'ü',
+ '\@' . "'" . 'A' =>'Á',
+ '\@' . "'" . 'E' =>'É',
+ '\@' . "'" . 'I' =>'Í',
+ '\@' . "'" . 'O' =>'Ó',
+ '\@' . "'" . 'U' =>'Ú',
+ '\@' . "'" . 'a' =>'á',
+ '\@' . "'" . 'e' =>'é',
+ '\@' . "'" . 'i' =>'í',
+ '\@' . "'" . 'o' =>'ó',
+ '\@' . "'" . 'u' =>'ú',
+ '\@,{c}' =>'¸',
+ '\@-' =>'¯',
+ '\@\.' =>'.',
+ '\@:' =>'',
+ '\@=' =>'',
+ '\@\?' =>'?',
+ '\@^A' =>'Â',
+ '\@^E' =>'Ê',
+ '\@^I' =>'Î',
+ '\@^O' =>'Ô',
+ '\@^U' =>'Û',
+ '\@^a' =>'â',
+ '\@^e' =>'ê',
+ '\@^i' =>'î',
+ '\@^o' =>'ô',
+ '\@^u' =>'û',
+ '\@`A' =>'À',
+ '\@`E' =>'È',
+ '\@`I' =>'Ì',
+ '\@`O' =>'Ò',
+ '\@`U' =>'Ù',
+ '\@`a' =>'à',
+ '\@`e' =>'è',
+ '\@`i' =>'ì',
+ '\@`o' =>'ò',
+ '\@`u' =>'ù',
+# '\@\{' =>'{', # do these manually at the end so brackets match
+# '\@\}' =>'}',
+ '\@~A' =>'Ã',
+ '\@~E' =>'&Etilde;',
+ '\@~I' =>'Ĩ',
+ '\@~O' =>'Õ',
+ '\@~U' =>'Ũ',
+ '\@~a' =>'ã',
+ '\@~e' =>'&etilde;',
+ '\@~i' =>'ĩ',
+ '\@~o' =>'õ',
+ '\@~u' =>'ũ',
+ '\@AA\{\}' =>'Å',
+ '\@aa\{\}' =>'å',
+ '\@AE\{\}' =>'Æ',
+ '\@ae\{\}' =>'æ',
+ '\@copyright\{\}' =>'©',
+ '\@dots\{\}' =>'…',
+ '\@center\b' =>'',
+ '\@enddots\{\}' =>'…',
+ '\@equiv' =>'≡',
+ '\@group\b' =>'',
+ '\@end group\b' =>'',
+ '\@exclamdown' =>'¡',
+ '\@noindent\b' =>'',
+ '\@refill\b' =>'',
+ '\@result\{\}' =>'=>',
+ '\@TeX\{\}' =>'TeX',
+);
+
+# these are inline tags that require some kind of programmatic control
+# because they do strange things.
+#
+%specsubstitutions = (
+ '\@uref\b' =>'UREF',
+ '\@anchor\b' =>'ANCHOR',
+);
+
+# These are special block wrapping tags. When we hit one of these,
+# we simply wrap whatever comes between the (always paired) tags
+# with a set of DocBook tags.
+#
+# Their handling is subtly different than the %tags.
+# We don't just replace them with the DocBook tags, because we
+# want any tags to go inside them, not outside.
+#
+%blocks = (
+ '\@quotation\b' =>'',
+ '\@end quotation' =>'
',
+);
+
+# these are inline @-Commands which are replaced by a set of tags instead of one-for-one
+# character substitution. Replacement is inline.
+#
+# there is also a special case, the "blank" tag, which does what you'd think.
+# It removes the existing tag, but doesn't add the wrapper.
+#
+# Example: @code{foo} becomes foo.
+#
+%tags = (
+ '\@acronym\b' =>'abbrev',
+ '\@b\b' =>"emphasis role='bold'",
+ '\@cite\b' =>'citetitle',
+ '\@code\b' =>'literal',
+ '\@command\b' =>'command',
+ '\@dfn\b' =>"emphasis role='bold'",
+ '\@email\b' =>'email',
+ '\@emph\b' =>'emphasis',
+ '\@env\b' =>'envvar',
+ '\@footnote\b' =>'footnote',
+ '\@file\b' =>'filename',
+ '\@i\b' =>'emphasis',
+ '\@samp\b' =>'literal',
+ '\@w\b' =>'',
+);
+
+# this is where definfoenclose definitions go, and they are processed last
+#
+%definfos = ();
+
+# and this is where macros are stored
+#
+$inmacro = 0;
+$macro = "";
+%macros = {};
+$macroargs = "";
+%macroargs = ();
+$macrotext = "";
+%macrotext = ();
+
+
+##############################################################################
+##############################################################################
+##############################################################################
+
+
+while (1) {
+ if ($ARGV[0] eq '') {
+ last;
+ } elsif ($ARGV[0] eq '--version') {
+ &version;
+ exit(0);
+ } elsif (($ARGV[0] eq '-f') or ($ARGV[0] eq '--file')) {
+ shift(@ARGV);
+ $requestedfile = $ARGV[0];
+ } elsif (($ARGV[0] eq '-o') or ($ARGV[0] eq '--output-to')) {
+ shift(@ARGV);
+ $outputfile = $ARGV[0];
+ } elsif (($ARGV[0] eq '-l') or ($ARGV[0] eq '--log-to')) {
+ shift(@ARGV);
+ $logfile = $ARGV[0];
+ } elsif (($ARGV[0] eq '-e') or ($ARGV[0] eq '--max-errors')) {
+ shift(@ARGV);
+ $maxerrors = $ARGV[0];
+ } elsif (($ARGV[0] eq '-v') or ($ARGV[0] eq '--verbose')) {
+ $verbose++;
+ } elsif (($ARGV[0] eq '-h') or ($ARGV[0] eq '--help')) {
+ &usage;
+ } else {
+ &raiseerror("unrecognized option: $ARGV[0]\n");
+ $error = 1;
+ &usage;
+ }
+ shift(@ARGV);
+}
+
+# be sure to open the log file before trying to write any messages to it!
+#
+if ($logfile) {
+ open ($logfh, "> $logfile") or die "cannot write to log file $logfile.\n";
+}
+
+$verbose = 3 if ($verbose > 3);
+&message("verbose mode on.") if ($verbose == 1);
+&message("debugging mode on.") if ($verbose ==2);
+&message("insanity mode on.") if ($verbose ==3);
+
+if ($outputfile) {
+ &message("output will go to $outputfile") if (verbose);
+ open($outfh, "> $outputfile");
+}
+
+while () {
+ $template .= $_;
+}
+
+&processfile ("$requestedfile");
+
+&writefile;
+close($outfh) if ($outputfile);
+close($logfh) if ($logfile);
+
+sub processfile {
+ my($filename,
+ $basename,
+ $path,
+ $ext,
+ $includefile,
+ $linenumber,
+ );
+
+ $macrolinecount = 0;
+
+ my $fh = new FileHandle;
+
+ $filename = @_[0];
+ $filename = 'STDIN' unless ($filename);
+
+ ($basename, $path, $ext) = fileparse($filename);
+ &message("processing $filename") if ($verbose);
+ $linenumber = 0;
+
+ open $fh, "<$filename" or raiseerror("cannot open $filename\n");
+LINE: while ($line = <$fh>) {
+ chomp($line);
+
+ $linenumber++;
+ $currentfile = $filename;
+ $currentline = $linenumber;
+
+ $originalline = $line;
+ $trimline;
+
+ if ($seekend) {
+ $seekend = '' if ($line =~ /$seekend/);
+ next LINE;
+ }
+
+ &message("LINE $currentline: $line") if ($verbose > 1);
+
+ $line =~ s/&/&/g; # keep before the < > so we don't clobber them
+ $line =~ s/\</g;
+ $line =~ s/\>/>/g;
+ $line =~ s/\@\@\}/DCM_AT\}/g;
+ $line =~ s/\@\{/DCM_LB/g;
+ $line =~ s/\@\}/DCM_RB/g;
+
+ if ($saveline) {
+ $line = $saveline . ' ' . $line;
+ $saveline = '';
+ &message("line restored, line: $line") if ($verbose > 1);
+ }
+ unless (&bracketsmatch($line)) {
+ &message("bracket mismatch") if ($verbose > 2);
+ $saveline = $line;
+ next LINE;
+ }
+
+ if ($line =~ /^\@include/) {
+ $includefile = $line;
+ $includefile =~ s/^\@include\s+//;
+ $includefile = $path . $includefile;
+ $line = '';
+ if (-e $includefile) {
+ &message("including $includefile") if ($verbose);
+ &processfile ("$includefile");
+ } else {
+ &raiseerror("include file $includefile not found.");
+ }
+ } elsif ($literalend) {
+ if ($line =~ /$literalend/) {
+ $literalend = '';
+ &closeliteral;
+ } else {
+ &writeline;
+ }
+ } elsif ($inmacro) {
+ ($pattern, $action) = &matchpattern();
+ if ($action eq 'SEEKEND') {
+ $seekend = $pattern;
+ $seekend =~ s/\@/\@end /;
+ next LINE;
+ } elsif ($action eq 'DROPLINE') {
+ next LINE;
+ }
+ if ($line =~ /\@end\s+macro/) {
+ #save macro
+ $macroargs{$macro} = $macroargs;
+ $macrotext{$macro} = $macrotext;
+ &message("macro name: $macro args: $macroargs text: $macrotext") if ($verbose > 2);
+ $inmacro = 0;
+ $macroargs = '';
+ $macrotext = '';
+ $macrolinecount = 0;
+ } elsif ($line =~ /\@quote-arg/) {
+ } else {
+ &message("macro line: $line") if ($verbose > 2);
+ $macrotext .= $line;
+ $macrolinecount++;
+ if ($macrolinecount >= 10) {
+ exit(1);
+ }
+ }
+ } elsif ($intable) {
+ &message("table: $line") if ($verbose > 2);
+ ($pattern, $action) = &matchpattern();
+ if ($action eq 'TABLEEND') {
+ &closetable;
+ } elsif ($action eq 'ITEM') {
+ &tablerow;
+ &tablecol;
+ $line =~ s/\@item\w*?\s+//;
+ &message("cell: $line") if ($verbose > 2);
+ $line = &trim($line);
+ $line = $tableformat . "\{$line\}" unless ($tableformat eq '@asis');
+ writeline;
+ &closetablecol;
+ } else {
+ &tablecol;
+ &writeline;
+ }
+ } else {
+ ($pattern, $action) = &matchblock();
+ if ($action) {
+ &message("matched block $pattern") if ($verbose > 2);
+ &closepara;
+ $buf .= $action;
+ next LINE;
+ }
+ ($pattern, $action) = &matchpattern();
+ if ($action eq 'DROPLINE') {
+ next LINE;
+ } elsif ($action eq 'SEEKEND') {
+ $seekend = $pattern;
+ $seekend =~ s/\@/\@end /;
+ } elsif ($action eq 'LITERALBLOCK') {
+ $literalend = $pattern;
+ $literalend =~ s/\@/\@end /;
+ &literal;
+ } elsif ($action eq 'ALIAS') {
+ &alias;
+ } elsif ($action eq 'MACRO') {
+ ¯o;
+ } elsif ($action eq 'META') {
+ &meta;
+ } elsif ($action eq 'NODE') {
+ &node;
+ } elsif ($action eq 'SECT1') {
+ §1;
+ } elsif ($action eq 'SECT2') {
+ §2;
+ } elsif ($action eq 'SECT3') {
+ §3;
+ } elsif ($action eq 'SECT4') {
+ §4;
+ } elsif ($action eq 'APPENDIX') {
+ &appendix;
+ } elsif ($action eq 'PARA') {
+ &closeformalpara;
+ ¶
+ } elsif ($action eq 'ORDEREDLIST') {
+ &orderedlist;
+ } elsif ($action eq 'ORDEREDLISTEND') {
+ &closeorderedlist;
+ } elsif ($action eq 'ITEMIZEDLIST') {
+ &itemizedlist;
+ } elsif ($action eq 'ITEMIZEDLISTEND') {
+ &closeitemizedlist;
+ } elsif ($action eq 'ITEM') {
+ &item;
+ } elsif ($action eq 'TABLE') {
+ $tableformat = $line;
+ $tableformat =~ s/^\@\w+\s+//;
+ &message("table format: $tableformat") if ($verbose > 2);
+ &table;
+ } elsif ($action eq 'TABLEEND') {
+ &closetable;
+ } elsif ($action eq 'INDEX') {
+ &message("indexing not yet supported") if ($verbose);
+ } elsif ($action eq 'DEFINFOENCLOSE') {
+ &definfoenclose;
+ } elsif ($action eq 'SET') {
+ &set;
+ } elsif ($action eq 'CLEAR') {
+ &clear;
+ } elsif ($action eq 'IFSET') {
+ $seekend = '\@end ifset' unless (&ifset());
+ } elsif ($action eq 'IFCLEAR') {
+ $seekend = '\@end ifclear' if (&ifset());
+ } elsif ($action eq 'BYE') {
+ &closesect1;
+ last;
+ } elsif ($action eq 'ERROR') {
+ &raiseerror("Hit ERROR tag with $pattern on $line");
+ } elsif ($action eq '') {
+ &writeline;
+ } else {
+ &raiseerror("Unknown action: $action on command $pattern");
+ }
+ }
+ }
+ close($fh);
+}
+
+sub writefile {
+ &closeappendix;
+ &message("copying meta-data to docbook") if ($verbose);
+ $template =~ s/%%TITLE/$title/;
+ $template =~ s/%%ABSTRACT/$abstract/;
+ $template =~ s/%%BODY/$buf/;
+# &message("deleting empty tags") if ($verbose);
+# $i = 0;
+# while (1) {
+# $template =~ s/\<(.+?)\>\W*\<\/\1\>//gs; # delete empty tags
+# $i++;
+# last if ($i == 10);
+# }
+ &message("writing $outputfile") if ($verbose);
+ print $outfh $template . "\n";
+ print $outfh ''. "\n";
+}
+
+####################
+# PATTERN MATCHING #
+####################
+
+# this is for beginning-of-line @-commands
+#
+sub matchpattern {
+ foreach $key (keys %patterns) {
+ if ($line =~ /$key/) {
+ return ($key, $patterns{$key});
+ }
+ }
+ return ('', '');
+}
+
+# this is for special wrapped blocks, like blockquotes
+#
+sub matchblock {
+ foreach $key (keys %blocks) {
+ if ($line =~ /$key/) {
+ return ($key, $blocks{$key});
+ }
+ }
+}
+
+# the rest are all inline processing
+#
+sub convertinline {
+ &convertsubsts; # single characters, do immediately and after macros
+ &message(" SUBSTS: $line") if ($verbose > 1);
+ &convertmacros; # can insert new @{} commands, do first
+ &message(" MACROS: $line") if ($verbose > 1);
+ &convertsubsts; # single characters, do immediately and after macros
+ &message(" SUBSTS: $line") if ($verbose > 1);
+ &convertvalues; # insert variables, do immediately after macros
+ &message(" VALUES: $line") if ($verbose > 1);
+ &convertdefinfos; # custom highlighting, do after macros
+ &message(" DINFOS: $line") if ($verbose > 1);
+ &converttags; # @foo{bar} to bar do next to last
+ &message(" DBTAGS: $line") if ($verbose > 1);
+ &convertlinks; # @ref{} => , do last
+ &message(" ULINKS: $line") if ($verbose > 1);
+ &convertspecsubsts; # @anchor{} => , do last
+ &message(" ANCHOR: $line") if ($verbose > 1);
+}
+
+# macros
+#
+sub convertmacros {
+ while (1) {
+ ($macro, $macroargs, $macrotext) = &matchmacro();
+ last unless ($macro);
+ &message("matched macro $macro") if ($verbose > 1);
+ if ($line =~ /\@$macro\{.*?\}/) {
+ &replacemacro;
+ } else {
+ &raiseerror("unmatched macro brackets in line $line");
+ last;
+ }
+ &message("line is now $line") if ($verbose > 2);
+ }
+}
+
+sub matchmacro {
+# &message("matchmacros $line");
+ foreach $macro (keys %macroargs) {
+ if ($line =~ /\@$macro\{/) {
+ return ($macro, $macroargs{$macro}, $macrotext{$macro});
+ }
+ }
+}
+
+sub replacemacro {
+ my $macroarg = $line;
+ my $open;
+ my $close;
+ $macroargs = "\\\\" . $macroargs . "\\\\";
+ $macroarg =~ s/^.*?\@$macro\{//;
+ $macroarg =~ s/\}.*?$//;
+ $macrotext =~ s/\@$macro\{([^\{]*?)\}/$macroarg/;
+ &message("replace pattern '$macroargs' with '$macroarg' in '$macrotext'") if ($verbose > 2);
+ $macrotext =~ s/$macroargs/$macroarg/g;
+ $line =~ s/\@$macro\{.*?\}/$macrotext/;
+}
+
+# character substitution
+#
+sub convertsubsts {
+ while (1) {
+ ($pattern, $action) = &matchsubst();
+ last unless ($pattern);
+ &replacesubst;
+ }
+}
+
+sub matchsubst {
+ foreach $key (keys %substitutions) {
+ if ($line =~ /$key/) {
+ return ($key, $substitutions{$key});
+ }
+ }
+ return ('', '');
+}
+
+sub replacesubst {
+ &message("replacing '$pattern' with '$action' on line $line") if ($verbose > 1);
+ $line =~ s/$pattern/$action/g;
+}
+
+# special character substitution
+#
+sub convertspecsubsts {
+ while (1) {
+ ($pattern, $action) = &matchspecsubst();
+ last unless ($pattern);
+ &replacespecsubst;
+ }
+}
+
+sub matchspecsubst {
+ foreach $key (keys %specsubstitutions) {
+ if ($line =~ /$key/) {
+ return ($key, $specsubstitutions{$key});
+ }
+ }
+ return ('', '');
+}
+
+sub replacespecsubst {
+ my $link;
+ my $linkname;
+
+ &message("replacing 'special case' $pattern $action on line $line") if ($verbose > 2);
+ if ($action eq 'UREF') {
+ $link = $line;
+ $link =~ s/^.*?\@uref\{(.*?)\}.*$/\1/;
+ ($link, $linkname) = split(/,/, $link);
+ $link = &trim($link);
+ $linkname = &trim($linkname);
+ $linkname = $link unless ($linkname);
+ $line =~ s/\@uref\{(.*?)\}/\$linkname\<\/ulink\>/g;
+ &message("ulink: $link, linkname: $linkname, line: $line"); # if ($verbose > 2);
+ } elsif ($action eq 'ANCHOR') {
+
+ $link = $line;
+ $link =~ s/^.*?\@anchor\{(.*?)\}.*$/\1/;
+ $link = &anchorfix($link);
+ $line =~ s/\@anchor\{(.*?)\}//;
+
+# $line =~ s/\@anchor\{([^\}]*?)\}/\/;
+# $line =~ s/\@anchor\{([^\}]*?\{.*?\}.*?)\}/\/;
+# &message("anchor: $link on line: $line"); # if ($verbose > 2);
+ } else {
+ &raiseerror("Unrecognized 'special case' inline substitution code $pattern");
+ }
+}
+
+$loops = 0;
+
+# replace @-Commands with DocBook tags
+#
+sub converttags {
+ $loops = 0;
+ while (1) {
+ ($pattern, $action) = &matchtag();
+ last unless ($pattern);
+ if ($line =~ /$pattern.*?\}/) {
+ $loops++;
+ &replacetag;
+ exit if ($loops >= 25);
+ } else {
+ &raiseerror("unmatched tag brackets in line $line");
+ last;
+ }
+ }
+}
+
+sub matchtag {
+ foreach $key (keys %tags) {
+ if ($line =~ /$key/) {
+ return ($key, $tags{$key});
+ }
+ }
+ return ('', '');
+}
+
+sub replacetag {
+ my ($tag,
+ $attributes,
+ $part,
+ $newline,
+ $fragment,
+ );
+
+ if ($action eq '') {
+ &message("removing old tag, but not adding a new one") if ($verbose > 2);
+ $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\1/g;
+ $line =~ s/$pattern\{([^\}]*?)\}/\1/g;
+ } else {
+ &message("replacing $pattern with $action tags on line $line") if ($verbose > 1);
+ ($tag, $attributes) = split(/ /, $action);
+ if ($attributes) {
+ $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g;
+ $line =~ s/$pattern\{([^\}]*?)\}/\<$tag $attributes\>\1\<\/$tag\>/g;
+ } else {
+ $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/\<$tag\>\1\<\/$tag\>/g;
+ $line =~ s/$pattern\{([^\}]*?)\}/\<$tag\>\1\<\/$tag\>/g;
+ }
+ }
+}
+
+# replace @-Commands with custom highlighting
+#
+sub convertdefinfos {
+ while (1) {
+ ($pattern, $action) = &matchdefinfo();
+ last unless ($pattern);
+ &message("matched definfoenclose '$pattern'") if ($verbose >2);
+ &replacedefinfo;
+ }
+}
+
+sub matchdefinfo {
+ foreach $key (keys %definfos) {
+ if ($line =~ /$key\{.*?\}/) {
+ return ($key, $definfos{$key});
+ }
+ }
+ return ('', '');
+}
+
+sub replacedefinfo {
+ ($prefix, $suffix) = split(/,/,$action);
+ &message("pattern: $pattern, prefix: $prefix, suffix: $suffix") if ($verbose > 2);
+ $line =~ s/$pattern\{([^\}]*?\{.*?\}.*?)\}/$prefix\1$suffix/g;
+ $line =~ s/$pattern\{([^\}]*?)\}/$prefix\1$suffix/g;
+ $line =~ s/$pattern\{\}/$prefix$suffix/g;
+ &message("definfoenclose replaced") if ($verbose >2);
+}
+
+# replace @value{} with values
+#
+sub convertvalues {
+ my ($key,
+ $value);
+
+ foreach $key (keys %setvalues) {
+ $value = $setvalues{$key};
+ $line =~ s/\@value\{$key\}/$value/;
+ }
+}
+
+sub convertlinks {
+ my $link,
+ $anchor;
+
+ while ($line =~ /\@ref\{/) {
+ if ($line =~ /\@ref\{.*?\}/) {
+ &message("making xref on line $line") if ($verbose > 1);
+ $anchor = $line;
+ $anchor =~ s/^.*?\@ref\{//;
+ $anchor =~ s/\}.*$//;
+ $anchortitle = &trim($anchor);
+ $anchor = &anchorfix($anchor);
+ &message("anchor: $anchor, title; $anchortitle") if ($verbose > 2);
+ &raiseerror("NO ANCHOR in $line") unless ($anchor);
+
+ # decide what kind of link to make based on whether it is
+ # to a section or not. Sections have titles that can be
+ # referenced using an xref. Anchors do not, and we have to
+ # render a full tag.
+ #
+ if ($nodelevels{$anchor}) {
+ $link = "\";
+ } else {
+ $link = "\$anchortitle\<\/link\>";
+ }
+ $line =~ s/\@ref\{.*?\}/$link/;
+ &message("made xref to tag $anchor on line: $line") if ($verbose > 1);
+ } else {
+ &raiseerror("unmatched ref brackets in line $line");
+ last;
+ }
+ }
+}
+
+sub anchorfix {
+ my $anchor = $_[0];
+ $anchor = lc(&trim($anchor));
+ $anchor = decode_entities($anchor);
+ $anchor =~ s/&/-and-/g;
+ $anchor =~ s/;//g;
+ $anchor = encode_entities($anchor);
+ $anchor =~ s/&(\w)grave/\1/g;
+ $anchor =~ s/&(\w)acute/\1/g;
+ $anchor =~ s/&(\w)circ/\1/g;
+ $anchor =~ s/&(\w)uml/\1/g;
+ $anchor =~ s/&(\w)tilde/\1/g;
+ $anchor =~ s/&(\w)cedil/\1/g;
+ $anchor =~ s/&/-and-/g;
+ $anchor =~ s/;//g;
+ $anchor =~ s/\//-/g;
+ $anchor =~ s/\s+/-/g;
+ $anchor =~ s/'//g;
+ $anchor =~ s/!//g;
+ $anchor =~ s/--/-/g;
+ $anchor =~ s/\+/plus/g;
+ $anchor =~ s/\*/x/g;
+ $anchor =~ s/\(//g;
+ $anchor =~ s/\)//g;
+ $anchor =~ s/\@/at/g;
+ $anchor =~ s/^-+//;
+ $anchor =~ s/-+$//;
+ $anchor =~ s/\$/S/;
+ $anchor =~ s/\.//;
+ return $anchor;
+}
+
+#############
+# META-DATA #
+#############
+
+# this routine processes meta-data @-Commands
+#
+sub meta {
+ return if (&metaarg('\@setfilename', $sgmlfile));
+ return if (&metaarg('\@settitle', $title));
+ return if (&metaarg('\@author', $authorname));
+ &raiseerror("Unknown meta-data command $line");
+}
+
+# Meta-data commands have arguments,
+# so if the pattern matches, store the argument
+# to the variable in $_[1]
+#
+sub metaarg {
+ my $pattern = $_[0];
+ if ($line =~ /$pattern/) {
+ $_[1] = $line;
+ $_[1] =~ s/$pattern//;
+ }
+}
+
+################
+# WRITE OUTPUT #
+################
+
+sub writeline {
+ my $temp = &trim($line);
+
+ &convertinline;
+ unless (&bracketsmatch($line)) {
+ &raiseerror("brackets do not match in line: $line");
+ last;
+ }
+
+ unless ($literal) {
+ if ($temp eq '') {
+ &closeformalpara;
+ } else {
+ unless (($inpara) or ($intable)) {
+ if (($nodename) and ($nodeinit == 0)) {
+ &initnode;
+ }
+ ¶
+ }
+ }
+ }
+ $line =~ s/\@\@/\@/g;
+ $line =~ s/DCM_AT/\@/g;
+ $line =~ s/DCM_LB/\{/g;
+ $line =~ s/DCM_RB/\}/g;
+ $buf .= $line . "\n";
+}
+
+##############################################
+# STRUCTURAL TEXINFO -> DOCBOOK TRANSLATIONS #
+##############################################
+
+# this is for when the source never set a position for this node in the
+# chapter-style hierarchy, which we use.
+#
+# We have to make a best guess based on the node's settings,
+# so put it right under whoever it calls the "up" node.
+#
+sub initnode {
+ my $parentname = $nodeup;
+ my $parentlevel = $nodelevels{$parentname};
+ if ($parentlevel == "A") {
+ §1;
+ } elsif ($parentlevel == 1) {
+ §2;
+ } elsif ($parentlevel == 2) {
+ §3
+ } elsif ($parentlevel == 3) {
+ §4;
+ } else {
+ &raiseerror("The parent node, $parentname, could not be found.");
+ }
+}
+
+sub node {
+ $line =~ s/\@node\b//;
+ &convertinline;
+ ($nodename, $nodenext, $nodeprev, $nodeup) = split(/,/, $line);
+ $nodename = &trim($nodename);
+ $nodeid = &anchorfix($nodename);
+ $nodeprev = &trim($nodeprev);
+ $nodenext = &trim($nodenext);
+ $nodeup = &trim($nodeup);
+ $nodenexts{$nodename} = $nodenext;
+ $nodeprevs{$nodename} = $nodeprev;
+ $nodeups{$nodename} = $nodeup;
+ $nodeinit = 0;
+ &message("Name: $nodename Next: $nodenext Previous: $nodeprev Up: $nodeup") if ($verbose > 1);
+ &closeformalpara;
+ if ($nodename eq 'Top') {
+ $inabstract = 0;
+ $abstract = $buf;
+ $buf = "";
+ }
+}
+
+sub appendix {
+ &closeappendix;
+ $buf .= "\\$nodename\<\/title\>";
+ $inappendix = 1;
+ $nodeinit = 1;
+ $nodelevels{$nodename} = "A";
+ &message("processing node $nodename at level $nodelevels{$nodename}");
+}
+
+sub sect1 {
+ &closesect1;
+ $buf .= "\\$nodename\<\/title\>";
+ $insect1 = 1;
+ $nodeinit = 1;
+ $nodelevels{$nodename} = 1;
+ &message("set node $nodename level to $nodelevels{$nodename}");
+}
+
+sub sect2 {
+ &closesect2;
+ if ($inappendix) {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 1;
+ } else {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 2;
+ }
+ $insect2 = 1;
+ $nodeinit = 1;
+ $nodelevels{$nodename} = 2;
+ &message("set node $nodename level to $nodelevels{$nodename}");
+}
+
+sub sect3 {
+ &closesect3;
+ if ($inappendix) {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 2;
+ } else {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 3;
+ }
+ $insect3 = 1;
+ $nodeinit = 1;
+ $nodelevels{$nodename} = 3;
+ &message("set node $nodename level to $nodelevels{$nodename}");
+}
+
+sub sect4 {
+ &closesect4;
+ if ($inappendix) {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 3;
+ } else {
+ $buf .= "\\$nodename\<\/title\>";
+ $nodelevels{$nodename} = 4;
+ }
+ $insect4 = 1;
+ $nodeinit = 1;
+ $nodelevels{$nodename} = 4;
+ &message("set node $nodename level to $nodelevels{$nodename}");
+}
+
+sub table {
+ &closetable;
+ $buf .= '' . "\n";
+ $intable = 1;
+}
+
+sub tablerow {
+ &closetablerow;
+ $buf .= '';
+ $inrow = 1;
+}
+
+sub tablecol {
+ &closetablecol;
+ $buf .= '';
+ $incol = 1;
+}
+
+sub orderedlist {
+ &closeformalpara;
+ $buf .= '';
+ $inorderedlist = 1;
+}
+
+sub itemizedlist {
+ &closeformalpara;
+ $buf .= '';
+ $initemizedlist = 1;
+}
+
+sub item {
+ if ($intable) {
+ &tableitem;
+ } else {
+ &listitem;
+ }
+}
+
+sub listitem {
+ &closelistitem;
+ $buf .= '';
+ $inlistitem = 1;
+}
+
+sub formalpara {
+ my $title = $_[0];
+ my $id = &anchorfix($title);
+
+ &closeformalpara;
+ #$buf .= "\\$title\<\/title\>\n\";
+ $informalpara = 1;
+ $inpara = 1;
+ $nodeinit = 1;
+}
+
+sub para {
+ &closeformalpara;
+ $buf .= '';
+ $inpara = 1;
+}
+
+sub literal {
+ if ($pattern =~ /\bformat\b/) {
+ $literal = 'programlisting';
+ $buf .= '' . "\n";
+ } elsif ($pattern =~ /\bdisplay\b/) {
+ $literal = 'screen';
+ $buf .= '';
+ } elsif ($pattern =~ /\bexample\b/) {
+ $literal = 'programlisting';
+ $buf .= '';
+ } elsif ($pattern =~ /\bsmallexample\b/) {
+ $literal = 'programlisting';
+ $buf .= '';
+ }
+}
+
+sub closeappendix {
+ &closesect1;
+ if ($inappendix) {
+ $buf .= '' . "\n";
+ $inappendix = 0;
+ }
+}
+
+sub closesect1 {
+ &closesect2;
+ if ($insect1) {
+ $buf .= '' . "\n\n";
+ $insect1 = 0;
+ }
+}
+
+sub closesect2 {
+ &closesect3;
+ if ($insect2) {
+ if ($inappendix) {
+ $buf .= '' . "\n\n";
+ } else {
+ $buf .= '' . "\n\n";
+ }
+ $insect2 = 0;
+ }
+}
+
+sub closesect3 {
+ &closesect4;
+ if ($insect3) {
+ if ($inappendix) {
+ $buf .= '' . "\n\n";
+ } else {
+ $buf .= '' . "\n\n";
+ }
+ $insect3 = 0;
+ }
+}
+
+sub closesect4 {
+ &closeformalpara;
+ if ($insect4) {
+ if ($inappendix) {
+ $buf .= '' . "\n\n";
+ } else {
+ $buf .= '' . "\n\n";
+ }
+ $insect4 = 0;
+ }
+}
+
+sub closetable {
+ &closetablerow;
+ if ($intable) {
+ $buf .= '
' . "\n";
+ $intable = 0;
+ }
+}
+
+sub closetablerow {
+ &closetablecol;
+ if ($inrow) {
+ $buf .= '' . "\n";
+ $inrow = 0;
+ }
+}
+
+sub closetablecol {
+ if ($incol) {
+ $buf .= '' . "\n";
+ $incol = 0;
+ }
+}
+
+sub closeorderedlist {
+ &closelistitem;
+ if ($inorderedlist) {
+ $buf .= '' . "\n";
+ $inorderedlist = 0;
+ }
+}
+
+sub closeitemizedlist {
+ &closelistitem;
+ if ($initemizedlist) {
+ $buf .= '' . "\n";
+ $initemizedlist = 0;
+ }
+}
+
+sub closelistitem {
+ &closeformalpara;
+ if ($inlistitem) {
+ $buf .= '' . "\n";
+ $inlistitem = 0;
+ }
+}
+
+sub closeformalpara {
+ &closepara;
+ if ($informalpara) {
+# $buf .= '';
+ $informalpara = 0;
+ }
+}
+
+sub closepara {
+ if ($inpara) {
+ $buf .= '';
+ $inpara = 0;
+ }
+}
+
+sub closeliteral {
+ if ($literal) {
+ $buf .= '' . $literal . '>' . "\n";
+ $literal = '';
+ }
+}
+
+
+################
+# CONDITIONALS #
+################
+
+sub set {
+ my ($name,
+ $value);
+
+ ($foo, $name, $value) = split(/\s+/, $line);
+ $value = 1 unless ($value);
+ $setvalues{$name} = $value;
+ &message("set $name to $value") if ($verbose > 1);
+ $value = $setvalues{$name};
+ &message("read back $value") if ($verbose > 2);
+}
+
+sub clear {
+ my $name;
+
+ ($foo, $name) = split(/\s+/, $line);
+ $setvalues{$name} = 0;
+ &message("cleared $name") if ($verbose > 1);
+}
+
+sub ifset {
+ my $name;
+
+ ($foo, $name) = split(/\s+/, $line);
+ return $ifsets{$name};
+}
+
+#################
+# META-LANGUAGE #
+#################
+
+# only the commands in %tags are currently supported
+#
+sub alias {
+ my $alias;
+
+ ($foo, $alias) = split(/\s/, $line);
+ &message("alias: $alias") if ($verbose > 2);
+ ($alias, $command) = split(/=/, $alias);
+ &message("alias: $alias, command: $command") if ($verbose > 2);
+ $alias = '\@' . $alias . '\b';
+ $command = '\@' . $command . '\b';
+ &message("alias: $alias, command: $command") if ($verbose > 2);
+ $tag = $tags{$command};
+ &message("tag: $tag") if ($verbose > 2);
+ $tags{$alias} = $tag;
+}
+
+sub macro {
+ $line =~ s/\@macro\s*?(.*?)\{(.*?)\}/\1,\2/;
+ ($macro, $macroargs) = split(/,/, $line);
+ $macro = &trim($macro);
+ $macros{$macro} = $macro;
+ $macrotext = '';
+ $inmacro = 1;
+}
+
+# load custom @-commands
+#
+sub definfoenclose {
+ my ($name,
+ $prefix,
+ $suffix,
+ $key,
+ $replacement);
+
+ $line =~ s/\@definfoenclose\s+//;
+ ($name, $prefix, $suffix) = split(/,/, $line);
+ $key = '\@' . $name;
+ $replacement = $prefix. ',' . $suffix;
+ $definfos{$key} = $replacement;
+# &message("custom definfoenclosure: \[$name\] \[$prefix\] \[$suffix\] \/ \[$key\] \[$replacement\]") if ($verbose > 0);
+}
+
+###########
+# INDEXES #
+###########
+
+sub defindex {
+ $line =~ s/^\@defindex\s+//;
+ $indexes{$line} = 1;
+}
+
+####################
+# SUPPORT ROUTINES #
+####################
+
+sub bracketsmatch {
+ my $bline = ' ' . $_[0] . ' ';
+ my $left = scalar split(/\{/, $bline) - 1;
+ my $right = scalar split(/\}/, $bline) - 1;
+ if ($right == $left) {
+ $badbracketlines = 0;
+ $badbracketstartline = $currentline + 1
+ } else {
+ $badbracketlines++;
+ &message("unmatched \{\} (left: $left, right: $right) on line $line") if ($verbose > 1);
+ if ($badbracketlines >= 20) {
+ &raiseerror("Mismatched brackets starting on line $badbracketstartline: $line");
+ }
+ }
+ return ($right == $left);
+}
+
+sub trimline {
+ $line = &trim($line);
+}
+
+sub trim {
+ my $temp = $_[0];
+
+ $temp =~ s/^\s+//g;
+ $temp =~ s/\s+$//g;
+ return $temp;
+}
+
+sub raiseerror {
+ my $errmsg = "ERROR: $currentfile $currentline $_[0]\n";
+ print STDERR $errmsg;
+ $errors++;
+ if (($maxerrors) and ($errors >= $maxerrors)) {
+ print STDERR "aborting after $errors errors.\n";
+ &writefile;
+ exit(1);
+ }
+}
+
+sub message {
+ my $message;
+ $message = $_[0];
+
+ if ($logfile) {
+ print $logfh "$message\n";
+ } else {
+ print "$message\n";
+ }
+}
+
+sub version {
+ print "texi2db version $version\n";
+ print "Copyright (c) 2002 David Merrill \.\n";
+ print "\n";
+ print "Converts a Texinfo file into DocBook.\n";
+ print "\n";
+ print "This free software; see the source for copying conditions. There is no\n";
+ print "warranty; not even for merchantability or fitness for a particular purpose.\n";
+}
+
+sub usage {
+ &version;
+ print "\n";
+ print "usage: texi2db [OPTIONS]... [FILE]\n";
+ print "-f, --file FILE read from file rather than STDIN.\n";
+ print "-o, --output-to FILE write to file rather than STDOUT.\n";
+ print "-l, --log-to FILE write status messages to file rather than STDOUT.\n";
+ print "-v, --verbose show diagnostic output.\n";
+ print " use twice for lots of detail.\n";
+ print " use thrice for insane amounts of detail.\n";
+ print "-e, --max-errors MAX show diagnostic output.\n";
+ print " use '--max-errors 0' to disable.\n";
+ print "-h, --help show this usage message.\n";
+ print " --version show the program version.\n";
+ exit($error);
+}
+
+__END__
+
+
+
+
+ %%TITLE
+
+ %%ABSTRACT
+
+
+%%BODY
+
diff --git a/LDP/txt2db/README b/LDP/txt2db/README
deleted file mode 100644
index 120edf37..00000000
--- a/LDP/txt2db/README
+++ /dev/null
@@ -1,69 +0,0 @@
-This is a utility to convert text files in a specific format into valid
-DocBook. Just pass it the input filename on the commmand line and you'll
-get a .sgml file out. It won't be a complete valid document, as it will
-have no header information or dtd specification. It's just a DocBook
-fragment, not a complete document.
-
-The following constructs are currently supported. If you need support for
-an addition construct, write discuss@linuxdoc.org if you're subscribed,
-or feedback@linuxdoc.org if you're not.
-
-Or just add it in the cvs. :-)
-
-Foo Foo
-
-=Title= Title
-
-
-=Title|id= Title
-
-
- works for other sect levels as well, and many other
- tags. It is either the "id" value, or the "title"
- value, depending on the semantics of the particular
- tag. Usage should be obvious in context.
-
-==Title== Title
-
-
-===Title=== Title
-
-
-
-#Foo
-#Bar Foo
-#Baz Bar
-/# Baz
-
-
-*Foo
-*Bar Foo
-*Baz Bar
-/* Baz
-
-
-[[http://foo.org]]
- http://foo.org
-
-
-[[http://foo.org Foo]]
- Foo
-
-
-[http://foo.org|Foo]] You can also delimit with the pipe character "|".
- This works on any of these [[]] tags.
-
-[[file:Foo]] Foo
-
-'''Foo''' Foo
-
-A few DocBook structures do not have tags wrapped around them. They
-are itself (duh!), and . If you insert anything
-using these tags, no tags will be wrapped around it or inserted into it.
-So if you want fine control over your tags, insert them yourself.
-
-These tags include:
-
-
-
-
diff --git a/LDP/txt2db/sample.txt b/LDP/txt2db/sample.txt
deleted file mode 100644
index 1a018924..00000000
--- a/LDP/txt2db/sample.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-=Introduction|intro-to-the-program=
-
-The following list should be rendered as a qandaset:
-
-Q: Why?|why-id
-A: Why
-not?
-
-Multiple questions and multiple answers:
-
-Q: Why?
-A: Why not?
-A: Why not2?
-
-Q: Why?
-A: Why not?
-
-Simple List
-
-*item
-*item
-*item
-/*
-
-This tests arbitrary DocBook. It should be passed right on to the output file
-with no changes. It can be nested arbitrarily deep.
-
-test
-test some more
-
-This is the second level!
-
-
-
-This document is from the [[http://www.linuxdoc.org Linux Documentation Project]].
-
-Numbered List
-
-This is an '''important''' [file].
-
-#item
-#item
-
-#item
-/#
-
-Another to make sure the numbers restart at one.
-
-#item
-#item
-#item
-/#
-
-=Bar=
-
-Just another section.
-
-==Level 2|level2==
-
-===Level 3|level3===
-
-=Conclusion|conclusion=
-
-All previous sections should be properly closed.
-
-
-=test again=
-
-[[ldp:INFO-SHEET]]
-[[ldp:Distributions-HOWTO]]
-
-
diff --git a/LDP/txt2db/txt2db.pl b/LDP/txt2db/txt2db.pl
deleted file mode 100755
index 064ee394..00000000
--- a/LDP/txt2db/txt2db.pl
+++ /dev/null
@@ -1,523 +0,0 @@
-#!/usr/bin/perl
-#
-#Converts txt files into docbook.
-#
-# Requirements:
-#
-# If you use the "ldp:" namespace, you must have wget installed.
-# Wget is used to request an xml record from the LDP # database,
-# http://db.linuxdoc.org.
-#
-
-use File::Basename;
-use HTML::Entities;
-
-my($txtfile, $dbfile) = '';
-
-#These keep track of which constructs we're in the middle of
-my($level1,
- $level2,
- $level3,
- $orderedlist,
- $listitem,
- $itemizedlist,
- $para,
- $qandaset,
- $qandaentry,
- $answer);
-
-my($line);
-my($id, $title);
-
-my($verbose);
-
-my($error);
-$error = 0;
-
-# read in cmd-line arguments
-#
-while (1) {
- if($ARGV[0] eq "-o" or $ARGV[0] eq "--output-to") {
- shift(@ARGV);
- $dbfile = $ARGV[0];
- shift(@ARGV);
- } elsif($ARGV[0] eq "-h" or $ARGV[0] eq "--help") {
- &usage;
- } elsif($ARGV[0] eq "-v" or $ARGV[0] eq "--verbose") {
- $verbose++;
- shift(@ARGV);
- } else {
- $txtfile = $ARGV[0];
- shift(@ARGV);
- }
-
- if ($ARGV[0] eq '') {
- last;
- }
-}
-
-# abort if no input file given
-#
-if($txtfile eq '') {
- print "txt2db: ERROR text file not specified.\n\n";
- $error = 1;
- &usage();
-} elsif( !(-r $txtfile) ) {
- print "txt2db: ERROR cannot read $f ($!)\n\n";
- $error = 1;
- &usage();
-}
-
-unless ($dbfile) {
- ($basename, $path, $ext) = fileparse($txtfile);
- $dbfile = $basename;
- $dbfile =~ s/\..*?$/\.sgml/;
-}
-
-$buf = '';
-
-&proc_txt($txtfile);
-
-open(DB, "> $dbfile") || die "txt2db: cannot write to $dbfile ($!)\n";
-print DB $buf, "\n";
-close(DB);
-
-exit(0);
-
-# -----------------------------------------------------------
-
-sub proc_txt {
- my($f) = @_;
-
- my($linenumber);
- $linenumber = 0;
-
- my ($noparatag,
- $noparadepth);
- $noparadepth = 0;
- $noparaline = 0;
-
- # read in the text file
- #
- open(TXT, "$f") || die "txt2db: cannot open $f ($!)\n";
- while ($originalline = ) {
- $line = $originalline;
- $linenumber++;
-
- &trimline;
-
- # blank lines
- if ($line eq '') {
- if ($noparadepth == 0) {
- &closenonsect;
- next;
- }
- }
-
- # capitalize hints that can be entered in lowercase
- #
- $line =~ s/^q:/Q:/;
- $line =~ s/^a:/A:/;
-
- # encode entities
- #
-# while ($line =~ //) {
-# }
-# decode_entities($line);
- encode_entities($line);
-
- # inline docbook
- #
- # ulink
- #
- while ($line =~ /\[\[/) {
- unless ($line =~ /\]\]/) {
- $buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
- }
-
- # separate link url from link name
- #
- $link = $line;
- $link=~ s/\n//g;
- $link =~ s/.*?\[\[//;
- $link =~ s/\]\].*?$//;
- if ($link =~ /\|/) {
- $linkname = $link;
- $link =~ s/\|.+$//;
- $linkname =~ s/^\S+\|//;
- } else {
- $linkname = $link;
- }
-
- # kill quotes, they mess us up
- #
- $link =~ s/'/%27/g;
-
- # namespaces are handled differently
- #
- print "$link\n" if ($verbose);
- if ($link =~ /^http:/) {
- $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/;
- } elsif ($link =~ /^mailto:/) {
- $linkname =~ s/^mailto://;
- $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/;
- } elsif ($link =~ /^wiki:/) {
- $linkname =~ s/^wiki://;
- $link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
- $link =~ s/\ /+/;
- $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/;
- } elsif ($link =~ /^ldp:/) {
- $linkname =~ s/^ldp://;
- $link =~ s/^ldp://;
- $tempfile = "/tmp/txt2db-" . $rand;
- $cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
- system("$cmd");
- open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n";
- $link = "";
- while ($url_line = ) {
- $url_line =~ s/\n//;
- if ($url_line =~ /identifier/) {
- $link .= $url_line;
- }
- }
- close(URL);
- unlink $tempfile;
- $link =~ s/^.*?//;
- $link =~ s/<\/identifier>.*?$//;
- if ($link eq '') {
- $linkname = "ERROR: LDP namespace resolution failure on $linkname";
- }
- $line =~ s/\[\[.*?\]\]/$linkname<\/citetitle><\/ulink>/;
- } elsif ($link =~ /^file:/) {
- $linkname =~ s/^file://;
- $line =~ s/\[\[.*?\]\]/$linkname<\/filename>/;
- } else {
- $line =~ s/\[\[.*?\]\]/$linkname<\/filename>/;
- }
- }
-
- # emphasis
- #
- while ($line =~ /'''.*'''/) {
- $line =~ s/'''//;
- $line =~ s/'''/<\/emphasis>/;
- }
-
- # this block defines DocBook structures that won't be broken up with
- # paragraphs when we hit empty lines:
- #
- #
- #
- #
- #
- #
- #
-
- # forget about nopara
- if ($noparadepth == 0) {
- $noparatag = "";
- }
-
- # start a new nopara section
- #
- if ((($line =~ /^/) or
- ($line =~ /^/) or
- ($line =~ /^/) or
- ($line =~ /^/)) and
- ($noparadepth == 0)) {
- &closepara;
- $noparatag = $line;
- $noparatag =~ s/^.*?/;
- $noparatag =~ s/>.*?$//;
- $noparaline = $linenumber;
- if ($line =~ /^/) {
- unless ($para) {
- $line = "" . $line;
- $para = 1;
- }
- }
- }
-
- # count noparadepth
- #
- if ($noparatag ne '') {
- $temp = $line;
- while ($temp =~ /<$noparatag>/) {
- $temp =~ s/$noparatag>//;
- $noparadepth ++;
- }
- while ($temp =~ /<\/$noparatag>/) {
- $temp =~ s/\/$noparatag>//;
- $noparadepth --;
- if ($noparadepth == 0) {
- $noparaline == 0;
- }
- }
-
- # runon protection
- #
- if ($linenumber >= ($noparaline + 100)) {
- $buf .= "ERROR: runon block starting on line $noparaline\n";
- last;
- }
-
- # recover original line -- no whitespace modifiers
- #
- $line = $originalline;
-
- # sect3
- #
- } elsif ($line =~ /^===/) {
- &close3;
- &splittitle;
- if ($id eq '') {
- $line = "$title\n";
- } else {
- $line = "$title\n";
- }
- $level3 = 1;
-
- # sect2
- #
- } elsif ($line =~ /^==/) {
- &close2;
- &splittitle;
- if ($id eq '') {
- $line = "$title\n";
- } else {
- $line = "$title\n";
- }
- $level2 = 1;
-
- # sect1
- #
- } elsif ($line =~ /^=/) {
- &close1;
- &splittitle;
- if ($id eq '') {
- $line = "$title\n";
- } else {
- $line = "$title\n";
- }
- $level1 = 1;
-
- # orderedlist
- #
- } elsif ($line =~ /^#/) {
- &closeitemizedlist;
- if ($orderedlist == 0) {
- $buf .= "\n\n";
- $orderedlist = 1;
- }
- &closelistitem;
- $line =~ s/^#//;
- &trimline;
- $line =~ s/^//;
- $listitem = 1;
- $para = 1;
- } elsif ($line =~ /^\/#/) {
- $line =~ s/^\/#//;
- &trimline;
- &closeorderedlist;
-
- # itemizedlist
- #
- } elsif ($line =~ /^\*/) {
- &closeorderedlist;
- if ($itemizedlist == 0) {
- $buf .= "\n\n";
- $itemizedlist = 1;
- }
- &closelistitem;
- $line =~ s/^\*//;
- &trimline;
- $line =~ s/^//;
- $listitem = 1;
- $para = 1;
- } elsif ($line =~ /\/\*/) {
- $line =~ s/^\/\*//;
- &trimline;
- &closeitemizedlist;
-
- # question
- #
- } elsif ($line =~ /^Q:/) {
- &closelists;
- &closeqandaentry;
- $line =~ s/^Q://;
- &trimline;
- &splittitle;
- if ($id eq '') {
- $line = "" . $title . "\n";
- } else {
- $line = "" . $title . "\n";
- }
- unless ($qandaentry == 1) {
- $line = "\n" . $line;
- $qandaentry = 1;
- }
- if ($qandaset == 0) {
- $line = "\n". $line;
- $qandaset = 1;
- }
-
- # answer
- #
- } elsif ($line =~ /^A:/) {
- $line =~ s/^A://;
- &trimline;
- &closeanswer;
- $line = "" . $line;
- $answer = 1;
- $para = 1;
-
- } elsif ($line =~ /^\s*----\s*$/) {
- $line = '';
-
- # para
- #
- } else {
- if (($para == 0) and ($noparatag eq '')) {
- $line = "" . $line;
- $para = 1;
- } else {
- $line .= " ";
- }
- }
-
- $buf .= "$line ";
- }
- # close nesting
- #
- &close1;
-
- if ($noparadepth > 0) {
- $buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
- }
-}
-
-sub close1 {
- &close2;
- if ($level1 == 1) {
- $buf .= "\n";
- $level1 = 0;
- }
-}
-
-sub close2 {
- &close3;
- if ($level2 == 1) {
- $buf .= "\n";
- $level2 = 0;
- }
-}
-
-sub close3 {
- &closeorderedlist;
- &closeitemizedlist;
- &closepara;
- &closeqandaset;
- if ($level3 == 1) {
- $buf .= "\n";
- $level3 = 0;
- }
-}
-
-sub closenonsect {
- &closepara;
-# &closeorderedlist;
-# &closeitemizedlist;
-}
-
-sub closelistitem {
- &closepara;
- if ($listitem == 1 ) {
- $buf .= "\n";
- $listitem = 0;
- }
-}
-
-sub closeorderedlist {
- &closepara;
- &closelistitem;
- if ($orderedlist == 1 ) {
- $buf .= "\n";
- $orderedlist = 0;
- }
-}
-
-sub closeitemizedlist {
- &closepara;
- &closelistitem;
- if ($itemizedlist == 1 ) {
- $buf .= "\n";
- $itemizedlist = 0;
- }
-}
-
-sub closelists {
- &closeitemizedlist;
- &closeorderedlist;
-}
-
-sub closeanswer {
- &closepara;
- if ($answer == 1) {
- $buf .= "\n";
- $answer = 0;
- }
-}
-
-sub closeqandaentry {
- &closeanswer;
- if ($qandaentry == 1) {
- $buf .= "\n";
- $qandaentry = 0;
- }
-}
-
-sub closeqandaset {
- &closeqandaentry;
- if ($qandaset == 1) {
- $buf .= "\n";
- $qandaset = 0;
- }
-}
-
-sub closepara {
- if ($para == 1) {
- $buf .= "\n";
- $para = 0;
- }
-}
-
-sub trimline {
- $line =~ s/\s+$//;
- $line =~ s/^\s+//;
-}
-
-sub splittitle {
- $line =~ s/^=+//;
- $line =~ s/=+$//;
- $title = $line;
- $id = "";
- if ($line =~ /\|/) {
- $title =~ s/\|.+//;
- $id = $line;
- $id =~ s/^.+\|//;
- }
- $title =~ s/\s+$//;
- $title =~ s/^\s+//;
- $id =~ s/\s+$//;
- $id =~ s/^\s+//;
-}
-
-sub usage {
- print "Usage: txt2db [-v] [-h|-o ] \n";
- print "-o, --output-to write to the specified file.\n";
- print "-v, --verbose show diagnostic output.\n";
- print "-h, --help show this usage message.\n";
- exit($error);
-}
-