added texi2db, texinfo to docbook converter nearing initial release

2002-02-02 04:55:52 +00:00 · 2002-02-02 04:55:52 +00:00 · 1872402c7f
parent abd7408625
commit 1872402c7f
5 changed files with 1509 additions and 665 deletions
--- a/LDP/README
+++ b/LDP/README
@ -28,8 +28,9 @@ scrollserver/	python web application server front end to scrollkeeper
  stylesheets/	xsl stylesheets for xml -> html conversion
  www/		www.scrollserver.org website
 test/		to test your cvs without disturbing things, use this
-txt2db/		utility to convert text files into docbook
+texi2db/	utility to convert Texinfo files into docbook
 users/		individual users' areas
+wt2db/		utility to convert WikiText files into docbook
 www/		websites
  db./		ldp database website
    cgi-bin/	perl scripts for the ldp database
--- a/LDP/texi2db/texi2db
+++ b/LDP/texi2db/texi2db
--- a/LDP/txt2db/README
+++ b/LDP/txt2db/README
@ -1,69 +0,0 @@
-This is a utility to convert text files in a specific format into valid
-DocBook. Just pass it the input filename on the command line and you'll
-get a .sgml file out. It won't be a complete valid document, as it will
-have no header information or dtd specification. It's just a DocBook
-fragment, not a complete document.
-
-The following constructs are currently supported. If you need support for
-an additional construct, write discuss@linuxdoc.org if you're subscribed,
-or feedback@linuxdoc.org if you're not.
-
-Or just add it in the cvs. :-)
-
-Foo			<para>Foo</para>
-
-=Title=			<sect1><title>Title</title>
-			</sect1>
-
-=Title|id=		<sect1 id='id'><title>Title</title>
-			</sect1>
-
-			works for other sect levels as well, and many other
-			tags. It is either the "id" value, or the "title"
-			value, depending on the semantics of the particular
-			tag. Usage should be obvious in context.
-
-==Title==		<sect2><title>Title</title>
-			</sect2>
-
-===Title===		<sect3><title>Title</title>
-			</sect3>
-
-
-#Foo			<orderedlist>
-#Bar			<listitem><para>Foo</para></listitem>
-#Baz			<listitem><para>Bar</para></listitem>
-/#			<listitem><para>Baz</para></listitem>
-			</orderedlist>
-
-*Foo			<itemizedlist>
-*Bar			<listitem><para>Foo</para></listitem>
-*Baz			<listitem><para>Bar</para></listitem>
-/*			<listitem><para>Baz</para></listitem>
-			</itemizedlist>
-
-[[http://foo.org]]	<ulink url='http://foo.org'>
-			  <citetitle>http://foo.org</citetitle>
-			</ulink>
-
-[[http://foo.org Foo]]	<ulink url='http://foo.org'>
-			  <citetitle>Foo</citetitle>
-			</ulink>
-
-[[http://foo.org|Foo]]	You can also delimit with the pipe character "|".
-			This works on any of these [[]] tags.
-
-[[file:Foo]]		<filename>Foo</filename>
-
-'''Foo'''		<emphasis role='bold'>Foo</emphasis>
-
-A few DocBook structures do not have <para> tags wrapped around them. They
-are <para> itself (duh!), <sect?> and <programlisting>. If you insert anything
-using these tags, no <para> tags will be wrapped around it or inserted into it.
-So if you want fine control over your <para> tags, insert them yourself.
-
-These tags include:
-
-<programlisting/>
-<screen/>
-
--- a/LDP/txt2db/sample.txt
+++ b/LDP/txt2db/sample.txt
@ -1,72 +0,0 @@
-=Introduction|intro-to-the-program=
-
-The following list should be rendered as a qandaset:
-
-Q: Why?|why-id
-A: Why
-not?
-
-Multiple questions and multiple answers:
-
-Q: Why?
-A: Why not?
-A: Why not2?
-
-Q: Why?
-A: Why not?
-
-Simple List
-
-*item
-*item
-*item
-/*
-
-This tests arbitrary DocBook. It should be passed right on to the output file
-with no changes. It can be nested arbitrarily deep.
-
-<informaltable>test
-<foo>test some more
-</foo>
-<informaltable>This is the second level!
-</informaltable>
-</informaltable>
-
-This document is from the [[http://www.linuxdoc.org Linux Documentation Project]].
-
-Numbered List
-
-This is an '''important''' [file].
-
-#item
-#item
-
-#item
-/#
-
-Another to make sure the numbers restart at one.
-
-#item
-#item
-#item
-/#
-
-=Bar=
-
-Just another section.
-
-==Level 2|level2==
-
-===Level 3|level3===
-
-=Conclusion|conclusion=
-
-All previous sections should be properly closed.
-
-
-=test again=
-
-[[ldp:INFO-SHEET]]
-[[ldp:Distributions-HOWTO]]
-
-
--- a/LDP/txt2db/txt2db.pl
+++ b/LDP/txt2db/txt2db.pl
@ -1,523 +0,0 @@
-#!/usr/bin/perl
-#
-#Converts txt files into docbook.
-#
-# Requirements:
-# 
-# If you use the "ldp:" namespace, you must have wget installed.
-# Wget is used to request an xml record from the LDP # database,
-# http://db.linuxdoc.org.
-# 
-
-use File::Basename;
-use HTML::Entities;
-
-# Input text file and output DocBook file.  Both start out as empty strings
-# (a scalar on the right-hand side would initialise only the first one).
-my($txtfile, $dbfile) = ('', '');
-
-#These keep track of which constructs we're in the middle of
-#(1 = currently open, 0 = closed)
-my($level1,
-   $level2,
-   $level3,
-   $orderedlist,
-   $listitem,
-   $itemizedlist,
-   $para,
-   $qandaset,
-   $qandaentry,
-   $answer) = (0) x 10;
-
-# The line currently being converted; trimline and splittitle work on it.
-my($line);
-# Filled in by splittitle.
-my($id, $title);
-
-# Bumped once for every -v/--verbose on the command line.
-my($verbose) = 0;
-
-# Exit status that usage hands to exit().
-my($error) = 0;
-
-# read in cmd-line arguments
-#
-# Each pass takes one argument off the front of @ARGV (and, for -o, the
-# value that follows it).  The loop stops as soon as the next argument is
-# missing or empty.  Anything that is not a known option becomes the input
-# file, so the last such argument wins.
-do {
-	my $arg = shift(@ARGV);
-
-	if ($arg eq "-v" or $arg eq "--verbose") {
-		$verbose++;
-	} elsif ($arg eq "-h" or $arg eq "--help") {
-		# usage exits, so nothing after this call runs
-		usage();
-	} elsif ($arg eq "-o" or $arg eq "--output-to") {
-		$dbfile = shift(@ARGV);
-	} else {
-		$txtfile = $arg;
-	}
-} until ($ARGV[0] eq '');
-
-# abort if no input file given
-#
-# Both failures set $error first so that usage exits with a non-zero status.
-if($txtfile eq '') {
-	print "txt2db: ERROR text file not specified.\n\n";
-	$error = 1;
-	usage();
-} elsif( !(-r $txtfile) ) {
-	print "txt2db: ERROR cannot read $txtfile ($!)\n\n";
-	$error = 1;
-	usage();
-}
-
-# No -o given: derive the output name from the input's base name by
-# dropping everything from the first dot and appending ".sgml".  Appending
-# unconditionally keeps an input without any extension from being chosen
-# as (and overwritten by) its own output file.
-unless ($dbfile) {
-	my($basename, $path, $ext) = fileparse($txtfile);
-	$dbfile = $basename;
-	$dbfile =~ s/\..*$//;
-	$dbfile .= '.sgml';
-}
-
-# proc_txt appends the converted document to the global $buf.
-$buf = '';
-
-proc_txt($txtfile);
-
-# Three-argument open: the file name is taken literally, never parsed for a
-# mode.  close is checked too, because buffered write errors only show up
-# there.
-open(my $db, '>', $dbfile) || die "txt2db: cannot write to $dbfile ($!)\n";
-print $db $buf, "\n";
-close($db) || die "txt2db: cannot write to $dbfile ($!)\n";
-
-exit(0);
-
-# -----------------------------------------------------------
-
-# proc_txt($f)
-#
-# Reads the text file $f line by line, converts the markup on each line to
-# DocBook and appends the result to the global $buf.  Conversion problems
-# are reported as "ERROR ..." text inside $buf; only a failure to open $f
-# or to start wget is fatal.
-#
-# Per line, in order: trim whitespace, let a blank line close the open
-# paragraph, encode entities, expand [[...]] links and '''...''' emphasis,
-# then dispatch on the leading marker (=, #, *, Q:, A:, ----) or treat the
-# line as paragraph text.
-sub proc_txt {
-	my($f) = @_;
-
-	my $linenumber = 0;
-
-	# "nopara" state: the tag name of the raw DocBook block we are inside,
-	# how deeply that tag is currently nested, and the line the block
-	# started on.
-	my $noparatag = '';
-	my $noparadepth = 0;
-	my $noparaline = 0;
-
-	# read in the text file
-	#
-	open(my $txt, '<', $f) || die "txt2db: cannot open $f ($!)\n";
-	while (my $originalline = <$txt>) {
-		$line = $originalline;
-		$linenumber++;
-
-		trimline();
-
-		# blank lines end the current paragraph, except inside a raw
-		# DocBook block where they are passed through
-		if ($line eq '') {
-			if ($noparadepth == 0) {
-				closenonsect();
-				next;
-			}
-		}
-
-		# capitalize hints that can be entered in lowercase
-		#
-		$line =~ s/^q:/Q:/;
-		$line =~ s/^a:/A:/;
-
-		# encode entities
-		#
-		# NOTE(review): per the HTML::Entities documentation the default
-		# encode_entities() also escapes '<' and '>', so the literal-tag
-		# tests further down (/^<para>/ etc.) may never see a raw tag --
-		# confirm the intended order before relying on raw DocBook
-		# pass-through.
-		encode_entities($line);
-
-		# inline docbook
-		#
-		# ulink
-		#
-		# Expand one complete [[...]] tag per pass.  Because the closing
-		# "]]" is part of the loop condition, every pass consumes a tag
-		# and an unterminated "[[" cannot make the loop spin forever.
-		while ($line =~ /\[\[(.*?)\]\]/) {
-			my $link = $1;
-			my $linkname = $link;
-
-			# separate link url from link name
-			#
-			if ($link =~ /\|/) {
-				$link =~ s/\|.+$//;
-				$linkname =~ s/^\S+\|//;
-			}
-
-			# kill quotes, they mess us up
-			#
-			$link =~ s/'/%27/g;
-
-			# namespaces are handled differently
-			#
-			print "$link\n" if ($verbose);
-			my $markup;
-			if ($link =~ /^http:/) {
-				$markup = "<ulink url='$link'><citetitle>$linkname</citetitle></ulink>";
-			} elsif ($link =~ /^mailto:/) {
-				$linkname =~ s/^mailto://;
-				$markup = "<ulink url='$link'><citetitle>$linkname</citetitle></ulink>";
-			} elsif ($link =~ /^wiki:/) {
-				$linkname =~ s/^wiki://;
-				$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
-				# every space in the title becomes '+', not just the first
-				$link =~ s/\ /+/g;
-				$markup = "<ulink url='$link'><citetitle>$linkname</citetitle></ulink>";
-			} elsif ($link =~ /^ldp:/) {
-				$linkname =~ s/^ldp://;
-				$link =~ s/^ldp://;
-
-				# Ask the LDP database for the document record and read
-				# wget's output through a pipe.  The list form of open
-				# runs wget without a shell, so the document name cannot
-				# inject shell syntax, and no temporary file is needed.
-				my $url = "http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link";
-				open(my $wget, '-|', 'wget', '-q', '-O', '-', $url)
-					|| die "txt2db: cannot run wget ($!)\n";
-				$link = "";
-				while (my $url_line = <$wget>) {
-					$url_line =~ s/\n//;
-					if ($url_line =~ /identifier/) {
-						$link .= $url_line;
-					}
-				}
-				close($wget);
-
-				# keep only what is between <identifier> and </identifier>
-				$link =~ s/^.*?<identifier>//;
-				$link =~ s/<\/identifier>.*?$//;
-				if ($link eq '') {
-					$linkname = "ERROR: LDP namespace resolution failure on $linkname";
-				}
-				$markup = "<ulink url='$link'><citetitle>$linkname</citetitle></ulink>";
-			} elsif ($link =~ /^file:/) {
-				$linkname =~ s/^file://;
-				$markup = "<filename>$linkname</filename>";
-			} else {
-				$markup = "<filename>$linkname</filename>";
-			}
-			$line =~ s/\[\[.*?\]\]/$markup/;
-		}
-
-		# whatever "[[" is left has no matching "]]"
-		if ($line =~ /\[\[/) {
-			$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
-		}
-
-		# emphasis
-		#
-		while ($line =~ /'''.*'''/) {
-			$line =~ s/'''/<emphasis role='bold'>/;
-			$line =~ s/'''/<\/emphasis>/;
-		}
-
-		# this block defines DocBook structures that won't be broken up with
-		# paragraphs when we hit empty lines:
-		#
-		#	<para>
-		#	<sect1>
-		#	<sect2>
-		#	<sect3>
-		#	<screen>
-		#	<programlisting>
-		#	<literallayout>
-
-		# forget about nopara
-		if ($noparadepth == 0) {
-			$noparatag = "";
-		}
-
-		# start a new nopara section
-		#
-		if ((($line =~ /^<para>/) or
-		     ($line =~ /^<sect/) or
-		     ($line =~ /^<screen>/) or
-		     ($line =~ /^<literallayout>/) or
-		     ($line =~ /^<programlisting>/)) and
-		    ($noparadepth == 0)) {
-			closepara();
-			# the tag name is whatever sits between the first '<' and
-			# the first '>' after it
-			$noparatag = $line;
-			$noparatag =~ s/^.*?<//;
-			$noparatag =~ s/>.*?$//;
-			$noparaline = $linenumber;
-			if ($line =~ /^<screen>/) {
-				unless ($para) {
-					# <screen> gets wrapped in a paragraph.  The opening
-					# tag goes straight into $buf because $line is
-					# replaced by $originalline further down.
-					$buf .= "<para>";
-					$para = 1;
-				}
-			}
-		}
-
-		# count noparadepth
-		#
-		if ($noparatag ne '') {
-			my $temp = $line;
-			while ($temp =~ /<$noparatag>/) {
-				$temp =~ s/<?$noparatag>//;
-				$noparadepth ++;
-			}
-			# $noparaline needs no reset when the depth returns to 0: it
-			# is reassigned whenever a new block starts.
-			while ($temp =~ /<\/$noparatag>/) {
-				$temp =~ s/<?\/$noparatag>//;
-				$noparadepth --;
-			}
-
-			# runon protection
-			#
-			if ($linenumber >= ($noparaline + 100)) {
-				$buf .= "ERROR: runon block starting on line $noparaline\n";
-				last;
-			}
-
-			# recover original line -- no whitespace modifiers
-			#
-			$line = $originalline;
-
-		# sect3
-		#
-		} elsif ($line =~ /^===/) {
-			close3();
-			splittitle();
-			if ($id eq '') {
-				$line = "<sect3><title>$title</title>\n";
-			} else {
-				$line = "<sect3 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level3 = 1;
-
-		# sect2
-		#
-		} elsif ($line =~ /^==/) {
-			close2();
-			splittitle();
-			if ($id eq '') {
-				$line = "<sect2><title>$title</title>\n";
-			} else {
-				$line = "<sect2 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level2 = 1;
-
-		# sect1
-		#
-		} elsif ($line =~ /^=/) {
-			close1();
-			splittitle();
-			if ($id eq '') {
-				$line = "<sect1><title>$title</title>\n";
-			} else {
-				$line = "<sect1 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level1 = 1;
-
-		# orderedlist
-		#
-		} elsif ($line =~ /^#/) {
-			closeitemizedlist();
-			if ($orderedlist == 0) {
-				$buf .= "\n<orderedlist>\n";
-				$orderedlist = 1;
-			}
-			closelistitem();
-			$line =~ s/^#//;
-			trimline();
-			$line =~ s/^/<listitem><para>/;
-			$listitem = 1;
-			$para = 1;
-		} elsif ($line =~ /^\/#/) {
-			$line =~ s/^\/#//;
-			trimline();
-			closeorderedlist();
-
-		# itemizedlist
-		#
-		} elsif ($line =~ /^\*/) {
-			closeorderedlist();
-			if ($itemizedlist == 0) {
-				$buf .= "\n<itemizedlist>\n";
-				$itemizedlist = 1;
-			}
-			closelistitem();
-			$line =~ s/^\*//;
-			trimline();
-			$line =~ s/^/<listitem><para>/;
-			$listitem = 1;
-			$para = 1;
-		# anchored like the "/#" terminator, so a "/*" in the middle of
-		# ordinary text is not mistaken for the end of a list
-		} elsif ($line =~ /^\/\*/) {
-			$line =~ s/^\/\*//;
-			trimline();
-			closeitemizedlist();
-
-		# question
-		#
-		} elsif ($line =~ /^Q:/) {
-			closelists();
-			closeqandaentry();
-			$line =~ s/^Q://;
-			trimline();
-			splittitle();
-			if ($id eq '') {
-				$line = "<question><para>" . $title . "</para></question>\n";
-			} else {
-				$line = "<question id='$id'><para>" . $title . "</para></question>\n";
-			}
-			unless ($qandaentry == 1) {
-				$line = "<qandaentry>\n" . $line;
-				$qandaentry = 1;
-			}
-			if ($qandaset == 0) {
-				$line = "<qandaset defaultlabel='qanda'>\n". $line;
-				$qandaset = 1;
-			}
-
-		# answer
-		#
-		} elsif ($line =~ /^A:/) {
-			$line =~ s/^A://;
-			trimline();
-			closeanswer();
-			$line = "<answer><para>" . $line;
-			$answer = 1;
-			$para = 1;
-
-		# a line holding only "----" produces no text of its own
-		} elsif ($line =~ /^\s*----\s*$/) {
-			$line = '';
-
-		# para
-		#
-		} else {
-			if (($para == 0) and ($noparatag eq '')) {
-				$line = "<para>" . $line;
-				$para = 1;
-			} else {
-				$line .= " ";
-			}
-		}
-
-		$buf .= "$line ";
-	}
-	close($txt);
-
-	# close nesting
-	#
-	close1();
-
-	if ($noparadepth > 0) {
-		$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
-	}
-}
-
-# Close everything nested inside a sect1 and then, if one is open, the
-# sect1 itself.
-sub close1 {
-	close2();
-	return unless ($level1 == 1);
-
-	$level1 = 0;
-	$buf .= "</sect1>\n";
-}
-
-# Close everything nested inside a sect2 and then, if one is open, the
-# sect2 itself.
-sub close2 {
-	close3();
-	return unless ($level2 == 1);
-
-	$level2 = 0;
-	$buf .= "</sect2>\n";
-}
-
-# Close any open list, paragraph and qandaset, then the sect3 if one is
-# open.
-sub close3 {
-	closeorderedlist();
-	closeitemizedlist();
-	closepara();
-	closeqandaset();
-	return if ($level3 != 1);
-
-	$level3 = 0;
-	$buf .= "</sect3>\n";
-}
-
-# Called for a blank line outside raw DocBook: ends the open paragraph.
-# Lists are left open here (the calls that would close them are disabled):
-#	&closeorderedlist;
-#	&closeitemizedlist;
-sub closenonsect {
-	closepara();
-}
-
-# End the open paragraph and then the open <listitem>, if there is one.
-sub closelistitem {
-	closepara();
-	return unless ($listitem == 1);
-
-	$listitem = 0;
-	$buf .= "</listitem>\n";
-}
-
-# End the open paragraph and list item, then the <orderedlist> if one is
-# open.
-sub closeorderedlist {
-	closepara();
-	closelistitem();
-	return unless ($orderedlist == 1);
-
-	$orderedlist = 0;
-	$buf .= "</orderedlist>\n";
-}
-
-# End the open paragraph and list item, then the <itemizedlist> if one is
-# open.
-sub closeitemizedlist {
-	closepara();
-	closelistitem();
-	return unless ($itemizedlist == 1);
-
-	$itemizedlist = 0;
-	$buf .= "</itemizedlist>\n";
-}
-
-# Close both kinds of list: the itemized one first, then the ordered one.
-sub closelists {
-	closeitemizedlist();
-	closeorderedlist();
-}
-
-# End the open paragraph and then the open <answer>, if there is one.
-sub closeanswer {
-	closepara();
-	return unless ($answer == 1);
-
-	$answer = 0;
-	$buf .= "</answer>\n";
-}
-
-# End the open answer and then the open <qandaentry>, if there is one.
-sub closeqandaentry {
-	closeanswer();
-	return unless ($qandaentry == 1);
-
-	$qandaentry = 0;
-	$buf .= "</qandaentry>\n";
-}
-
-# End the open qandaentry and then the open <qandaset>, if there is one.
-sub closeqandaset {
-	closeqandaentry();
-	return unless ($qandaset == 1);
-
-	$qandaset = 0;
-	$buf .= "</qandaset>\n";
-}
-
-# Append </para> to $buf and clear the $para flag, but only while a
-# paragraph is marked as open.
-sub closepara {
-	return unless ($para == 1);
-
-	$para = 0;
-	$buf .= "</para>\n";
-}
-
-# Strip trailing, then leading, whitespace from the shared $line in place.
-sub trimline {
-	for ($line) {
-		s/\s+$//;
-		s/^\s+//;
-	}
-}
-
-# Split the shared $line into $title and $id.
-#
-# Leading and trailing '=' characters are removed from $line first.  When
-# the rest contains a '|', $title is the text before the first one and $id
-# the text after the last one; otherwise $title is the whole line and $id
-# is empty.  Both results are whitespace-trimmed.
-sub splittitle {
-	$line =~ s/^=+//;
-	$line =~ s/=+$//;
-
-	($title, $id) = ($line, "");
-	if ($line =~ /\|/) {
-		($id = $line) =~ s/^.+\|//;
-		$title =~ s/\|.+//;
-	}
-
-	for my $part ($title, $id) {
-		$part =~ s/\s+$//;
-		$part =~ s/^\s+//;
-	}
-}
-
-# Print the command-line help to standard output and exit with the status
-# held in $error.
-sub usage {
-	print "Usage: txt2db [-v] [-h|-o <sgml file>] <text file>\n",
-	      "-o, --output-to    write to the specified file.\n",
-	      "-v, --verbose      show diagnostic output.\n",
-	      "-h, --help         show this usage message.\n";
-	exit($error);
-}
-