moved from a straight script to a Perl module with a wrapper script.

This is so the module can be used by Lampadas directly without needing to shell out to the OS.
2002-04-06 13:13:21 +00:00 · 2002-04-06 13:13:21 +00:00 · 698c2baf70
parent 1d11ea1682
commit 698c2baf70
5 changed files with 771 additions and 495 deletions
--- a/LDP/wt2db/Makefile.PL
+++ b/LDP/wt2db/Makefile.PL
@ -0,0 +1,26 @@
+#!/usr/bin/perl -sw
+##
+## Makefile for the wt2db Perl module.
+##
+## Copyright (c) 2001, 2002, David Merrill. All rights reserved.
+## This code is free software; you can redistribute it and/or modify
+## it under the same terms as Perl itself.
+##
+
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+
+    NAME          => 'Wt2Db',
+    AUTHOR        => 'David Merrill <david@lupercalia.net>',
+    ABSTRACT      => 'Converts WikiText documents into DocBook XML.',
+    NOECHO        => '',
+    VERSION_FROM  => 'lib/Wt2Db.pm',
+    EXE_FILES	  => 'wt2db',
+    PREREQ_PM     => { 
+                       'File::Basename'	=> 0,
+		       'HTML::Entities'	=> 0,
+		       'FileHandle'	=> 0,
+                     },
+
+);
--- a/LDP/wt2db/doc/wt2db.sgml
+++ b/LDP/wt2db/doc/wt2db.sgml
@ -0,0 +1,145 @@
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4//EN" [
+
+<!-- Process this file with docbook-to-man to generate an nroff manual
+     page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
+     the manual page with: `docbook-to-man manpage.sgml | nroff -man |
+     less'.  A typical entry in a Makefile or Makefile.am is:
+
+manpage.1: manpage.sgml
+	docbook-to-man $< > $@
+
+    
+	The docbook-to-man binary is found in the docbook-to-man package.
+	Please remember that if you create the nroff version in one of the
+	debian/rules file targets (such as build), you will need to include
+	docbook-to-man in your Build-Depends control field.
+
+  -->
+
+  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+  <!ENTITY dhfirstname "<firstname>David</firstname>">
+  <!ENTITY dhsurname   "<surname>Merrill</surname>">
+  <!-- Please adjust the date whenever revising the manpage. -->
+  <!ENTITY dhdate      "<date>March 28, 2002</date>">
+  <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+       allowed: see man(7), man(1). -->
+  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
+  <!ENTITY dhemail     "<email>david@lupercalia.net</email>">
+  <!ENTITY dhusername  "David C. Merrill">
+  <!ENTITY dhucpackage "<refentrytitle>WT2DB</refentrytitle>">
+  <!ENTITY dhpackage   "wt2db">
+
+  <!ENTITY debian      "<productname>Debian</productname>">
+  <!ENTITY gnu         "<acronym>GNU</acronym>">
+]>
+
+<refentry>
+  <refentryinfo>
+    <address>
+      &dhemail;
+    </address>
+    <author>
+      &dhfirstname;
+      &dhsurname;
+    </author>
+    <copyright>
+      <year>2001</year>
+      <holder>&dhusername;</holder>
+    </copyright>
+    &dhdate;
+  </refentryinfo>
+  <refmeta>
+    &dhucpackage;
+
+    &dhsection;
+  </refmeta>
+  <refnamediv>
+    <refname>&dhpackage;</refname>
+
+    <refpurpose>utility to convert WikiText documents into DocBook XML</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>&dhpackage;</command>
+      <arg><option>-o </option><replaceable>filename</replaceable></arg>
+      <arg><option>-v</option></arg>
+      <arg><option>-h</option></arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+  <refsect1>
+    <title>DESCRIPTION</title>
+
+    <para>This manual page documents briefly the
+      <command>&dhpackage;</command> command.</para>
+
+    <para><command>&dhpackage;</command> is a program that converts a text
+    file in a format commonly used by WikiWikiWebs into DocBook SGML.</para>
+
+  </refsect1>
+  <refsect1>
+    <title>OPTIONS</title>
+
+    <para>These programs follow the usual GNU command line syntax,
+      with long options starting with two dashes (`-').  A summary of
+      options is included below.  For a complete description, see the
+      <application>Info</application> files.</para>
+
+    <variablelist>
+      <varlistentry>
+      <varlistentry>
+        <term><option>-o</option>
+          <option>--output-to</option>
+        </term>
+        <listitem>
+          <para>Write to the specified file. The default is to write to standard output.</para>
+        </listitem>
+      </varlistentry>
+        <term><option>-v</option>
+          <option>--verbose</option>
+        </term>
+        <listitem>
+          <para>Show diagnostic output.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>-h</option>
+          <option>--help</option>
+        </term>
+        <listitem>
+          <para>Show summary of options.</para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+  <refsect1>
+    <title>AUTHOR</title>
+
+    <para>This manual page was written by &dhusername; &dhemail;.
+      Permission is
+      granted to copy, distribute and/or modify this document under
+      the terms of the <acronym>GNU</acronym> Free Documentation
+      License, Version 1.1 or any later version published by the Free
+      Software Foundation; with no Invariant Sections, no Front-Cover
+      Texts and no Back-Cover Texts.</para>
+
+  </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
+
+
--- a/LDP/wt2db/lib/Wt2Db.pm
+++ b/LDP/wt2db/lib/Wt2Db.pm
@ -0,0 +1,573 @@
+#!/usr/bin/perl
+#
+# Converts WikiText files into docbook.
+#
+package Wt2Db;
+
+$VERSION = '0.2';
+
+use File::Basename;
+use HTML::Entities;
+use FileHandle;
+use Exporter;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(
+	new,
+	ProcessFile,
+	ProcessLine,
+	ProcessEnd,
+	Buffer,
+	Reset,
+	Usage
+	);
+
+# These keep track of which constructs we're in the middle of
+#
+$level1 = 0;
+$level2 = 0;
+$level3 = 0;
+$orderedlist = 0;
+$listitem = 0;
+$itemizedlist = 0;
+$para = 0;
+$qandaset = 0;
+$qandaentry = 0;
+$answer = 0;
+
+# These are passed in by the caller
+#
+$txtfile = '';
+$dbfile = '';
+$verbose = 0;
+
+# These maintain state
+#
+$line = '';
+$linenumber = 0;
+$id = '';
+$title = '';
+$buf = '';
+
+$noparatag = 0;
+$noparadepth = 0;
+$noparaline = 0;
+
+
+# -----------------------------------------------------------
+
+sub new {
+	my $that = shift;
+	my $class = ref($that) || $that;
+	my $self = {};
+	bless $self, $class;
+	return $self;
+}
+
+sub ProcessFile {
+	($self, $txtfile, $dbfile, $verbose) = @_;
+
+	# Read from STDIN if no input file given
+	# 
+	if ($txtfile) {
+		if( !(-r $txtfile) ) {
+			print "txt2db: ERROR cannot read $f ($!)\n\n";
+			&usage(1);
+		} else {
+			$fh = new FileHandle;
+			open $fh, "<$txtfile" or die "Cannot open $txtfile ($!)\n";
+		}
+	} else {
+		$fh = STDIN;
+	}
+
+	if ($dbfile) {
+		$outfh = new FileHandle;
+		open $outfh, ">$dbfile" or die "Cannot write to $dbfile\n\n";
+	} else {
+		$outfh = STDOUT;
+	}
+
+	# read in the text file
+	#
+	while ($originalline = <$fh>) {
+		ProcessLine($foo, $originalline);
+		print $outfh "$buf";
+		$buf = '';
+	}
+
+	ProcessEnd();
+	print $outfh "$buf";
+	$buf = '';
+	close $fh;
+	close $outfh;
+}
+
+sub ProcessLine {
+	($foo, $originalline) = @_;
+	
+	$line = $originalline;
+	$linenumber++;
+
+	&trimline;
+
+	# blank lines
+	unless ($line) {
+		unless ($noparadepth) {
+			&closenonsect;
+			return;
+		}
+	}
+
+	# capitalize hints that can be entered in lowercase
+	#
+	$line =~ s/^q:/Q:/;
+	$line =~ s/^a:/A:/;
+
+	# encode entities
+	#
+#	while ($line =~ //) {
+#	}
+#	decode_entities($line);
+#	encode_entities($line);
+		
+	# inline docbook
+	#
+	# ulink
+	# 
+	while ($line =~ /\[\[/) {
+		unless ($line =~ /\]\]/) {
+			$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
+		}
+
+		# separate link url from link name
+		#
+		$link = $line;
+		$link=~ s/\n//g;
+		$link =~ s/.*?\[\[//;
+		$link =~ s/\]\].*?$//;
+		if ($link =~ /\|/) {
+			$linkname = $link;
+			$link =~ s/\|.+$//;
+			$linkname =~ s/^\S+\|//;
+		} else {
+			$linkname = $link;
+		}
+
+		# kill quotes, they mess us up
+		# 
+		$link =~ s/'/%27/g;
+
+		# namespaces are handled differently
+		#
+		print "$link\n" if ($verbose);
+		if ($link =~ /^http:/) {
+			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
+		} elsif ($link =~ /^mailto:/) {
+			$linkname =~ s/^mailto://;
+			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
+		} elsif ($link =~ /^wiki:/) {
+			$linkname =~ s/^wiki://;
+			$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
+			$link =~ s/\ /+/;
+			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
+		} elsif ($link =~ /^ldp:/) {
+			$linkname =~ s/^ldp://;
+			$link =~ s/^ldp://;
+			$tempfile = "/tmp/txt2db-" . $rand;
+			$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
+			system("$cmd");
+			open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n\n";
+			$link = "";
+			while ($url_line = <URL>) {
+				$url_line =~ s/\n//;
+				if ($url_line =~ /identifier/) {
+					$link .= $url_line;
+				}
+			}
+			close(URL);
+			unlink $tempfile;
+			$link =~ s/^.*?<identifier>//;
+			$link =~ s/<\/identifier>.*?$//;
+			if ($link eq '') {
+				$linkname = "ERROR: LDP namespace resolution failure on $linkname";
+			}
+			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
+		} elsif ($link =~ /^file:/) {
+			$linkname =~ s/^file://;
+			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
+		} else {
+			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
+		}
+	}
+
+	# emphasis
+	#
+	while ($line =~ /'''.*'''/) {
+		$line =~ s/'''/<emphasis role='bold'>/;
+		$line =~ s/'''/<\/emphasis>/;
+	}
+
+	# this block defines DocBook structures that won't be broken up with 
+	# paragraphs when we hit empty lines:
+	#
+	#	<para>
+	#	<sect1>
+	#	<sect2>
+	#	<sect3>
+	#	<programlisting>
+	#	<literallayout>
+	
+	# forget about nopara
+	if ($noparadepth == 0) {
+		$noparatag = "";
+	}
+		
+	# start a new nopara section
+	#
+	if ((($line =~ /^<para>/) or
+	     ($line =~ /^<sect/) or
+	     ($line =~ /^<screen>/) or
+	     ($line =~ /^<literallayout>/) or
+	     ($line =~ /^<programlisting>/)) and
+	    ($noparadepth == 0)) { 
+	    	&closepara;
+		$noparatag = $line;
+		$noparatag =~ s/^.*?<//;
+		$noparatag =~ s/>.*?$//;
+		$noparaline = $linenumber;
+		if ($line =~ /^<screen>/) {
+			unless ($para) {
+				$line = "<para>" . $line;
+				$para = 1;
+			}
+		}
+	}
+
+	# count noparadepth
+	#
+	if ($noparatag ne '') {
+		$temp = $line;
+		while ($temp =~ /<$noparatag>/) {
+			$temp =~ s/<?$noparatag>//;
+			$noparadepth ++;
+		}
+		while ($temp =~ /<\/$noparatag>/) {
+			$temp =~ s/<?\/$noparatag>//;
+			$noparadepth --;
+			if ($noparadepth == 0) {
+				$noparaline = 0;
+			}
+		}
+
+		# runon protection
+		#
+		if (($noparaline) and ($linenumber >= ($noparaline + 100))) {
+			$buf .= "ERROR: runon block starting on line $noparaline\n";
+			return;
+		}
+
+		# recover original line -- no whitespace modifiers
+		#
+		$line = $originalline;
+
+	# sect3
+	#
+	} elsif ($line =~ /^===/) {
+		&close3;
+		&splittitle;
+		if ($id eq '') {
+			$line = "<sect3><title>$title</title>";
+		} else {
+			$line = "<sect3 id='$id'><title id='$id-title'>$title</title>";
+		}
+		$level3 = 1;
+
+	# sect2
+	#
+	} elsif ($line =~ /^==/) {
+		&close2;
+		&splittitle;
+		if ($id eq '') {
+			$line = "<sect2><title>$title</title>";
+		} else {
+			$line = "<sect2 id='$id'><title id='$id-title'>$title</title>";
+		}
+		$level2 = 1;
+
+	# sect1
+	#
+	} elsif ($line =~ /^=/) {
+		&close1;
+		&splittitle;
+		if ($id eq '') {
+			$line = "<sect1><title>$title</title>";
+		} else {
+			$line = "<sect1 id='$id'><title id='$id-title'>$title</title>";
+		}
+		$level1 = 1;
+
+	# orderedlist
+	#
+	} elsif ($line =~ /^#/) {
+		&closeitemizedlist;
+		if ($orderedlist == 0) {
+			$buf .= "<orderedlist>\n";
+			$orderedlist = 1;
+		}
+		&closelistitem;
+		$line =~ s/^#//;
+		&trimline;
+		$line =~ s/^/<listitem><para>/;
+		$listitem = 1;
+		$para = 1;
+	} elsif ($line =~ /^\/#/) {
+		$line =~ s/^\/#//;
+		&trimline;
+		&closeorderedlist;
+
+	# itemizedlist
+	#
+	} elsif ($line =~ /^\*/) {
+		&closeorderedlist;
+		if ($itemizedlist == 0) {
+			$buf .= "<itemizedlist>\n";
+			$itemizedlist = 1;
+		}
+		&closelistitem;
+		$line =~ s/^\*//;
+		&trimline;
+		$line =~ s/^/<listitem><para>/;
+		$listitem = 1;
+		$para = 1;
+	} elsif ($line =~ /\/\*/) {
+		$line =~ s/^\/\*//;
+		&trimline;
+		&closeitemizedlist;
+
+	# question
+	#
+	} elsif ($line =~ /^Q:/) {
+		&closelists;
+		&closeqandaentry;
+		$line =~ s/^Q://;
+		&trimline;
+		&splittitle;
+		if ($id eq '') {
+			$line = "<question><para>" . $title . "</para></question>";
+		} else {
+			$line = "<question id='$id'><para>" . $title . "</para></question>";
+		}
+		unless ($qandaentry) {
+			$line = "<qandaentry>\n" . $line;
+			$qandaentry = 1;
+		}
+		if ($qandaset == 0) {
+			$line = "<qandaset defaultlabel='qanda'>\n". $line;
+			$qandaset = 1;
+		}
+
+	# answer
+	#
+	} elsif ($line =~ /^A:/) {
+		$line =~ s/^A://;
+		&trimline;
+		&closeanswer;
+		$line = "<answer><para>" . $line;
+		$answer = 1;
+		$para = 1;
+
+	} elsif ($line =~ /^\s*----\s*$/) {
+		$line = '';
+
+	# para
+	#
+	} else {
+		if (($para == 0) and ($noparatag eq '')) {
+			$line = "<para>" . $line;
+			$para = 1;
+		} else {
+			$line .= " ";
+		}
+	}
+
+	$buf .= "$line\n";
+}
+
+sub ProcessEnd {
+	# close nesting
+	#
+	&close1;
+
+	if ($noparadepth > 0) {
+		$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
+	}
+}
+
+sub Buffer {
+	return $buf;
+}
+
+# Basically a cut-and-paste of the original declarations,
+# to make sure all variables are completely cleared.
+#
+# Call this before rerunning ProcessLine to clear state.
+# 
+sub Reset {
+	$level1 = 0;
+	$level2 = 0;
+	$level3 = 0;
+	$orderedlist = 0;
+	$listitem = 0;
+	$itemizedlist = 0;
+	$para = 0;
+	$qandaset = 0;
+	$qandaentry = 0;
+	$answer = 0;
+
+	# These are passed in by the caller
+	#
+	$txtfile = '';
+	$dbfile = '';
+	$verbose = 0;
+
+	# These maintain state
+	#
+	$line = '';
+	$linenumber = 0;
+	$id = '';
+	$title = '';
+	$buf = '';
+
+	$noparatag = 0;
+	$noparadepth = 0;
+	$noparaline = 0;
+}
+
+sub close1 {
+	&close2;
+	if ($level1) {
+		$buf .= "</sect1>\n";
+		$level1 = 0;
+	}
+}
+
+sub close2 {
+	&close3;
+	if ($level2) {
+		$buf .= "</sect2>\n";
+		$level2 = 0;
+	}
+}
+
+sub close3 {
+	&closeorderedlist;
+	&closeitemizedlist;
+	&closepara;
+	&closeqandaset;
+	if ($level3) {
+		$buf .= "</sect3>\n";
+		$level3 = 0;
+	}
+}
+
+sub closenonsect {
+	&closepara;
+#	&closeorderedlist;
+#	&closeitemizedlist;
+}
+
+sub closelistitem {
+	&closepara;
+	if ($listitem) {
+		$buf .= "</listitem>\n";
+		$listitem = 0;
+	}
+}
+
+sub closeorderedlist {
+	&closepara;
+	&closelistitem;
+	if ($orderedlist) {
+		$buf .= "</orderedlist>\n";
+		$orderedlist = 0;
+	}
+}
+
+sub closeitemizedlist {
+	&closepara;
+	&closelistitem;
+	if ($itemizedlist) {
+		$buf .= "</itemizedlist>\n";
+		$itemizedlist = 0;
+	}
+}
+
+sub closelists {
+	&closeitemizedlist;
+	&closeorderedlist;
+}
+
+sub closeanswer {
+	&closepara;
+	if ($answer) {
+		$buf .= "</answer>\n";
+		$answer = 0;
+	}
+}
+
+sub closeqandaentry {
+	&closeanswer;
+	if ($qandaentry) {
+		$buf .= "</qandaentry>\n";
+		$qandaentry = 0;
+	}
+}
+
+sub closeqandaset {
+	&closeqandaentry;
+	if ($qandaset) {
+		$buf .= "</qandaset>\n";
+		$qandaset = 0;
+	}
+}
+
+sub closepara {
+	if ($para) {
+		$buf .= "</para>\n";
+		$para = 0;
+	}
+}
+
+sub trimline {
+	$line =~ s/\s+$//;
+	$line =~ s/^\s+//;
+}
+
+sub splittitle {
+	$line =~ s/^=+//;
+	$line =~ s/=+$//;
+	$title = $line;
+	$id = "";
+	if ($line =~ /\|/) {
+		$title =~ s/\|.+//;
+		$id = $line;
+		$id =~ s/^.+\|//;
+	}
+	$title =~ s/\s+$//;
+	$title =~ s/^\s+//;
+	$id =~ s/\s+$//;
+	$id =~ s/^\s+//;
+}
+
+sub Usage {
+	my $error = shift;
+	print "Usage: wt2db [OPTIONS] {FILE]\n";
+	print "\n";
+	print "Options:\n";
+	print "-o, --output-to    write to the specified file.\n";
+	print "-v, --verbose      show diagnostic output.\n";
+	print "-h, --help         show this usage message.\n";
+	exit($error);
+}
+1;
--- a/LDP/wt2db/test.pl
+++ b/LDP/wt2db/test.pl
@ -0,0 +1,19 @@
+#!/usr/bin/perl
+#
+use Wt2Db;
+$WT = new Wt2Db;
+
+$buffer = "foo bar
+
+baz
+
+";
+
+$outbuf = '';
+
+foreach $line (split /\n/, $buffer) {
+	$WT->ProcessLine($line);
+}
+$WT->ProcessEnd();
+$outbuf = $WT->Buffer();
+print $outbuf;
--- a/LDP/wt2db/wt2db
+++ b/LDP/wt2db/wt2db
@ -2,37 +2,13 @@
 #
 #Converts txt files into docbook.
 #
-# Requirements:
-# 
-# If you use the "ldp:" namespace, you must have wget installed.
-# Wget is used to request an xml record from the LDP # database,
-# http://db.linuxdoc.org.
-# 
+use Wt2Db;
+$WT2DB = new Wt2Db;

-use File::Basename;
-use HTML::Entities;
-
-my($txtfile, $dbfile) = '';
-
-#These keep track of which constructs we're in the middle of
-my($level1,
-   $level2,
-   $level3,
-   $orderedlist,
-   $listitem,
-   $itemizedlist,
-   $para,
-   $qandaset,
-   $qandaentry,
-   $answer);
-
-my($line);
-my($id, $title);
-
-my($verbose);
-
-my($error);
-$error = 0;
+my $txtfile = '';
+my $dbfile = '';
+my $verbose = 0;
+my $error = 0;

 # read in cmd-line arguments
 #
@ -42,7 +18,7 @@ while (1) {
 		$dbfile = $ARGV[0];
 		shift(@ARGV);
 	} elsif($ARGV[0] eq "-h" or $ARGV[0] eq "--help") {
-		&usage;
+		$WT2DB->Usage();
 	} elsif($ARGV[0] eq "-v" or $ARGV[0] eq "--verbose") {
 		$verbose++;
 		shift(@ARGV);
@ -56,467 +32,4 @@ while (1) {
 	}
 }

-# abort if no input file given
-# 
-if($txtfile eq '') {
-	$error = 1;
-	&usage();
-} elsif( !(-r $txtfile) ) {
-	print "txt2db: ERROR cannot read $f ($!)\n\n";
-	$error = 1;
-	&usage();
-}
-
-unless ($dbfile) {
-	($basename, $path, $ext) = fileparse($txtfile);
-	$dbfile = $basename;
-	$dbfile =~ s/\..*?$/\.sgml/;
-}
-
-$buf = '';
-
-&proc_txt($txtfile);
-
-open(DB, "> $dbfile") || die "txt2db: cannot write to $dbfile ($!)\n";
-print DB $buf, "\n";
-close(DB);
-
-exit(0);
-
-# -----------------------------------------------------------
-
-sub proc_txt {
-	my($f) = @_;
-	
-	my($linenumber);
-	$linenumber = 0;
-	
-	my ($noparatag,
-	    $noparadepth);
-	$noparadepth = 0;
-	$noparaline = 0;
-
-	# read in the text file
-	#
-	open(TXT, "$f") || die "txt2db: cannot open $f ($!)\n";
-	while ($originalline = <TXT>) {
-		$line = $originalline;
-		$linenumber++;
-
-		&trimline;
-
-		# blank lines
-		if ($line eq '') {
-			if ($noparadepth == 0) {
-				&closenonsect;
-				next;
-			}
-		}
-
-		# capitalize hints that can be entered in lowercase
-		#
-		$line =~ s/^q:/Q:/;
-		$line =~ s/^a:/A:/;
-
-		# encode entities
-		#
-#		while ($line =~ //) {
-#		}
-#		decode_entities($line);
-		encode_entities($line);
-		
-		# inline docbook
-		#
-		# ulink
-		# 
-		while ($line =~ /\[\[/) {
-			unless ($line =~ /\]\]/) {
-				$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
-			}
-
-			# separate link url from link name
-			#
-			$link = $line;
-			$link=~ s/\n//g;
-			$link =~ s/.*?\[\[//;
-			$link =~ s/\]\].*?$//;
-			if ($link =~ /\|/) {
-				$linkname = $link;
-				$link =~ s/\|.+$//;
-				$linkname =~ s/^\S+\|//;
-			} else {
-				$linkname = $link;
-			}
-
-			# kill quotes, they mess us up
-			# 
-			$link =~ s/'/%27/g;
-
-			# namespaces are handled differently
-			#
-			print "$link\n" if ($verbose);
-			if ($link =~ /^http:/) {
-				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
-			} elsif ($link =~ /^mailto:/) {
-				$linkname =~ s/^mailto://;
-				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
-			} elsif ($link =~ /^wiki:/) {
-				$linkname =~ s/^wiki://;
-				$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
-				$link =~ s/\ /+/;
-				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
-			} elsif ($link =~ /^ldp:/) {
-				$linkname =~ s/^ldp://;
-				$link =~ s/^ldp://;
-				$tempfile = "/tmp/txt2db-" . $rand;
-				$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
-				system("$cmd");
-				open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n";
-				$link = "";
-				while ($url_line = <URL>) {
-					$url_line =~ s/\n//;
-					if ($url_line =~ /identifier/) {
-						$link .= $url_line;
-					}
-				}
-				close(URL);
-				unlink $tempfile;
-				$link =~ s/^.*?<identifier>//;
-				$link =~ s/<\/identifier>.*?$//;
-				if ($link eq '') {
-					$linkname = "ERROR: LDP namespace resolution failure on $linkname";
-				}
-				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
-			} elsif ($link =~ /^file:/) {
-				$linkname =~ s/^file://;
-				$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
-			} else {
-				$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
-			}
-		}
-
-		# emphasis
-		#
-		while ($line =~ /'''.*'''/) {
-			$line =~ s/'''/<emphasis role='bold'>/;
-			$line =~ s/'''/<\/emphasis>/;
-		}
-
-		# this block defines DocBook structures that won't be broken up with 
-		# paragraphs when we hit empty lines:
-		#
-		#	<para>
-		#	<sect1>
-		#	<sect2>
-		#	<sect3>
-		#	<programlisting>
-		#	<literallayout>
-	
-		# forget about nopara
-		if ($noparadepth == 0) {
-			$noparatag = "";
-		}
-		
-		# start a new nopara section
-		#
-		if ((($line =~ /^<para>/) or
-		     ($line =~ /^<sect/) or
-		     ($line =~ /^<screen>/) or
-		     ($line =~ /^<literallayout>/) or
-		     ($line =~ /^<programlisting>/)) and
-		    ($noparadepth == 0)) { 
-		    	&closepara;
-			$noparatag = $line;
-			$noparatag =~ s/^.*?<//;
-			$noparatag =~ s/>.*?$//;
-			$noparaline = $linenumber;
-			if ($line =~ /^<screen>/) {
-				unless ($para) {
-					$line = "<para>" . $line;
-					$para = 1;
-				}
-			}
-		}
-
-		# count noparadepth
-		#
-		if ($noparatag ne '') {
-			$temp = $line;
-			while ($temp =~ /<$noparatag>/) {
-				$temp =~ s/<?$noparatag>//;
-				$noparadepth ++;
-			}
-			while ($temp =~ /<\/$noparatag>/) {
-				$temp =~ s/<?\/$noparatag>//;
-				$noparadepth --;
-				if ($noparadepth == 0) {
-					$noparaline == 0;
-				}
-			}
-
-			# runon protection
-			#
-			if ($linenumber >= ($noparaline + 100)) {
-				$buf .= "ERROR: runon block starting on line $noparaline\n";
-				last;
-			}
-
-			# recover original line -- no whitespace modifiers
-			#
-			$line = $originalline;
-
-		# sect3
-		#
-		} elsif ($line =~ /^===/) {
-			&close3;
-			&splittitle;
-			if ($id eq '') {
-				$line = "<sect3><title>$title</title>\n";
-			} else {
-				$line = "<sect3 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level3 = 1;
-
-		# sect2
-		#
-		} elsif ($line =~ /^==/) {
-			&close2;
-			&splittitle;
-			if ($id eq '') {
-				$line = "<sect2><title>$title</title>\n";
-			} else {
-				$line = "<sect2 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level2 = 1;
-
-		# sect1
-		#
-		} elsif ($line =~ /^=/) {
-			&close1;
-			&splittitle;
-			if ($id eq '') {
-				$line = "<sect1><title>$title</title>\n";
-			} else {
-				$line = "<sect1 id='$id'><title id='$id-title'>$title</title>\n";
-			}
-			$level1 = 1;
-
-		# orderedlist
-		#
-		} elsif ($line =~ /^#/) {
-			&closeitemizedlist;
-			if ($orderedlist == 0) {
-				$buf .= "\n<orderedlist>\n";
-				$orderedlist = 1;
-			}
-			&closelistitem;
-			$line =~ s/^#//;
-			&trimline;
-			$line =~ s/^/<listitem><para>/;
-			$listitem = 1;
-			$para = 1;
-		} elsif ($line =~ /^\/#/) {
-			$line =~ s/^\/#//;
-			&trimline;
-			&closeorderedlist;
-
-		# itemizedlist
-		#
-		} elsif ($line =~ /^\*/) {
-			&closeorderedlist;
-			if ($itemizedlist == 0) {
-				$buf .= "\n<itemizedlist>\n";
-				$itemizedlist = 1;
-			}
-			&closelistitem;
-			$line =~ s/^\*//;
-			&trimline;
-			$line =~ s/^/<listitem><para>/;
-			$listitem = 1;
-			$para = 1;
-		} elsif ($line =~ /\/\*/) {
-			$line =~ s/^\/\*//;
-			&trimline;
-			&closeitemizedlist;
-
-		# question
-		#
-		} elsif ($line =~ /^Q:/) {
-			&closelists;
-			&closeqandaentry;
-			$line =~ s/^Q://;
-			&trimline;
-			&splittitle;
-			if ($id eq '') {
-				$line = "<question><para>" . $title . "</para></question>\n";
-			} else {
-				$line = "<question id='$id'><para>" . $title . "</para></question>\n";
-			}
-			unless ($qandaentry == 1) {
-				$line = "<qandaentry>\n" . $line;
-				$qandaentry = 1;
-			}
-			if ($qandaset == 0) {
-				$line = "<qandaset defaultlabel='qanda'>\n". $line;
-				$qandaset = 1;
-			}
-
-		# answer
-		#
-		} elsif ($line =~ /^A:/) {
-			$line =~ s/^A://;
-			&trimline;
-			&closeanswer;
-			$line = "<answer><para>" . $line;
-			$answer = 1;
-			$para = 1;
-
-		} elsif ($line =~ /^\s*----\s*$/) {
-			$line = '';
-
-		# para
-		#
-		} else {
-			if (($para == 0) and ($noparatag eq '')) {
-				$line = "<para>" . $line;
-				$para = 1;
-			} else {
-				$line .= " ";
-			}
-		}
-
-		$buf .= "$line ";
-	}
-	# close nesting
-	#
-	&close1;
-
-	if ($noparadepth > 0) {
-		$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
-	}
-}
-
-sub close1 {
-	&close2;
-	if ($level1 == 1) {
-		$buf .= "</sect1>\n";
-		$level1 = 0;
-	}
-}
-
-sub close2 {
-	&close3;
-	if ($level2 == 1) {
-		$buf .= "</sect2>\n";
-		$level2 = 0;
-	}
-}
-
-sub close3 {
-	&closeorderedlist;
-	&closeitemizedlist;
-	&closepara;
-	&closeqandaset;
-	if ($level3 == 1) {
-		$buf .= "</sect3>\n";
-		$level3 = 0;
-	}
-}
-
-sub closenonsect {
-	&closepara;
-#	&closeorderedlist;
-#	&closeitemizedlist;
-}
-
-sub closelistitem {
-	&closepara;
-	if ($listitem == 1 ) {
-		$buf .= "</listitem>\n";
-		$listitem = 0;
-	}
-}
-
-sub closeorderedlist {
-	&closepara;
-	&closelistitem;
-	if ($orderedlist == 1 ) {
-		$buf .= "</orderedlist>\n";
-		$orderedlist = 0;
-	}
-}
-
-sub closeitemizedlist {
-	&closepara;
-	&closelistitem;
-	if ($itemizedlist == 1 ) {
-		$buf .= "</itemizedlist>\n";
-		$itemizedlist = 0;
-	}
-}
-
-sub closelists {
-	&closeitemizedlist;
-	&closeorderedlist;
-}
-
-sub closeanswer {
-	&closepara;
-	if ($answer == 1) {
-		$buf .= "</answer>\n";
-		$answer = 0;
-	}
-}
-
-sub closeqandaentry {
-	&closeanswer;
-	if ($qandaentry == 1) {
-		$buf .= "</qandaentry>\n";
-		$qandaentry = 0;
-	}
-}
-
-sub closeqandaset {
-	&closeqandaentry;
-	if ($qandaset == 1) {
-		$buf .= "</qandaset>\n";
-		$qandaset = 0;
-	}
-}
-
-sub closepara {
-	if ($para == 1) {
-		$buf .= "</para>\n";
-		$para = 0;
-	}
-}
-
-sub trimline {
-	$line =~ s/\s+$//;
-	$line =~ s/^\s+//;
-}
-
-sub splittitle {
-	$line =~ s/^=+//;
-	$line =~ s/=+$//;
-	$title = $line;
-	$id = "";
-	if ($line =~ /\|/) {
-		$title =~ s/\|.+//;
-		$id = $line;
-		$id =~ s/^.+\|//;
-	}
-	$title =~ s/\s+$//;
-	$title =~ s/^\s+//;
-	$id =~ s/\s+$//;
-	$id =~ s/^\s+//;
-}
-
-sub usage {
-	print "Usage: txt2db [-v] [-h|-o <sgml file>] <text file>\n";
-	print "-o, --output-to    write to the specified file.\n";
-	print "-v, --verbose      show diagnostic output.\n";
-	print "-h, --help         show this usage message.\n";
-	exit($error);
-}
-
+$WT2DB->ProcessFile($txtfile, $dbfile, $verbose);