added encoding option

2002-06-21 01:50:53 +00:00 · 2002-06-21 01:50:53 +00:00 · 041e777c89
parent 199e47d087
commit 041e777c89
8 changed files with 352 additions and 186 deletions
--- a/LDP/wt2db/Changelog
+++ b/LDP/wt2db/Changelog
@ -13,3 +13,16 @@ CVS			Switched -V and -v, now -v is version, -V is verbose.
 			Changed -a, --article to -x, --xml and -s, --sgml to
 			let you pick your own doctype.
 			Added -n, --nonet parameter to disable network
 			lookups.
 			Fixed bug in screen blocks, para tags were being
 			closed when not open.
 			Deleted [ for <filename>, added [[file: namespace.
 			[[ is now for internal links and becomes <xref>.
            Encoding of literal block tags into entities.
            Added -e, --encoding to specify encoding.            
--- a/LDP/wt2db/INSTALL
+++ b/LDP/wt2db/INSTALL
@ -16,7 +16,7 @@ to build the package, and run:
 	make test
 to convert a small, arbitrary bit of WikiText. If the output looks
-like DocBook, go ahead and run, as root:
+like DocBook, go ahead and run:
 	make install
--- a/LDP/wt2db/Makefile.PL
+++ b/LDP/wt2db/Makefile.PL
@ -20,7 +20,7 @@ WriteMakefile(
    PREREQ_PM       => { 
                         'File::Basename'	=> 0,
 		         'HTML::Entities'	=> 0,
-		         'FileHandle'	=> 0,
+		         'FileHandle'		=> 0,
                       },
    MAN1PODS        => {
                         "doc/wt2db.pod"  => 'blib/man1/wt2db.1',
--- a/LDP/wt2db/README
+++ b/LDP/wt2db/README
@ -1,75 +1,11 @@
 This is wt2db version 0.1.
 wt2db is a utility to convert text files in WikiText format into
 DocBook.  It generates a DocBook fragment, not valid DocBook.
 See the manpage for more information and WikiText help.
 Reporting Bugs
 --------------
 Bugs should be reported at sourceforge.net/projects/linuxdoc. Select
 'wt2db' in the 'Category' field.
 WikiText Tags
 -------------
 The following constructs are currently supported.
 Foo			<para>Foo</para>
 =Title=			<sect1><title>Title</title>
 			</sect1>
 =Title|id=		<sect1 id='id'><title>Title</title>
 			</sect1>
 			works for other sect levels as well, and many other
 			tags. It is either the "id" value, or the "title"
 			value, depending on the semantics of the particular
 			tag. Usage should be obvious in context.
 ==Title==		<sect2><title>Title</title>
 			</sect2>
 ===Title===		<sect3><title>Title</title>
 			</sect3>
 #Foo			<orderedlist>
 #Bar			<listitem><para>Foo</para></listitem>
 #Baz			<listitem><para>Bar</para></listitem>
 /#			<listitem><para>Baz</para></listitem>
 			</orderedlist>
 *Foo			<simplelist>
 *Bar			<listitem><para>Foo</para></listitem>
 *Baz			<listitem><para>Bar</para></listitem>
 /*			<listitem><para>Baz</para></listitem>
 			</simplelist>
 [[http://foo.org]]	<ulink url='http://foo.org'>
 			  <citetitle>http://foo.org</citetitle>
 			</ulink>
 [[http://foo.org Foo]]	<ulink url='http://foo.org'>
 			  <citetitle>Foo</citetitle>
 			</ulink>
 [[http://foo.org|Foo]]	You can also delimit with the pipe character "|".
 			This works on any of these [[]] tags.
 [[file:Foo]]		<filename>Foo</filename>
 '''Foo'''		<emphasis>Foo</emphasis>
 A few DocBook structures do not have <para> tags wrapped around them. They
 are <para> itself (duh!), <sect?> and <programlisting>. If you insert anything
 using these tags, no <para> tags will be wrapped around it or inserted into it.
 So if you want fine control over your <para> tags, insert them yourself.
 These tags include:
 <programlisting/>
 <screen/>
--- a/LDP/wt2db/doc/wt2db.pod
+++ b/LDP/wt2db/doc/wt2db.pod
@ -1,34 +1,33 @@
 =head1 NAME
-B<wt2db> - utility to convert WikiText documents into DocBook XML/SGML.
+B<wt2db> - converts WikiText documents into DocBook XML/SGML.
 =head1 SYNOPSIS
 B<wt2db> [I<OPTION>] [I<FILE>]
 =head1 DESCRIPTION
 B<wt2db> converts a text file in a special format similar to that used
 in WikiWikiWebs into DocBook XML/SGML.
-
+The DocBook it writes out by default is only
-The DocBook it writes out is only
+a fragment, but it will write a complete document upon request.
 a fragment, not a complete document, because it has no DOCTYPE declaration.
 And due to the source format, there is no meta-data, such as in an
 <articleinfo> structure.
 As part of a larger publishing or document processing system, it is
 expected that later processing will supply these elements.
 By default it reads from STDIN and writes to STDOUT. However, if given a
 filename, it will read that file, and an output filename can also be
 specified as a command-line option.
 =head1 OPTIONS
 B<-x>, B<--xml> add XML DOCTYPE and article tags.
 B<-s>, B<--sgml> add SGML DOCTYPE and article tags.
 B<-n>, B<--nonet> do not access the network.
 B<-o>, B<--output-to> I<filename> write to the specified file.
 B<-v>, B<--verbose> show diagnostic output.
@ -37,6 +36,126 @@ B<-V>, B<--version> show program version.
 B<-h>, B<--help> show a usage message.
 =head1 NOTES
 B<wt2db> was developed to provide an easier way to write
 DocBook documentation.
 WikiText is based on the form of text that is used in a
 WikiWikiWeb. It provides very simple and easy to remember
 tags so you can write a Wiki article without learning HTML.
 B<wt2db> was originally written to convert Wikipedia articles
 into DocBook.
 The Wiki format has been enhanced in several ways to make it
 more powerful for authors. Support has been added for Wiki tags
 that don't exist in any real Wiki, by giving common DocBook
 elements their own Wiki tags. Support has also been added
 for including DocBook elements right in the source file.
 This means WikiText is a merging of DocBook into a plain text file.
 In its simplest form, it is plain text. A plain text
 document can be processed by B<wt2db> and converted into
 DocBook.
 Or, a complete and valid DocBook document can be processed,
 and will pass through the B<wt2db> filters and come out
 unchanged.
 Virtually any combination of DocBook with plain text will work,
 with the additional Wiki style tags to make things even easier
 for authors.
 It puts all of the semantics of DocBook
 at your disposal, while being as easy to write as a Wiki page.
 =head1 WIKITEXT
 These are the tags which are supported in this release of
 B<wt2db>. All DocBook tags are also supported. If you encounter
 any valid DocBook that is not handled correctly, please file
 a bug report.
 Foo                    <para>Foo</para>
 =Title=                <sect1>
                            <title>Title</title>
                        </sect1>
 =Title|id=             <sect1 id='id'>
                            <title>Title</title>
                        </sect1>
 The id attribute, delimited with a pipe character,
 works for other sect levels as well, and many other
 tags. In some cases it is not an id value, but the
 title, depending on the semantics of the particular
 tag. Usage should be obvious in context.
 ==Title==              <sect2>
                            <title>Title</title>
                        </sect2>
 ===Title===            <sect3>
                            <title>Title</title>
                        </sect3>
 #Foo                   <orderedlist>
 #Bar                       <listitem>
 #Baz                           <para>Foo</para>
 /#                         </listitem>
                            <listitem>
                                <para>Bar</para>
                            </listitem>
                            <listitem>
                                <para>Baz</para>
                            </listitem>
                        </orderedlist>
 *Foo                   <simplelist>
 *Bar                       <listitem>
 *Baz                           <para>Foo</para>
 /*                         </listitem>
                            <listitem>
                                <para>Bar</para>
                            </listitem>
                            <listitem>
                                <para>Baz</para>
                            </listitem>
                        </simplelist>
 [[foo]]                <xref linkend='foo' endterm='foo'/>
 [[link:Foo]]
 [[file:/dev/foo]]      <filename>/dev/foo</filename>
 [[http://foo.org]]     <ulink url='http://foo.org'>
                            <citetitle>http://foo.org</citetitle>
                        </ulink>
 [[http://foo.org Foo]] <ulink url='http://foo.org'>
 [[http://foo.org|Foo]]     <citetitle>Foo</citetitle>
                        </ulink>
 '''Foo'''              <emphasis>Foo</emphasis>
 A few DocBook structures will not have <para> tags wrapped around them. They
 are <para> itself, <sect?> and <programlisting>. If you insert anything
 using these tags, no <para> tags will be wrapped around it or inserted into it.
 So if you want fine control over your <para> tags, insert them yourself.
 The <screen> element will be wrapped with <para> tags, but no internal
 paragraph breaks will be generated.
 =head1 RESTRICTIONS
 Currently only a single form of WikiText is supported, which is very
 similar to that used by the Wikipedia (see http://www.wikipedia.com).
 A future release will be configurable to support additional styles of
 WikiText.
 =head1 BUGS
 Bugs are tracked in the SourceForge project page at:
@ -46,25 +165,13 @@ http://www.sourceforge.net/projects/linuxdoc
 If you report a bug in B<wt2db>, specify wt2db as the category so it will
 be routed to the appropriate person.
 =head1 RESTRICTIONS
 Currently only a single form of WikiText is supported, which is very
 similar to that used by the Wikipedia (see http://www.wikipedia.com).
 A future release will be configurable to support additional styles of
 WikiText.
 =head1 NOTES
 B<wt2db> was developed as a project of the Linux Documentation Project
 to create an easier way of writing DocBook documentation. While it is
 useful on its own, it is part of Lampadas, the LDP's document
 production system.
 =head1 SEE ALSO
 See the home page of the Linux Documentation Project,
 http://www.tldp.org for updates and more information.
 =head1 AUTHOR
 This man page was written by David C. Merrill <david@lupercalia.net>.
--- a/LDP/wt2db/lib/Wt2Db.pm
+++ b/LDP/wt2db/lib/Wt2Db.pm
@ -19,36 +19,42 @@ use Exporter;
 	Reset
 	);
-# These keep track of which constructs we're in the middle of
+&Reset;
 #
 $level1 = 0;
 $level2 = 0;
 $level3 = 0;
 $orderedlist = 0;
 $listitem = 0;
 $itemizedlist = 0;
 $para = 0;
 $qandaset = 0;
 $qandaentry = 0;
 $answer = 0;
-# These are passed in by the caller
+# Call this before rerunning ProcessLine to clear state.
-#
+# 
-$txtfile = '';
+sub Reset {
-$dbfile = '';
+	$level1 = 0;
-$verbose = 0;
+	$level2 = 0;
 	$level3 = 0;
 	$orderedlist = 0;
 	$listitem = 0;
 	$itemizedlist = 0;
 	$para = 0;
 	$qandaset = 0;
 	$qandaentry = 0;
 	$answer = 0;
-# These maintain state
+	# These are passed in by the caller
-#
+	#
-$line = '';
+	$txtfile = '';
-$linenumber = 0;
+	$dbfile = '';
-$id = '';
+	$verbose = 0;
-$title = '';
+	$doctype = 0;
-$buf = '';
+	$nonet = 0;
-$noparatag = 0;
+	# These maintain state
-$noparadepth = 0;
+	#
-$noparaline = 0;
+	$line = '';
 	$linenumber = 0;
 	$id = '';
 	$title = '';
 	$buf = '';
 	$noparatag = 0;
 	$noparadepth = 0;
 	$noparaline = 0;
 }
 # -----------------------------------------------------------
@ -62,7 +68,7 @@ sub new {
 }
 sub ProcessFile {
-	($self, $txtfile, $dbfile, $verbose, $doctype) = @_;
+	($self, $txtfile, $dbfile, $verbose, $doctype, $nonet, $encoding) = @_;
 	# Read from STDIN if no input file given
 	# 
@ -87,9 +93,10 @@ sub ProcessFile {
 	# wrap article if requested
 	#
    $encoding = 'ISO-8859-1' unless ($encoding);
 	if ($doctype eq 'XML') {
 		print "Adding XML DOCTYPE and article tags." if ($verbose);
-		$buf = '<?xml version="1.0" standalone="no"?>' . "\n";
+		$buf = '<?xml version="1.0" encoding="' . $encoding . '" standalone="no"?>' . "\n";
 		$buf .= '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"' . "\n";
     		$buf .= '    "http://docbook.org/xml/4.1.2/docbookx.dtd"';
 		$buf .= "\[\]\>\n";
@ -153,8 +160,8 @@ sub ProcessLine {
 	# inline docbook
 	#
-	# ulink
+	# parse all links, internal and external
-	# 
+	#
 	while ($line =~ /\[\[/) {
 		unless ($line =~ /\]\]/) {
 			$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
@ -174,15 +181,22 @@ sub ProcessLine {
 			$linkname = $link;
 		}
-		# kill quotes, they mess us up
+		# kill quotes inside links, they mess us up because
 		# we have to wrap this string with quotes.
 		# perhaps it should be encoding the entire URL?
 		# 
 		$link =~ s/'/%27/g;
 		# namespaces are handled differently
 		#
 		print "$link\n" if ($verbose);
-		if ($link =~ /^http:/) {
+
 		if ($link =~ /^http:\/\//) {
 			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
 		} elsif ($link =~ /^link:/) {
 			$link =~ s/^link://;
 			$linkname =~ s/^link://;
 			$line =~ s/\[\[.*?\]\]/<xref linkend='$link' endterm='$link'\>\<\/xref\>/;
 		} elsif ($link =~ /^mailto:/) {
 			$linkname =~ s/^mailto://;
 			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
@ -194,28 +208,41 @@ sub ProcessLine {
 		} elsif ($link =~ /^ldp:/) {
 			$linkname =~ s/^ldp://;
 			$link =~ s/^ldp://;
-			$tempfile = "/tmp/wt2db-" . $rand;
+			if ($nonet) {
-			$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
+				$line =~ s/\[\[.*?\]\]/<citetitle>$link<\/citetitle>/;
-			system("$cmd");
+			} else {
-			open(URL, "$tempfile") || die "wt2db: cannot open temporary file ($!)\n\n";
+				$tempfile = "/tmp/wt2db-" . $rand;
-			$link = "";
+				$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
-			while ($url_line = <URL>) {
+				print "$cmd\n" if ($verbose > 1);
-				$url_line =~ s/\n//;
+				$return = system("$cmd");
-				if ($url_line =~ /identifier/) {
+				unless ($return) {
-					$link .= $url_line;
+					open(URL, "$tempfile") || die "wt2db: cannot open temporary file ($!)\n\n";
 					$link = '';
 					while ($url_line = <URL>) {
 						$url_line =~ s/\n//;
 						if ($url_line =~ /identifier/) {
 							$link .= $url_line;
 						}
 					}
 					close(URL);
 					unlink $tempfile;
 				}
 				$link =~ s/^.*?<identifier>//;
 				$link =~ s/<\/identifier>.*?$//;
 				if ($link eq '') {
 					$linkname = "ERROR: LDP namespace resolution failure on $linkname";
 				}
 				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
 			}
 			close(URL);
 			unlink $tempfile;
 			$link =~ s/^.*?<identifier>//;
 			$link =~ s/<\/identifier>.*?$//;
 			if ($link eq '') {
 				$linkname = "ERROR: LDP namespace resolution failure on $linkname";
 			}
 			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
 		} elsif ($link =~ /^file:/) {
 			$linkname =~ s/^file://;
 			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
 		} elsif ($link =~ /^dir:/) {
 # FIXME: need to check attribute on filename element
 # 
 			$linkname =~ s/^dir://;
 			$line =~ s/\[\[.*?\]\]/<filename type='directory'>$linkname<\/filename>/;
 		} else {
 			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
 		}
@ -238,7 +265,8 @@ sub ProcessLine {
 	#	<programlisting>
 	#	<literallayout>
-	# forget about nopara
+	# forget about being in nopara state if we're no longer in one
 	# 
 	if ($noparadepth == 0) {
 		$noparatag = "";
 	}
@ -248,6 +276,8 @@ sub ProcessLine {
 	if ((($line =~ /^<para>/) or
 	     ($line =~ /^<sect/) or
 	     ($line =~ /^<screen>/) or
 	     ($line =~ /^<screen>/) or
 	     ($line =~ /^<blockquote>/) or
 	     ($line =~ /^<literallayout>/) or
 	     ($line =~ /^<articleinfo>/) or
 	     ($line =~ /^<programlisting>/)) and
@ -257,9 +287,12 @@ sub ProcessLine {
 		$noparatag =~ s/^.*?<//;
 		$noparatag =~ s/>.*?$//;
 		$noparaline = $linenumber;
 		# screen sections don't embed para tags, but are wrapped in them
 		#
 		if ($line =~ /^<screen>/) {
 			unless ($para) {
-				$line = "<para>" . $line;
+				$buf .= "<para>";
 				$para = 1;
 			}
 		}
@ -289,9 +322,29 @@ sub ProcessLine {
 		}
 		# recover original line -- no whitespace modifiers
 		# allow nonencoded text in unparsed lines, when in a literal block
 		#
 		$line = $originalline;
 		chomp($line);
 		if ($line =~ /^<$noparatag>/ ) {
 			$starttag = "<$noparatag>";
 		} else {
 			$starttag = '';
 		}
 		if ($line =~ /<\/$noparatag>/ ) {
 			$endtag = "<\/$noparatag>";
 		} else {
 			$endtag = '';
 		}
 		$line =~ s/<$noparatag>//;
 		$line =~ s/<\/$noparatag>//;
 	    if (($noparatag eq 'screen') or
 	        ($noparatag eq 'literallayout') or
 	        ($noparatag eq 'programlisting')) {
    		encode_entities($line);
        }
 		$line = "$starttag$line$endtag";
 	# sect3
 	#
@ -376,9 +429,9 @@ sub ProcessLine {
 		&trimline;
 		&splittitle;
 		if ($id eq '') {
-			$line = "<question><para>" . $title . "</para></question>";
+			$line = "<question><para>$title</para></question>";
 		} else {
-			$line = "<question id='$id'><para>" . $title . "</para></question>";
+			$line = "<question id='$id'><para>$title</para></question>";
 		}
 		unless ($qandaentry) {
 			$line = "<qandaentry>\n" . $line;
@ -430,42 +483,6 @@ sub Buffer {
 	return $buf;
 }
 # Basically a cut-and-paste of the original declarations,
 # to make sure all variables are completely cleared.
 #
 # Call this before rerunning ProcessLine to clear state.
 # 
 sub Reset {
 	# Flags that keep track of which constructs we are in the middle of.
 	#
 	$level1 = $level2 = $level3 = 0;
 	$orderedlist = $listitem = $itemizedlist = 0;
 	$para = 0;
 	$qandaset = $qandaentry = $answer = 0;
 	# Values that are passed in by the caller.
 	#
 	$txtfile = $dbfile = '';
 	$verbose = 0;
 	# Per-run processing state: current line, its number, the last
 	# id/title pair, the output buffer and the nopara bookkeeping.
 	#
 	$line = $id = $title = $buf = '';
 	$linenumber = 0;
 	$noparatag = $noparadepth = $noparaline = 0;
 }
 sub close1 {
 	&close2;
 	if ($level1) {
@ -570,15 +587,78 @@ sub splittitle {
 	$line =~ s/^=+//;
 	$line =~ s/=+$//;
 	$title = $line;
 	$id = "";
 	if ($line =~ /\|/) {
 		$title =~ s/\|.+//;
 		$id = $line;
 		$id =~ s/^.+\|//;
 	} else {
 		$id = &anchorfix($title);
 	}
 	$title =~ s/\s+$//;
 	$title =~ s/^\s+//;
 	$id =~ s/\s+$//;
 	$id =~ s/^\s+//;
 }
 # Build an id string from a title.
 #
 # Takes the title text as its only argument and returns the derived
 # anchor. The title is trimmed and lowercased, entities are decoded and
 # re-encoded so that accented letters can be reduced to their base
 # letter, and punctuation and digits are spelled out as hyphen-delimited
 # words. Runs of hyphens are collapsed to one and leading/trailing
 # hyphens are removed.
 #
 sub anchorfix {
 	my $anchor = $_[0];
 	# Spelled-out replacements; each is wrapped in hyphens when applied.
 	my %word_for = (
 		'/' => 'slash',  '\\' => 'bslash',   ',' => 'comma',
 		'.' => 'dot',    '!'  => 'bang',     '?' => 'question',
 		'+' => 'plus',   '*'  => 'x',        '(' => 'op',
 		')' => 'cp',     '@'  => 'at',       '^' => 'hat',
 		'=' => 'eq',     '~'  => 'tilde',    '|' => 'pipe',
 		'[' => 'lsqb',   ']'  => 'rsqb',
 		'0' => 'zero',   '1'  => 'one',      '2' => 'two',
 		'3' => 'three',  '4'  => 'four',     '5' => 'five',
 		'6' => 'six',    '7'  => 'seven',    '8' => 'eight',
 		'9' => 'nine',
 	);
 	$anchor = lc(trim($anchor));
 	$anchor = decode_entities($anchor);
 	# Literal hyphens are spelled out first, before any of the
 	# replacements below introduce hyphens of their own.
 	$anchor =~ s/-/-dash-/g;
 	$anchor =~ s/&/-and-/g;
 	$anchor =~ s/;//g;
 	# Re-encode so accented letters appear as named entities, then keep
 	# only the base letter of each (e.g. "&eacute" becomes "e").
 	$anchor = encode_entities($anchor);
 	$anchor =~ s/&(\w)(?:grave|acute|circ|uml|tilde|cedil)/$1/g;
 	# Whatever entity markup is left is flattened like a literal "&".
 	$anchor =~ s/&/-and-/g;
 	$anchor =~ s/;//g;
 	$anchor =~ s/\s+/-/g;
 	$anchor =~ s/['`]//g;
 	# The replacement words contain only letters, so one pass over the
 	# string is equivalent to substituting each character in turn.
 	$anchor =~ s/([\/\\,.!?+*()\@\^=~|\[\]0-9])/-$word_for{$1}-/g;
 	$anchor =~ s/dcm_at/-at-/gi;
 	# Replace every dollar sign, not just the first one.
 	$anchor =~ s/\$/S/g;
 	# Collapse any run of hyphens (three or more used to leave a double
 	# hyphen behind), then drop a leading or trailing one.
 	$anchor =~ s/-{2,}/-/g;
 	$anchor =~ s/^-//;
 	$anchor =~ s/-$//;
 	return trim($anchor);
 }
 # Return a copy of the first argument with leading and trailing
 # whitespace removed; the caller's value is not modified.
 #
 sub trim {
 	my ($text) = @_;
 	for ($text) {
 		s/^\s+//;
 		s/\s+$//;
 	}
 	return $text;
 }
 1;
--- a/LDP/wt2db/test.pl
+++ b/LDP/wt2db/test.pl
@ -5,7 +5,26 @@ $WT = new Wt2Db;
 $buffer = "foo bar
-baz
+=Section|section=
 paragraph
 ==Subsection|subsection==
 paragraph
 =Namespaces=
 ==MailTo==
 [[mailto:david@lupercalia.net]]
 [[mailto:david@lupercalia.net|David Merrill]]
 ==HTTP==
 [[http://www.tldp.org]]
 [[http://www.tldp.org|The Linux Documentation Project]]
 ";
--- a/LDP/wt2db/wt2db
+++ b/LDP/wt2db/wt2db
@ -10,6 +10,8 @@ $WT2DB = new Wt2Db;
 my $txtfile = '';
 my $dbfile = '';
 my $doctype = '';
 my $nonet = 0;
 my $encoding = 'ISO-8859-1';
 my $verbose = 0;
 my $error = 0;
@ -22,6 +24,13 @@ while (1) {
 	} elsif($ARGV[0] eq "-x" or $ARGV[0] eq "--xml") {
 		$doctype = 'XML';
 		shift(@ARGV);
 	} elsif($ARGV[0] eq "-e" or $ARGV[0] eq "--encoding") {
 		shift(@ARGV);
        $encoding = $ARGV[0];
 		shift(@ARGV);
 	} elsif($ARGV[0] eq "-n" or $ARGV[0] eq "--nonet") {
 		$nonet = 1;
 		shift(@ARGV);
 	} elsif($ARGV[0] eq "-o" or $ARGV[0] eq "--output-to") {
 		shift(@ARGV);
 		$dbfile = $ARGV[0];
@ -44,7 +53,7 @@ while (1) {
 	}
 }
-$WT2DB->ProcessFile($txtfile, $dbfile, $verbose, $doctype);
+$WT2DB->ProcessFile($txtfile, $dbfile, $verbose, $doctype, $nonet, $encoding);
 sub version {
 	print "wt2db version $VERSION\n";
@ -53,7 +62,7 @@ sub version {
 	print "Converts a WikiText file into DocBook XML/SGML.\n";
 	print "\n";
 	print "This is free software; see the source for copying conditions. There is no\n";
-	print "warranty; not even for merchantability or fitness for a particular purpose.\n";
+	print "warranty; not even for merchantability or fitness for a particular purpose.\n\n";
 }
 sub usage {
@ -64,9 +73,11 @@ sub usage {
 	print "Options:\n";
 	print "-s, --SGML         add XML DOCTYPE and article tags.\n";
 	print "-x, --XML          add SGML DOCTYPE and article tags.\n";
    print "-e, --encoding     specify character encoding.\n";
 	print "-n, --nonet        do not look up documents on the net.\n";
 	print "-o, --output-to    write to the specified file.\n";
-	print "-v, --verbose      show diagnostic output.\n";
+	print "-V, --verbose      show diagnostic output.\n";
-	print "-V, --version      show program version.\n";
+	print "-v, --version      show program version.\n";
 	print "-h, --help         show this usage message.\n";
 	exit($error);
 }