added encoding option

2002-06-21 01:50:53 +00:00 · 2002-06-21 01:50:53 +00:00 · 041e777c89
parent 199e47d087
commit 041e777c89
8 changed files with 352 additions and 186 deletions
--- a/LDP/wt2db/Changelog
+++ b/LDP/wt2db/Changelog
@ -13,3 +13,16 @@ CVS			Switched -V and -v, now -v is version, -V is verbose.
 			Changed -a, --article to -x, --xml and -s, --sgml to
 			let you pick your own doctype.

+			Added -n, --nonet parameter to disable network
+			lookups.
+
+			Fixed bug in screen blocks, para tags were being
+			closed when not open.
+
+			Deleted [ for <filename>, added [[file: namespace.
+			
+			[[ is now for internal links and becomes <xref>.
+
+			Contents of literal blocks are now encoded into entities.
+
+			Added -e, --encoding to specify encoding.
--- a/LDP/wt2db/INSTALL
+++ b/LDP/wt2db/INSTALL
@ -16,7 +16,7 @@ to build the package, and run:
 	make test

 to convert a small, arbitrary bit of WikiText. If the output looks
-like DocBook, go ahead and run, as root:
+like DocBook, go ahead and run:

 	make install

--- a/LDP/wt2db/Makefile.PL
+++ b/LDP/wt2db/Makefile.PL
@ -20,7 +20,7 @@ WriteMakefile(
    PREREQ_PM       => { 
                         'File::Basename'	=> 0,
 		         'HTML::Entities'	=> 0,
-		         'FileHandle'	=> 0,
+		         'FileHandle'		=> 0,
                       },
    MAN1PODS        => {
                         "doc/wt2db.pod"  => 'blib/man1/wt2db.1',
--- a/LDP/wt2db/README
+++ b/LDP/wt2db/README
@ -1,75 +1,11 @@
-This is wt2db version 0.1.
-
 wt2db is a utility to convert text files in WikiText format into
 DocBook.  It generates a DocBook fragment, not valid DocBook.

+See the manpage for more information and WikiText help.
+

 Reporting Bugs
 --------------

 Bugs should be reported at sourceforge.net/projects/linuxdoc. Select
 'wt2db' in the 'Category' field.
-
-
-WikiText Tags
-------------
-
-The following constructs are currently supported.
-
-Foo			<para>Foo</para>
-
-=Title=			<sect1><title>Title</title>
-			</sect1>
-
-=Title|id=		<sect1 id='id'><title>Title</title>
-			</sect1>
-
-			works for other sect levels as well, and many other
-			tags. It is either the "id" value, or the "title"
-			value, depending on the semantics of the particular
-			tag. Usage should be obvious in context.
-
-==Title==		<sect2><title>Title</title>
-			</sect2>
-
-===Title===		<sect3><title>Title</title>
-			</sect3>
-
-
-#Foo			<orderedlist>
-#Bar			<listitem><para>Foo</para></listitem>
-#Baz			<listitem><para>Bar</para></listitem>
-/#			<listitem><para>Baz</para></listitem>
-			</orderedlist>
-
-*Foo			<simplelist>
-*Bar			<listitem><para>Foo</para></listitem>
-*Baz			<listitem><para>Bar</para></listitem>
-/*			<listitem><para>Baz</para></listitem>
-			</simplelist>
-
-[[http://foo.org]]	<ulink url='http://foo.org'>
-			  <citetitle>http://foo.org</citetitle>
-			</ulink>
-
-[[http://foo.org Foo]]	<ulink url='http://foo.org'>
-			  <citetitle>Foo</citetitle>
-			</ulink>
-
-[http://foo.org|Foo]]	You can also delimit with the pipe character "|".
-			This works on any of these [[]] tags.
-
-[[file:Foo]]		<filename>Foo</filename>
-
-'''Foo'''		<emphasis>Foo</emphasis>
-
-A few DocBook structures do not have <para> tags wrapped around them. They
-are <para> itself (duh!), <sect?> and <programlisting>. If you insert anything
-using these tags, no <para> tags will be wrapped around it or inserted into it.
-So if you want fine control over your <para> tags, insert them yourself.
-
-These tags include:
-
-<programlisting/>
-<screen/>
-
--- a/LDP/wt2db/doc/wt2db.pod
+++ b/LDP/wt2db/doc/wt2db.pod
@ -1,34 +1,33 @@
 =head1 NAME

-B<wt2db> - utility to convert WikiText documents into DocBook XML/SGML.
+B<wt2db> - converts WikiText documents into DocBook XML/SGML.
+

 =head1 SYNOPSIS

 B<wt2db> [I<OPTION>] [I<FILE>]

+
 =head1 DESCRIPTION

 B<wt2db> converts a text file in a special format similar to that used
 in WikiWikiWebs into DocBook XML/SGML.
-
-The DocBook it writes out is only
-a fragment, not a complete document, because it has no DOCTYPE declaration.
-And due to the source format, there is no meta-data, such as in an
-<articleinfo> structure.
-
-As part of a larger publishing or document processing system, it is
-expected that later processing will supply these elements.
+The DocBook it writes out by default is only
+a fragment, but it will write a complete document upon request.

 By default it reads from STDIN and writes to STDOUT. However, if given a
 filename, it will read that file, and an output filename can also be
 specified as a command-line option.

+
 =head1 OPTIONS

 B<-x>, B<--xml> add XML DOCTYPE and article tags.

 B<-s>, B<--sgml> add SGML DOCTYPE and article tags.

+B<-n>, B<--nonet> do not access the network.
+
 B<-o>, B<--output-to> I<filename> write to the specified file.

 B<-v>, B<--verbose> show diagnostic output.
@ -37,6 +36,126 @@ B<-V>, B<--version> show program version.

 B<-h>, B<--help> show a usage message.

+
+=head1 NOTES
+
+B<wt2db> was developed to provide an easier way to write
+DocBook documentation.
+
+WikiText is based on the form of text that is used in a
+WikiWikiWeb. It provides very simple and easy to remember
+tags so you can write a Wiki article without learning HTML.
+B<wt2db> was originally written to convert Wikipedia articles
+into DocBook.
+
+The Wiki format has been enhanced in several ways to make it
+more powerful for authors. Support has been added for Wiki tags
+that don't exist in any real Wiki, by giving common DocBook
+elements their own Wiki tags. Support has also been added
+for including DocBook elements right in the source file.
+
+This means WikiText is a merging of DocBook into a plain text file.
+In its simplest form, it is plain text. A plain text
+document can be processed by B<wt2db> and converted into
+DocBook.
+Or, a complete and valid DocBook document can be processed,
+and will pass through the B<wt2db> filters and come out
+unchanged.
+Virtually any combination of DocBook with plain text will work,
+with the additional Wiki style tags to make things even easier
+for authors.
+It puts all of the semantics of DocBook
+at your disposal, while being as easy to write as a Wiki page.
+
+
+=head1 WIKITEXT
+
+These are the tags which are supported in this release of
+B<wt2db>. All DocBook tags are also supported. If you encounter
+any valid DocBook that is not handled correctly, please file
+a bug report.
+
+ Foo                    <para>Foo</para>
+
+ =Title=                <sect1>
+                            <title>Title</title>
+                        </sect1>
+
+ =Title|id=             <sect1 id='id'>
+                            <title>Title</title>
+                        </sect1>
+
+The id attribute, delimited with a pipe character,
+works for other sect levels as well, and many other
+tags. In some cases it is not an id value, but the
+title, depending on the semantics of the particular
+tag. Usage should be obvious in context.
+
+ ==Title==              <sect2>
+                            <title>Title</title>
+                        </sect2>
+
+ ===Title===            <sect3>
+                            <title>Title</title>
+                        </sect3>
+
+ #Foo                   <orderedlist>
+ #Bar                       <listitem>
+ #Baz                           <para>Foo</para>
+ /#                         </listitem>
+                            <listitem>
+                                <para>Bar</para>
+                            </listitem>
+                            <listitem>
+                                <para>Baz</para>
+                            </listitem>
+                        </orderedlist>
+
+ *Foo                   <simplelist>
+ *Bar                       <listitem>
+ *Baz                           <para>Foo</para>
+ /*                         </listitem>
+                            <listitem>
+                                <para>Bar</para>
+                            </listitem>
+                            <listitem>
+                                <para>Baz</para>
+                            </listitem>
+                        </simplelist>
+
+ [[foo]]                <xref linkend='foo' endterm='foo'/>
+ [[link:Foo]]
+
+ [[file:/dev/foo]]      <filename>/dev/foo</filename>
+
+
+ [[http://foo.org]]     <ulink url='http://foo.org'>
+                            <citetitle>http://foo.org</citetitle>
+                        </ulink>
+
+ [[http://foo.org Foo]] <ulink url='http://foo.org'>
+ [[http://foo.org|Foo]]     <citetitle>Foo</citetitle>
+                        </ulink>
+
+ '''Foo'''              <emphasis>Foo</emphasis>
+
+A few DocBook structures will not have <para> tags wrapped around them. They
+are <para> itself, <sect?> and <programlisting>. If you insert anything
+using these tags, no <para> tags will be wrapped around it or inserted into it.
+So if you want fine control over your <para> tags, insert them yourself.
+
+The <screen> element will be wrapped with <para> tags, but no internal
+paragraph breaks will be generated.
+
+
+=head1 RESTRICTIONS
+
+Currently only a single form of WikiText is supported, which is very
+similar to that used by the Wikipedia (see http://www.wikipedia.com).
+A future release will be configurable to support additional styles of
+WikiText.
+
+
 =head1 BUGS

 Bugs are tracked in the SourceForge project page at:
@ -46,25 +165,13 @@ http://www.sourceforge.net/projects/linuxdoc
 If you report a bug in B<wt2db>, specify wt2db as the category so it will
 be routed the appropriate person.

-=head1 RESTRICTIONS
-
-Currently only a single form of WikiText is supported, which is very
-similar to that used by the Wikipedia (see http://www.wikipedia.com).
-A future release will be configurable to support additional styles of
-WikiText.
-
-=head1 NOTES
-
-B<wt2db> was developed as a project of the Linux Documentation Project
-to create an easier way of writing DocBook documentation. While it is
-useful on its own, it is part of Lampadas, the LDP's document
-production system.

 =head1 SEE ALSO

 See the home page of the Linux Documentation Project,
 http://www.tldp.org for updates and more information.

+
 =head1 AUTHOR

 This man page was written by David C. Merrill <david@lupercalia.net>.
--- a/LDP/wt2db/lib/Wt2Db.pm
+++ b/LDP/wt2db/lib/Wt2Db.pm
@ -19,36 +19,42 @@ use Exporter;
 	Reset
 	);

-# These keep track of which constructs we're in the middle of
-#
-$level1 = 0;
-$level2 = 0;
-$level3 = 0;
-$orderedlist = 0;
-$listitem = 0;
-$itemizedlist = 0;
-$para = 0;
-$qandaset = 0;
-$qandaentry = 0;
-$answer = 0;
+&Reset;

-# These are passed in by the caller
-#
-$txtfile = '';
-$dbfile = '';
-$verbose = 0;
+# Call this before rerunning ProcessLine to clear state.
+# 
+sub Reset {
+	$level1 = 0;
+	$level2 = 0;
+	$level3 = 0;
+	$orderedlist = 0;
+	$listitem = 0;
+	$itemizedlist = 0;
+	$para = 0;
+	$qandaset = 0;
+	$qandaentry = 0;
+	$answer = 0;

-# These maintain state
-#
-$line = '';
-$linenumber = 0;
-$id = '';
-$title = '';
-$buf = '';
+	# These are passed in by the caller
+	#
+	$txtfile = '';
+	$dbfile = '';
+	$verbose = 0;
+	$doctype = 0;
+	$nonet = 0;

-$noparatag = 0;
-$noparadepth = 0;
-$noparaline = 0;
+	# These maintain state
+	#
+	$line = '';
+	$linenumber = 0;
+	$id = '';
+	$title = '';
+	$buf = '';
+
+	$noparatag = 0;
+	$noparadepth = 0;
+	$noparaline = 0;
+}


 # -----------------------------------------------------------
@ -62,7 +68,7 @@ sub new {
 }

 sub ProcessFile {
-	($self, $txtfile, $dbfile, $verbose, $doctype) = @_;
+	($self, $txtfile, $dbfile, $verbose, $doctype, $nonet, $encoding) = @_;

 	# Read from STDIN if no input file given
 	# 
@ -87,9 +93,10 @@ sub ProcessFile {

 	# wrap article if requested
 	#
+    $encoding = 'ISO-8859-1' unless ($encoding);
 	if ($doctype eq 'XML') {
 		print "Adding XML DOCTYPE and article tags." if ($verbose);
-		$buf = '<?xml version="1.0" standalone="no"?>' . "\n";
+		$buf = '<?xml version="1.0" encoding="' . $encoding . '" standalone="no"?>' . "\n";
 		$buf .= '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"' . "\n";
     		$buf .= '    "http://docbook.org/xml/4.1.2/docbookx.dtd"';
 		$buf .= "\[\]\>\n";
@ -153,8 +160,8 @@ sub ProcessLine {
 		
 	# inline docbook
 	#
-	# ulink
-	# 
+	# parse all links, internal and external
+	#
 	while ($line =~ /\[\[/) {
 		unless ($line =~ /\]\]/) {
 			$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
@ -174,15 +181,22 @@ sub ProcessLine {
 			$linkname = $link;
 		}

-		# kill quotes, they mess us up
+		# kill quotes inside links, they mess us up because
+		# we have to wrap this string with quotes.
+		# perhaps it should be encoding the entire URL?
 		# 
 		$link =~ s/'/%27/g;

 		# namespaces are handled differently
 		#
 		print "$link\n" if ($verbose);
-		if ($link =~ /^http:/) {
+
+		if ($link =~ /^http:\/\//) {
 			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
+		} elsif ($link =~ /^link:/) {
+			$link =~ s/^link://;
+			$linkname =~ s/^link://;
+			$line =~ s/\[\[.*?\]\]/<xref linkend='$link' endterm='$link'\>\<\/xref\>/;
 		} elsif ($link =~ /^mailto:/) {
 			$linkname =~ s/^mailto://;
 			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
@ -194,28 +208,41 @@ sub ProcessLine {
 		} elsif ($link =~ /^ldp:/) {
 			$linkname =~ s/^ldp://;
 			$link =~ s/^ldp://;
-			$tempfile = "/tmp/wt2db-" . $rand;
-			$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
-			system("$cmd");
-			open(URL, "$tempfile") || die "wt2db: cannot open temporary file ($!)\n\n";
-			$link = "";
-			while ($url_line = <URL>) {
-				$url_line =~ s/\n//;
-				if ($url_line =~ /identifier/) {
-					$link .= $url_line;
+			if ($nonet) {
+				$line =~ s/\[\[.*?\]\]/<citetitle>$link<\/citetitle>/;
+			} else {
+				$tempfile = "/tmp/wt2db-" . $rand;
+				$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
+				print "$cmd\n" if ($verbose > 1);
+				$return = system("$cmd");
+				unless ($return) {
+					open(URL, "$tempfile") || die "wt2db: cannot open temporary file ($!)\n\n";
+					$link = '';
+					while ($url_line = <URL>) {
+						$url_line =~ s/\n//;
+						if ($url_line =~ /identifier/) {
+							$link .= $url_line;
+						}
+					}
+					close(URL);
+					unlink $tempfile;
 				}
+				$link =~ s/^.*?<identifier>//;
+				$link =~ s/<\/identifier>.*?$//;
+				if ($link eq '') {
+					$linkname = "ERROR: LDP namespace resolution failure on $linkname";
+				}
+				$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
 			}
-			close(URL);
-			unlink $tempfile;
-			$link =~ s/^.*?<identifier>//;
-			$link =~ s/<\/identifier>.*?$//;
-			if ($link eq '') {
-				$linkname = "ERROR: LDP namespace resolution failure on $linkname";
-			}
-			$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
 		} elsif ($link =~ /^file:/) {
 			$linkname =~ s/^file://;
 			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
+		} elsif ($link =~ /^dir:/) {
+
+# FIXME: need to check attribute on filename element
+# 
+			$linkname =~ s/^dir://;
+			$line =~ s/\[\[.*?\]\]/<filename type='directory'>$linkname<\/filename>/;
 		} else {
 			$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
 		}
@ -238,7 +265,8 @@ sub ProcessLine {
 	#	<programlisting>
 	#	<literallayout>
 	
-	# forget about nopara
+	# forget about being in nopara state if we're no longer in one
+	# 
 	if ($noparadepth == 0) {
 		$noparatag = "";
 	}
@ -248,6 +276,8 @@ sub ProcessLine {
 	if ((($line =~ /^<para>/) or
 	     ($line =~ /^<sect/) or
 	     ($line =~ /^<screen>/) or
+	     ($line =~ /^<screen>/) or
+	     ($line =~ /^<blockquote>/) or
 	     ($line =~ /^<literallayout>/) or
 	     ($line =~ /^<articleinfo>/) or
 	     ($line =~ /^<programlisting>/)) and
@ -257,9 +287,12 @@ sub ProcessLine {
 		$noparatag =~ s/^.*?<//;
 		$noparatag =~ s/>.*?$//;
 		$noparaline = $linenumber;
+
+		# screen sections don't embed para tags, but are wrapped in them
+		#
 		if ($line =~ /^<screen>/) {
 			unless ($para) {
-				$line = "<para>" . $line;
+				$buf .= "<para>";
 				$para = 1;
 			}
 		}
@ -289,9 +322,29 @@ sub ProcessLine {
 		}

 		# recover original line -- no whitespace modifiers
+		# allow nonencoded text in unparsed lines, when in a literal block
 		#
 		$line = $originalline;
 		chomp($line);
+		if ($line =~ /^<$noparatag>/ ) {
+			$starttag = "<$noparatag>";
+		} else {
+			$starttag = '';
+		}
+		if ($line =~ /<\/$noparatag>/ ) {
+			$endtag = "<\/$noparatag>";
+		} else {
+			$endtag = '';
+		}
+
+		$line =~ s/<$noparatag>//;
+		$line =~ s/<\/$noparatag>//;
+	    if (($noparatag eq 'screen') or
+	        ($noparatag eq 'literallayout') or
+	        ($noparatag eq 'programlisting')) {
+    		encode_entities($line);
+        }
+		$line = "$starttag$line$endtag";

 	# sect3
 	#
@ -376,9 +429,9 @@ sub ProcessLine {
 		&trimline;
 		&splittitle;
 		if ($id eq '') {
-			$line = "<question><para>" . $title . "</para></question>";
+			$line = "<question><para>$title</para></question>";
 		} else {
-			$line = "<question id='$id'><para>" . $title . "</para></question>";
+			$line = "<question id='$id'><para>$title</para></question>";
 		}
 		unless ($qandaentry) {
 			$line = "<qandaentry>\n" . $line;
@ -430,42 +483,6 @@ sub Buffer {
 	return $buf;
 }

-# Basically a cut-and-paste of the original declarations,
-# to make sure all variables are completely cleared.
-#
-# Call this before rerunning ProcessLine to clear state.
-# 
-sub Reset {
-	$level1 = 0;
-	$level2 = 0;
-	$level3 = 0;
-	$orderedlist = 0;
-	$listitem = 0;
-	$itemizedlist = 0;
-	$para = 0;
-	$qandaset = 0;
-	$qandaentry = 0;
-	$answer = 0;
-
-	# These are passed in by the caller
-	#
-	$txtfile = '';
-	$dbfile = '';
-	$verbose = 0;
-
-	# These maintain state
-	#
-	$line = '';
-	$linenumber = 0;
-	$id = '';
-	$title = '';
-	$buf = '';
-
-	$noparatag = 0;
-	$noparadepth = 0;
-	$noparaline = 0;
-}
-
 sub close1 {
 	&close2;
 	if ($level1) {
@ -570,15 +587,78 @@ sub splittitle {
 	$line =~ s/^=+//;
 	$line =~ s/=+$//;
 	$title = $line;
-	$id = "";
 	if ($line =~ /\|/) {
 		$title =~ s/\|.+//;
 		$id = $line;
 		$id =~ s/^.+\|//;
+	} else {
+		$id = &anchorfix($title);
 	}
 	$title =~ s/\s+$//;
 	$title =~ s/^\s+//;
 	$id =~ s/\s+$//;
 	$id =~ s/^\s+//;
 }
+
+sub anchorfix {
+	my $anchor = $_[0];
+	$anchor = lc(&trim($anchor));
+	$anchor = decode_entities($anchor);
+	$anchor =~ s/-/-dash-/g;
+	$anchor =~ s/&/-and-/g;
+	$anchor =~ s/;//g;
+	$anchor = encode_entities($anchor);
+	$anchor =~ s/&(\w)grave/\1/g;
+	$anchor =~ s/&(\w)acute/\1/g;
+	$anchor =~ s/&(\w)circ/\1/g;
+	$anchor =~ s/&(\w)uml/\1/g;
+	$anchor =~ s/&(\w)tilde/\1/g;
+	$anchor =~ s/&(\w)cedil/\1/g;
+	$anchor =~ s/&/-and-/g;
+	$anchor =~ s/;//g;
+	$anchor =~ s/\//-slash-/g;
+	$anchor =~ s/\\/-bslash-/g;
+	$anchor =~ s/\s+/-/g;
+	$anchor =~ s/'//g;
+	$anchor =~ s/`//g;
+	$anchor =~ s/,/-comma-/g;
+	$anchor =~ s/\./-dot-/g;
+	$anchor =~ s/!/-bang-/g;
+	$anchor =~ s/\?/-question-/g;
+	$anchor =~ s/\+/-plus-/g;
+	$anchor =~ s/\*/-x-/g;
+	$anchor =~ s/\(/-op-/g;
+	$anchor =~ s/\)/-cp-/g;
+	$anchor =~ s/\@/-at-/g;
+	$anchor =~ s/dcm_at/-at-/gi;
+	$anchor =~ s/\^/-hat-/g;
+	$anchor =~ s/=/-eq-/g;
+	$anchor =~ s/\$/S/;
+	$anchor =~ s/~/-tilde-/g;
+	$anchor =~ s/0/-zero-/g;
+	$anchor =~ s/1/-one-/g;
+	$anchor =~ s/2/-two-/g;
+	$anchor =~ s/3/-three-/g;
+	$anchor =~ s/4/-four-/g;
+	$anchor =~ s/5/-five-/g;
+	$anchor =~ s/6/-six-/g;
+	$anchor =~ s/7/-seven-/g;
+	$anchor =~ s/8/-eight-/g;
+	$anchor =~ s/9/-nine-/g;
+	$anchor =~ s/\|/-pipe-/g;
+	$anchor =~ s/\[/-lsqb-/g;
+	$anchor =~ s/\]/-rsqb-/g;
+	$anchor =~ s/^-+//;
+	$anchor =~ s/-+$//;
+	$anchor =~ s/--/-/g;	# get rid of double, initial and trailing hyphens
+	return &trim($anchor);
+}
+
+sub trim {
+	my $temp = $_[0];
+
+	$temp =~ s/^\s+//g;
+	$temp =~ s/\s+$//g;
+	return $temp;
+}
 1;
--- a/LDP/wt2db/test.pl
+++ b/LDP/wt2db/test.pl
@ -5,7 +5,26 @@ $WT = new Wt2Db;

 $buffer = "foo bar

-baz
+=Section|section=
+
+paragraph
+
+==Subsection|subsection==
+
+paragraph
+
+
+=Namespaces=
+
+==MailTo==
+
+[[mailto:david@lupercalia.net]]
+[[mailto:david@lupercalia.net|David Merrill]]
+
+==HTTP==
+
+[[http://www.tldp.org]]
+[[http://www.tldp.org|The Linux Documentation Project]]

 ";

--- a/LDP/wt2db/wt2db
+++ b/LDP/wt2db/wt2db
@ -10,6 +10,8 @@ $WT2DB = new Wt2Db;
 my $txtfile = '';
 my $dbfile = '';
 my $doctype = '';
+my $nonet = 0;
+my $encoding = 'ISO-8859-1';
 my $verbose = 0;
 my $error = 0;

@ -22,6 +24,13 @@ while (1) {
 	} elsif($ARGV[0] eq "-x" or $ARGV[0] eq "--xml") {
 		$doctype = 'XML';
 		shift(@ARGV);
+	} elsif($ARGV[0] eq "-e" or $ARGV[0] eq "--encoding") {
+		shift(@ARGV);
+        $encoding = $ARGV[0];
+		shift(@ARGV);
+	} elsif($ARGV[0] eq "-n" or $ARGV[0] eq "--nonet") {
+		$nonet = 1;
+		shift(@ARGV);
 	} elsif($ARGV[0] eq "-o" or $ARGV[0] eq "--output-to") {
 		shift(@ARGV);
 		$dbfile = $ARGV[0];
@ -44,7 +53,7 @@ while (1) {
 	}
 }

-$WT2DB->ProcessFile($txtfile, $dbfile, $verbose, $doctype);
+$WT2DB->ProcessFile($txtfile, $dbfile, $verbose, $doctype, $nonet, $encoding);

 sub version {
 	print "wt2db version $VERSION\n";
@ -53,7 +62,7 @@ sub version {
 	print "Converts a WikiText file into DocBook XML/SGML.\n";
 	print "\n";
 	print "This is free software; see the source for copying conditions. There is no\n";
-	print "warranty; not even for merchantability or fitness for a particular purpose.\n";
+	print "warranty; not even for merchantability or fitness for a particular purpose.\n\n";
 }

 sub usage {
@ -64,9 +73,11 @@ sub usage {
 	print "Options:\n";
 	print "-s, --SGML         add XML DOCTYPE and article tags.\n";
 	print "-x, --XML          add SGML DOCTYPE and article tags.\n";
+    print "-e, --encoding     specify character encoding.\n";
+	print "-n, --nonet        do not look up documents on the net.\n";
 	print "-o, --output-to    write to the specified file.\n";
-	print "-v, --verbose      show diagnostic output.\n";
-	print "-V, --version      show program version.\n";
+	print "-V, --verbose      show diagnostic output.\n";
+	print "-v, --version      show program version.\n";
 	print "-h, --help         show this usage message.\n";
 	exit($error);
 }