mirror of https://github.com/tLDP/LDP
moved from a straight script to a Perl module with a wrapper script.
This is so the module can be used by Lampadas directly without needing to shell out to the OS.
This commit is contained in:
parent
1d11ea1682
commit
698c2baf70
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/perl -sw
|
||||
##
|
||||
## Makefile for the wt2db Perl module.
|
||||
##
|
||||
## Copyright (c) 2001, 2002, David Merrill. All rights reserved.
|
||||
## This code is free software; you can redistribute it and/or modify
|
||||
## it under the same terms as Perl itself.
|
||||
##
|
||||
|
||||
use ExtUtils::MakeMaker;
|
||||
|
||||
WriteMakefile(
|
||||
|
||||
NAME => 'Wt2Db',
|
||||
AUTHOR => 'David Merrill <david@lupercalia.net>',
|
||||
ABSTRACT => 'Converts WikiText documents into DocBook XML.',
|
||||
NOECHO => '',
|
||||
VERSION_FROM => 'lib/Wt2Db.pm',
|
||||
EXE_FILES => 'wt2db',
|
||||
PREREQ_PM => {
|
||||
'File::Basename' => 0,
|
||||
'HTML::Entities' => 0,
|
||||
'FileHandle' => 0,
|
||||
},
|
||||
|
||||
);
|
|
@ -0,0 +1,145 @@
|
|||
<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4//EN" [
|
||||
|
||||
<!-- Process this file with docbook-to-man to generate an nroff manual
|
||||
page: `docbook-to-man manpage.sgml > manpage.1'. You may view
|
||||
the manual page with: `docbook-to-man manpage.sgml | nroff -man |
|
||||
less'. A typical entry in a Makefile or Makefile.am is:
|
||||
|
||||
manpage.1: manpage.sgml
|
||||
docbook-to-man $< > $@
|
||||
|
||||
|
||||
The docbook-to-man binary is found in the docbook-to-man package.
|
||||
Please remember that if you create the nroff version in one of the
|
||||
debian/rules file targets (such as build), you will need to include
|
||||
docbook-to-man in your Build-Depends control field.
|
||||
|
||||
-->
|
||||
|
||||
<!-- Fill in your name for FIRSTNAME and SURNAME. -->
|
||||
<!ENTITY dhfirstname "<firstname>David</firstname>">
|
||||
<!ENTITY dhsurname "<surname>Merrill</surname>">
|
||||
<!-- Please adjust the date whenever revising the manpage. -->
|
||||
<!ENTITY dhdate "<date>March 28, 2002</date>">
|
||||
<!-- SECTION should be 1-8, maybe w/ subsection other parameters are
|
||||
allowed: see man(7), man(1). -->
|
||||
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
|
||||
<!ENTITY dhemail "<email>david@lupercalia.net</email>">
|
||||
<!ENTITY dhusername "David C. Merrill">
|
||||
<!ENTITY dhucpackage "<refentrytitle>WT2DB</refentrytitle>">
|
||||
<!ENTITY dhpackage "wt2db">
|
||||
|
||||
<!ENTITY debian "<productname>Debian</productname>">
|
||||
<!ENTITY gnu "<acronym>GNU</acronym>">
|
||||
]>
|
||||
|
||||
<refentry>
|
||||
<refentryinfo>
|
||||
<address>
|
||||
&dhemail;
|
||||
</address>
|
||||
<author>
|
||||
&dhfirstname;
|
||||
&dhsurname;
|
||||
</author>
|
||||
<copyright>
|
||||
<year>2001</year>
|
||||
<holder>&dhusername;</holder>
|
||||
</copyright>
|
||||
&dhdate;
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
&dhucpackage;
|
||||
|
||||
&dhsection;
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>&dhpackage;</refname>
|
||||
|
||||
<refpurpose>utility to convert WikiText documents into DocBook XML</refpurpose>
|
||||
</refnamediv>
|
||||
<refsynopsisdiv>
|
||||
<cmdsynopsis>
|
||||
<command>&dhpackage;</command>
|
||||
<arg><option>-o </option><replaceable>filename</replaceable></arg>
|
||||
<arg><option>-v</option></arg>
|
||||
<arg><option>-h</option></arg>
|
||||
</cmdsynopsis>
|
||||
</refsynopsisdiv>
|
||||
<refsect1>
|
||||
<title>DESCRIPTION</title>
|
||||
|
||||
<para>This manual page documents briefly the
|
||||
<command>&dhpackage;</command> command.</para>
|
||||
|
||||
<para><command>&dhpackage;</command> is a program that converts a text
|
||||
file in a format commonly used by WikiWikiWebs into DocBook SGML.</para>
|
||||
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>OPTIONS</title>
|
||||
|
||||
<para>These programs follow the usual GNU command line syntax,
|
||||
with long options starting with two dashes (`-'). A summary of
|
||||
options is included below. For a complete description, see the
|
||||
<application>Info</application> files.</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>-o</option>
|
||||
<option>--output-to</option>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>Write to the specified file. The default is to write to standard output.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<term><option>-v</option>
|
||||
<option>--verbose</option>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>Show diagnostic output.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>-h</option>
|
||||
<option>--help</option>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>Show summary of options.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
<refsect1>
|
||||
<title>AUTHOR</title>
|
||||
|
||||
<para>This manual page was written by &dhusername; &dhemail;.
|
||||
Permission is
|
||||
granted to copy, distribute and/or modify this document under
|
||||
the terms of the <acronym>GNU</acronym> Free Documentation
|
||||
License, Version 1.1 or any later version published by the Free
|
||||
Software Foundation; with no Invariant Sections, no Front-Cover
|
||||
Texts and no Back-Cover Texts.</para>
|
||||
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
<!-- Keep this comment at the end of the file
|
||||
Local variables:
|
||||
mode: sgml
|
||||
sgml-omittag:t
|
||||
sgml-shorttag:t
|
||||
sgml-minimize-attributes:nil
|
||||
sgml-always-quote-attributes:t
|
||||
sgml-indent-step:2
|
||||
sgml-indent-data:t
|
||||
sgml-parent-document:nil
|
||||
sgml-default-dtd-file:nil
|
||||
sgml-exposed-tags:nil
|
||||
sgml-local-catalogs:nil
|
||||
sgml-local-ecat-files:nil
|
||||
End:
|
||||
-->
|
||||
|
||||
|
|
@ -0,0 +1,573 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
# Converts WikiText files into docbook.
|
||||
#
|
||||
package Wt2Db;
|
||||
|
||||
$VERSION = '0.2';
|
||||
|
||||
use File::Basename;
|
||||
use HTML::Entities;
|
||||
use FileHandle;
|
||||
use Exporter;
|
||||
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(
|
||||
new,
|
||||
ProcessFile,
|
||||
ProcessLine,
|
||||
ProcessEnd,
|
||||
Buffer,
|
||||
Reset,
|
||||
Usage
|
||||
);
|
||||
|
||||
# These keep track of which constructs we're in the middle of
|
||||
#
|
||||
$level1 = 0;
|
||||
$level2 = 0;
|
||||
$level3 = 0;
|
||||
$orderedlist = 0;
|
||||
$listitem = 0;
|
||||
$itemizedlist = 0;
|
||||
$para = 0;
|
||||
$qandaset = 0;
|
||||
$qandaentry = 0;
|
||||
$answer = 0;
|
||||
|
||||
# These are passed in by the caller
|
||||
#
|
||||
$txtfile = '';
|
||||
$dbfile = '';
|
||||
$verbose = 0;
|
||||
|
||||
# These maintain state
|
||||
#
|
||||
$line = '';
|
||||
$linenumber = 0;
|
||||
$id = '';
|
||||
$title = '';
|
||||
$buf = '';
|
||||
|
||||
$noparatag = 0;
|
||||
$noparadepth = 0;
|
||||
$noparaline = 0;
|
||||
|
||||
|
||||
# -----------------------------------------------------------
|
||||
|
||||
sub new {
|
||||
my $that = shift;
|
||||
my $class = ref($that) || $that;
|
||||
my $self = {};
|
||||
bless $self, $class;
|
||||
return $self;
|
||||
}
|
||||
|
||||
sub ProcessFile {
|
||||
($self, $txtfile, $dbfile, $verbose) = @_;
|
||||
|
||||
# Read from STDIN if no input file given
|
||||
#
|
||||
if ($txtfile) {
|
||||
if( !(-r $txtfile) ) {
|
||||
print "txt2db: ERROR cannot read $f ($!)\n\n";
|
||||
&usage(1);
|
||||
} else {
|
||||
$fh = new FileHandle;
|
||||
open $fh, "<$txtfile" or die "Cannot open $txtfile ($!)\n";
|
||||
}
|
||||
} else {
|
||||
$fh = STDIN;
|
||||
}
|
||||
|
||||
if ($dbfile) {
|
||||
$outfh = new FileHandle;
|
||||
open $outfh, ">$dbfile" or die "Cannot write to $dbfile\n\n";
|
||||
} else {
|
||||
$outfh = STDOUT;
|
||||
}
|
||||
|
||||
# read in the text file
|
||||
#
|
||||
while ($originalline = <$fh>) {
|
||||
ProcessLine($foo, $originalline);
|
||||
print $outfh "$buf";
|
||||
$buf = '';
|
||||
}
|
||||
|
||||
ProcessEnd();
|
||||
print $outfh "$buf";
|
||||
$buf = '';
|
||||
close $fh;
|
||||
close $outfh;
|
||||
}
|
||||
|
||||
sub ProcessLine {
|
||||
($foo, $originalline) = @_;
|
||||
|
||||
$line = $originalline;
|
||||
$linenumber++;
|
||||
|
||||
&trimline;
|
||||
|
||||
# blank lines
|
||||
unless ($line) {
|
||||
unless ($noparadepth) {
|
||||
&closenonsect;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
# capitalize hints that can be entered in lowercase
|
||||
#
|
||||
$line =~ s/^q:/Q:/;
|
||||
$line =~ s/^a:/A:/;
|
||||
|
||||
# encode entities
|
||||
#
|
||||
# while ($line =~ //) {
|
||||
# }
|
||||
# decode_entities($line);
|
||||
# encode_entities($line);
|
||||
|
||||
# inline docbook
|
||||
#
|
||||
# ulink
|
||||
#
|
||||
while ($line =~ /\[\[/) {
|
||||
unless ($line =~ /\]\]/) {
|
||||
$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
|
||||
}
|
||||
|
||||
# separate link url from link name
|
||||
#
|
||||
$link = $line;
|
||||
$link=~ s/\n//g;
|
||||
$link =~ s/.*?\[\[//;
|
||||
$link =~ s/\]\].*?$//;
|
||||
if ($link =~ /\|/) {
|
||||
$linkname = $link;
|
||||
$link =~ s/\|.+$//;
|
||||
$linkname =~ s/^\S+\|//;
|
||||
} else {
|
||||
$linkname = $link;
|
||||
}
|
||||
|
||||
# kill quotes, they mess us up
|
||||
#
|
||||
$link =~ s/'/%27/g;
|
||||
|
||||
# namespaces are handled differently
|
||||
#
|
||||
print "$link\n" if ($verbose);
|
||||
if ($link =~ /^http:/) {
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^mailto:/) {
|
||||
$linkname =~ s/^mailto://;
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^wiki:/) {
|
||||
$linkname =~ s/^wiki://;
|
||||
$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
|
||||
$link =~ s/\ /+/;
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^ldp:/) {
|
||||
$linkname =~ s/^ldp://;
|
||||
$link =~ s/^ldp://;
|
||||
$tempfile = "/tmp/txt2db-" . $rand;
|
||||
$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
|
||||
system("$cmd");
|
||||
open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n\n";
|
||||
$link = "";
|
||||
while ($url_line = <URL>) {
|
||||
$url_line =~ s/\n//;
|
||||
if ($url_line =~ /identifier/) {
|
||||
$link .= $url_line;
|
||||
}
|
||||
}
|
||||
close(URL);
|
||||
unlink $tempfile;
|
||||
$link =~ s/^.*?<identifier>//;
|
||||
$link =~ s/<\/identifier>.*?$//;
|
||||
if ($link eq '') {
|
||||
$linkname = "ERROR: LDP namespace resolution failure on $linkname";
|
||||
}
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^file:/) {
|
||||
$linkname =~ s/^file://;
|
||||
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
||||
} else {
|
||||
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
||||
}
|
||||
}
|
||||
|
||||
# emphasis
|
||||
#
|
||||
while ($line =~ /'''.*'''/) {
|
||||
$line =~ s/'''/<emphasis role='bold'>/;
|
||||
$line =~ s/'''/<\/emphasis>/;
|
||||
}
|
||||
|
||||
# this block defines DocBook structures that won't be broken up with
|
||||
# paragraphs when we hit empty lines:
|
||||
#
|
||||
# <para>
|
||||
# <sect1>
|
||||
# <sect2>
|
||||
# <sect3>
|
||||
# <programlisting>
|
||||
# <literallayout>
|
||||
|
||||
# forget about nopara
|
||||
if ($noparadepth == 0) {
|
||||
$noparatag = "";
|
||||
}
|
||||
|
||||
# start a new nopara section
|
||||
#
|
||||
if ((($line =~ /^<para>/) or
|
||||
($line =~ /^<sect/) or
|
||||
($line =~ /^<screen>/) or
|
||||
($line =~ /^<literallayout>/) or
|
||||
($line =~ /^<programlisting>/)) and
|
||||
($noparadepth == 0)) {
|
||||
&closepara;
|
||||
$noparatag = $line;
|
||||
$noparatag =~ s/^.*?<//;
|
||||
$noparatag =~ s/>.*?$//;
|
||||
$noparaline = $linenumber;
|
||||
if ($line =~ /^<screen>/) {
|
||||
unless ($para) {
|
||||
$line = "<para>" . $line;
|
||||
$para = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# count noparadepth
|
||||
#
|
||||
if ($noparatag ne '') {
|
||||
$temp = $line;
|
||||
while ($temp =~ /<$noparatag>/) {
|
||||
$temp =~ s/<?$noparatag>//;
|
||||
$noparadepth ++;
|
||||
}
|
||||
while ($temp =~ /<\/$noparatag>/) {
|
||||
$temp =~ s/<?\/$noparatag>//;
|
||||
$noparadepth --;
|
||||
if ($noparadepth == 0) {
|
||||
$noparaline = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# runon protection
|
||||
#
|
||||
if (($noparaline) and ($linenumber >= ($noparaline + 100))) {
|
||||
$buf .= "ERROR: runon block starting on line $noparaline\n";
|
||||
return;
|
||||
}
|
||||
|
||||
# recover original line -- no whitespace modifiers
|
||||
#
|
||||
$line = $originalline;
|
||||
|
||||
# sect3
|
||||
#
|
||||
} elsif ($line =~ /^===/) {
|
||||
&close3;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect3><title>$title</title>";
|
||||
} else {
|
||||
$line = "<sect3 id='$id'><title id='$id-title'>$title</title>";
|
||||
}
|
||||
$level3 = 1;
|
||||
|
||||
# sect2
|
||||
#
|
||||
} elsif ($line =~ /^==/) {
|
||||
&close2;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect2><title>$title</title>";
|
||||
} else {
|
||||
$line = "<sect2 id='$id'><title id='$id-title'>$title</title>";
|
||||
}
|
||||
$level2 = 1;
|
||||
|
||||
# sect1
|
||||
#
|
||||
} elsif ($line =~ /^=/) {
|
||||
&close1;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect1><title>$title</title>";
|
||||
} else {
|
||||
$line = "<sect1 id='$id'><title id='$id-title'>$title</title>";
|
||||
}
|
||||
$level1 = 1;
|
||||
|
||||
# orderedlist
|
||||
#
|
||||
} elsif ($line =~ /^#/) {
|
||||
&closeitemizedlist;
|
||||
if ($orderedlist == 0) {
|
||||
$buf .= "<orderedlist>\n";
|
||||
$orderedlist = 1;
|
||||
}
|
||||
&closelistitem;
|
||||
$line =~ s/^#//;
|
||||
&trimline;
|
||||
$line =~ s/^/<listitem><para>/;
|
||||
$listitem = 1;
|
||||
$para = 1;
|
||||
} elsif ($line =~ /^\/#/) {
|
||||
$line =~ s/^\/#//;
|
||||
&trimline;
|
||||
&closeorderedlist;
|
||||
|
||||
# itemizedlist
|
||||
#
|
||||
} elsif ($line =~ /^\*/) {
|
||||
&closeorderedlist;
|
||||
if ($itemizedlist == 0) {
|
||||
$buf .= "<itemizedlist>\n";
|
||||
$itemizedlist = 1;
|
||||
}
|
||||
&closelistitem;
|
||||
$line =~ s/^\*//;
|
||||
&trimline;
|
||||
$line =~ s/^/<listitem><para>/;
|
||||
$listitem = 1;
|
||||
$para = 1;
|
||||
} elsif ($line =~ /\/\*/) {
|
||||
$line =~ s/^\/\*//;
|
||||
&trimline;
|
||||
&closeitemizedlist;
|
||||
|
||||
# question
|
||||
#
|
||||
} elsif ($line =~ /^Q:/) {
|
||||
&closelists;
|
||||
&closeqandaentry;
|
||||
$line =~ s/^Q://;
|
||||
&trimline;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<question><para>" . $title . "</para></question>";
|
||||
} else {
|
||||
$line = "<question id='$id'><para>" . $title . "</para></question>";
|
||||
}
|
||||
unless ($qandaentry) {
|
||||
$line = "<qandaentry>\n" . $line;
|
||||
$qandaentry = 1;
|
||||
}
|
||||
if ($qandaset == 0) {
|
||||
$line = "<qandaset defaultlabel='qanda'>\n". $line;
|
||||
$qandaset = 1;
|
||||
}
|
||||
|
||||
# answer
|
||||
#
|
||||
} elsif ($line =~ /^A:/) {
|
||||
$line =~ s/^A://;
|
||||
&trimline;
|
||||
&closeanswer;
|
||||
$line = "<answer><para>" . $line;
|
||||
$answer = 1;
|
||||
$para = 1;
|
||||
|
||||
} elsif ($line =~ /^\s*----\s*$/) {
|
||||
$line = '';
|
||||
|
||||
# para
|
||||
#
|
||||
} else {
|
||||
if (($para == 0) and ($noparatag eq '')) {
|
||||
$line = "<para>" . $line;
|
||||
$para = 1;
|
||||
} else {
|
||||
$line .= " ";
|
||||
}
|
||||
}
|
||||
|
||||
$buf .= "$line\n";
|
||||
}
|
||||
|
||||
sub ProcessEnd {
|
||||
# close nesting
|
||||
#
|
||||
&close1;
|
||||
|
||||
if ($noparadepth > 0) {
|
||||
$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub Buffer {
|
||||
return $buf;
|
||||
}
|
||||
|
||||
# Basically a cut-and-paste of the original declarations,
|
||||
# to make sure all variables are completely cleared.
|
||||
#
|
||||
# Call this before rerunning ProcessLine to clear state.
|
||||
#
|
||||
sub Reset {
|
||||
$level1 = 0;
|
||||
$level2 = 0;
|
||||
$level3 = 0;
|
||||
$orderedlist = 0;
|
||||
$listitem = 0;
|
||||
$itemizedlist = 0;
|
||||
$para = 0;
|
||||
$qandaset = 0;
|
||||
$qandaentry = 0;
|
||||
$answer = 0;
|
||||
|
||||
# These are passed in by the caller
|
||||
#
|
||||
$txtfile = '';
|
||||
$dbfile = '';
|
||||
$verbose = 0;
|
||||
|
||||
# These maintain state
|
||||
#
|
||||
$line = '';
|
||||
$linenumber = 0;
|
||||
$id = '';
|
||||
$title = '';
|
||||
$buf = '';
|
||||
|
||||
$noparatag = 0;
|
||||
$noparadepth = 0;
|
||||
$noparaline = 0;
|
||||
}
|
||||
|
||||
sub close1 {
|
||||
&close2;
|
||||
if ($level1) {
|
||||
$buf .= "</sect1>\n";
|
||||
$level1 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub close2 {
|
||||
&close3;
|
||||
if ($level2) {
|
||||
$buf .= "</sect2>\n";
|
||||
$level2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub close3 {
|
||||
&closeorderedlist;
|
||||
&closeitemizedlist;
|
||||
&closepara;
|
||||
&closeqandaset;
|
||||
if ($level3) {
|
||||
$buf .= "</sect3>\n";
|
||||
$level3 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closenonsect {
|
||||
&closepara;
|
||||
# &closeorderedlist;
|
||||
# &closeitemizedlist;
|
||||
}
|
||||
|
||||
sub closelistitem {
|
||||
&closepara;
|
||||
if ($listitem) {
|
||||
$buf .= "</listitem>\n";
|
||||
$listitem = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeorderedlist {
|
||||
&closepara;
|
||||
&closelistitem;
|
||||
if ($orderedlist) {
|
||||
$buf .= "</orderedlist>\n";
|
||||
$orderedlist = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeitemizedlist {
|
||||
&closepara;
|
||||
&closelistitem;
|
||||
if ($itemizedlist) {
|
||||
$buf .= "</itemizedlist>\n";
|
||||
$itemizedlist = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closelists {
|
||||
&closeitemizedlist;
|
||||
&closeorderedlist;
|
||||
}
|
||||
|
||||
sub closeanswer {
|
||||
&closepara;
|
||||
if ($answer) {
|
||||
$buf .= "</answer>\n";
|
||||
$answer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeqandaentry {
|
||||
&closeanswer;
|
||||
if ($qandaentry) {
|
||||
$buf .= "</qandaentry>\n";
|
||||
$qandaentry = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeqandaset {
|
||||
&closeqandaentry;
|
||||
if ($qandaset) {
|
||||
$buf .= "</qandaset>\n";
|
||||
$qandaset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closepara {
|
||||
if ($para) {
|
||||
$buf .= "</para>\n";
|
||||
$para = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub trimline {
|
||||
$line =~ s/\s+$//;
|
||||
$line =~ s/^\s+//;
|
||||
}
|
||||
|
||||
sub splittitle {
|
||||
$line =~ s/^=+//;
|
||||
$line =~ s/=+$//;
|
||||
$title = $line;
|
||||
$id = "";
|
||||
if ($line =~ /\|/) {
|
||||
$title =~ s/\|.+//;
|
||||
$id = $line;
|
||||
$id =~ s/^.+\|//;
|
||||
}
|
||||
$title =~ s/\s+$//;
|
||||
$title =~ s/^\s+//;
|
||||
$id =~ s/\s+$//;
|
||||
$id =~ s/^\s+//;
|
||||
}
|
||||
|
||||
sub Usage {
|
||||
my $error = shift;
|
||||
print "Usage: wt2db [OPTIONS] {FILE]\n";
|
||||
print "\n";
|
||||
print "Options:\n";
|
||||
print "-o, --output-to write to the specified file.\n";
|
||||
print "-v, --verbose show diagnostic output.\n";
|
||||
print "-h, --help show this usage message.\n";
|
||||
exit($error);
|
||||
}
|
||||
1;
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
use Wt2Db;
|
||||
$WT = new Wt2Db;
|
||||
|
||||
$buffer = "foo bar
|
||||
|
||||
baz
|
||||
|
||||
";
|
||||
|
||||
$outbuf = '';
|
||||
|
||||
foreach $line (split /\n/, $buffer) {
|
||||
$WT->ProcessLine($line);
|
||||
}
|
||||
$WT->ProcessEnd();
|
||||
$outbuf = $WT->Buffer();
|
||||
print $outbuf;
|
503
LDP/wt2db/wt2db
503
LDP/wt2db/wt2db
|
@ -2,37 +2,13 @@
|
|||
#
|
||||
#Converts txt files into docbook.
|
||||
#
|
||||
# Requirements:
|
||||
#
|
||||
# If you use the "ldp:" namespace, you must have wget installed.
|
||||
# Wget is used to request an xml record from the LDP # database,
|
||||
# http://db.linuxdoc.org.
|
||||
#
|
||||
use Wt2Db;
|
||||
$WT2DB = new Wt2Db;
|
||||
|
||||
use File::Basename;
|
||||
use HTML::Entities;
|
||||
|
||||
my($txtfile, $dbfile) = '';
|
||||
|
||||
#These keep track of which constructs we're in the middle of
|
||||
my($level1,
|
||||
$level2,
|
||||
$level3,
|
||||
$orderedlist,
|
||||
$listitem,
|
||||
$itemizedlist,
|
||||
$para,
|
||||
$qandaset,
|
||||
$qandaentry,
|
||||
$answer);
|
||||
|
||||
my($line);
|
||||
my($id, $title);
|
||||
|
||||
my($verbose);
|
||||
|
||||
my($error);
|
||||
$error = 0;
|
||||
my $txtfile = '';
|
||||
my $dbfile = '';
|
||||
my $verbose = 0;
|
||||
my $error = 0;
|
||||
|
||||
# read in cmd-line arguments
|
||||
#
|
||||
|
@ -42,7 +18,7 @@ while (1) {
|
|||
$dbfile = $ARGV[0];
|
||||
shift(@ARGV);
|
||||
} elsif($ARGV[0] eq "-h" or $ARGV[0] eq "--help") {
|
||||
&usage;
|
||||
$WT2DB->Usage();
|
||||
} elsif($ARGV[0] eq "-v" or $ARGV[0] eq "--verbose") {
|
||||
$verbose++;
|
||||
shift(@ARGV);
|
||||
|
@ -56,467 +32,4 @@ while (1) {
|
|||
}
|
||||
}
|
||||
|
||||
# abort if no input file given
|
||||
#
|
||||
if($txtfile eq '') {
|
||||
$error = 1;
|
||||
&usage();
|
||||
} elsif( !(-r $txtfile) ) {
|
||||
print "txt2db: ERROR cannot read $f ($!)\n\n";
|
||||
$error = 1;
|
||||
&usage();
|
||||
}
|
||||
|
||||
unless ($dbfile) {
|
||||
($basename, $path, $ext) = fileparse($txtfile);
|
||||
$dbfile = $basename;
|
||||
$dbfile =~ s/\..*?$/\.sgml/;
|
||||
}
|
||||
|
||||
$buf = '';
|
||||
|
||||
&proc_txt($txtfile);
|
||||
|
||||
open(DB, "> $dbfile") || die "txt2db: cannot write to $dbfile ($!)\n";
|
||||
print DB $buf, "\n";
|
||||
close(DB);
|
||||
|
||||
exit(0);
|
||||
|
||||
# -----------------------------------------------------------
|
||||
|
||||
sub proc_txt {
|
||||
my($f) = @_;
|
||||
|
||||
my($linenumber);
|
||||
$linenumber = 0;
|
||||
|
||||
my ($noparatag,
|
||||
$noparadepth);
|
||||
$noparadepth = 0;
|
||||
$noparaline = 0;
|
||||
|
||||
# read in the text file
|
||||
#
|
||||
open(TXT, "$f") || die "txt2db: cannot open $f ($!)\n";
|
||||
while ($originalline = <TXT>) {
|
||||
$line = $originalline;
|
||||
$linenumber++;
|
||||
|
||||
&trimline;
|
||||
|
||||
# blank lines
|
||||
if ($line eq '') {
|
||||
if ($noparadepth == 0) {
|
||||
&closenonsect;
|
||||
next;
|
||||
}
|
||||
}
|
||||
|
||||
# capitalize hints that can be entered in lowercase
|
||||
#
|
||||
$line =~ s/^q:/Q:/;
|
||||
$line =~ s/^a:/A:/;
|
||||
|
||||
# encode entities
|
||||
#
|
||||
# while ($line =~ //) {
|
||||
# }
|
||||
# decode_entities($line);
|
||||
encode_entities($line);
|
||||
|
||||
# inline docbook
|
||||
#
|
||||
# ulink
|
||||
#
|
||||
while ($line =~ /\[\[/) {
|
||||
unless ($line =~ /\]\]/) {
|
||||
$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
|
||||
}
|
||||
|
||||
# separate link url from link name
|
||||
#
|
||||
$link = $line;
|
||||
$link=~ s/\n//g;
|
||||
$link =~ s/.*?\[\[//;
|
||||
$link =~ s/\]\].*?$//;
|
||||
if ($link =~ /\|/) {
|
||||
$linkname = $link;
|
||||
$link =~ s/\|.+$//;
|
||||
$linkname =~ s/^\S+\|//;
|
||||
} else {
|
||||
$linkname = $link;
|
||||
}
|
||||
|
||||
# kill quotes, they mess us up
|
||||
#
|
||||
$link =~ s/'/%27/g;
|
||||
|
||||
# namespaces are handled differently
|
||||
#
|
||||
print "$link\n" if ($verbose);
|
||||
if ($link =~ /^http:/) {
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^mailto:/) {
|
||||
$linkname =~ s/^mailto://;
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^wiki:/) {
|
||||
$linkname =~ s/^wiki://;
|
||||
$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
|
||||
$link =~ s/\ /+/;
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^ldp:/) {
|
||||
$linkname =~ s/^ldp://;
|
||||
$link =~ s/^ldp://;
|
||||
$tempfile = "/tmp/txt2db-" . $rand;
|
||||
$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
|
||||
system("$cmd");
|
||||
open(URL, "$tempfile") || die "txt2db: cannot open temporary file ($!)\n";
|
||||
$link = "";
|
||||
while ($url_line = <URL>) {
|
||||
$url_line =~ s/\n//;
|
||||
if ($url_line =~ /identifier/) {
|
||||
$link .= $url_line;
|
||||
}
|
||||
}
|
||||
close(URL);
|
||||
unlink $tempfile;
|
||||
$link =~ s/^.*?<identifier>//;
|
||||
$link =~ s/<\/identifier>.*?$//;
|
||||
if ($link eq '') {
|
||||
$linkname = "ERROR: LDP namespace resolution failure on $linkname";
|
||||
}
|
||||
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
||||
} elsif ($link =~ /^file:/) {
|
||||
$linkname =~ s/^file://;
|
||||
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
||||
} else {
|
||||
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
||||
}
|
||||
}
|
||||
|
||||
# emphasis
|
||||
#
|
||||
while ($line =~ /'''.*'''/) {
|
||||
$line =~ s/'''/<emphasis role='bold'>/;
|
||||
$line =~ s/'''/<\/emphasis>/;
|
||||
}
|
||||
|
||||
# this block defines DocBook structures that won't be broken up with
|
||||
# paragraphs when we hit empty lines:
|
||||
#
|
||||
# <para>
|
||||
# <sect1>
|
||||
# <sect2>
|
||||
# <sect3>
|
||||
# <programlisting>
|
||||
# <literallayout>
|
||||
|
||||
# forget about nopara
|
||||
if ($noparadepth == 0) {
|
||||
$noparatag = "";
|
||||
}
|
||||
|
||||
# start a new nopara section
|
||||
#
|
||||
if ((($line =~ /^<para>/) or
|
||||
($line =~ /^<sect/) or
|
||||
($line =~ /^<screen>/) or
|
||||
($line =~ /^<literallayout>/) or
|
||||
($line =~ /^<programlisting>/)) and
|
||||
($noparadepth == 0)) {
|
||||
&closepara;
|
||||
$noparatag = $line;
|
||||
$noparatag =~ s/^.*?<//;
|
||||
$noparatag =~ s/>.*?$//;
|
||||
$noparaline = $linenumber;
|
||||
if ($line =~ /^<screen>/) {
|
||||
unless ($para) {
|
||||
$line = "<para>" . $line;
|
||||
$para = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# count noparadepth
|
||||
#
|
||||
if ($noparatag ne '') {
|
||||
$temp = $line;
|
||||
while ($temp =~ /<$noparatag>/) {
|
||||
$temp =~ s/<?$noparatag>//;
|
||||
$noparadepth ++;
|
||||
}
|
||||
while ($temp =~ /<\/$noparatag>/) {
|
||||
$temp =~ s/<?\/$noparatag>//;
|
||||
$noparadepth --;
|
||||
if ($noparadepth == 0) {
|
||||
$noparaline == 0;
|
||||
}
|
||||
}
|
||||
|
||||
# runon protection
|
||||
#
|
||||
if ($linenumber >= ($noparaline + 100)) {
|
||||
$buf .= "ERROR: runon block starting on line $noparaline\n";
|
||||
last;
|
||||
}
|
||||
|
||||
# recover original line -- no whitespace modifiers
|
||||
#
|
||||
$line = $originalline;
|
||||
|
||||
# sect3
|
||||
#
|
||||
} elsif ($line =~ /^===/) {
|
||||
&close3;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect3><title>$title</title>\n";
|
||||
} else {
|
||||
$line = "<sect3 id='$id'><title id='$id-title'>$title</title>\n";
|
||||
}
|
||||
$level3 = 1;
|
||||
|
||||
# sect2
|
||||
#
|
||||
} elsif ($line =~ /^==/) {
|
||||
&close2;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect2><title>$title</title>\n";
|
||||
} else {
|
||||
$line = "<sect2 id='$id'><title id='$id-title'>$title</title>\n";
|
||||
}
|
||||
$level2 = 1;
|
||||
|
||||
# sect1
|
||||
#
|
||||
} elsif ($line =~ /^=/) {
|
||||
&close1;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<sect1><title>$title</title>\n";
|
||||
} else {
|
||||
$line = "<sect1 id='$id'><title id='$id-title'>$title</title>\n";
|
||||
}
|
||||
$level1 = 1;
|
||||
|
||||
# orderedlist
|
||||
#
|
||||
} elsif ($line =~ /^#/) {
|
||||
&closeitemizedlist;
|
||||
if ($orderedlist == 0) {
|
||||
$buf .= "\n<orderedlist>\n";
|
||||
$orderedlist = 1;
|
||||
}
|
||||
&closelistitem;
|
||||
$line =~ s/^#//;
|
||||
&trimline;
|
||||
$line =~ s/^/<listitem><para>/;
|
||||
$listitem = 1;
|
||||
$para = 1;
|
||||
} elsif ($line =~ /^\/#/) {
|
||||
$line =~ s/^\/#//;
|
||||
&trimline;
|
||||
&closeorderedlist;
|
||||
|
||||
# itemizedlist
|
||||
#
|
||||
} elsif ($line =~ /^\*/) {
|
||||
&closeorderedlist;
|
||||
if ($itemizedlist == 0) {
|
||||
$buf .= "\n<itemizedlist>\n";
|
||||
$itemizedlist = 1;
|
||||
}
|
||||
&closelistitem;
|
||||
$line =~ s/^\*//;
|
||||
&trimline;
|
||||
$line =~ s/^/<listitem><para>/;
|
||||
$listitem = 1;
|
||||
$para = 1;
|
||||
} elsif ($line =~ /\/\*/) {
|
||||
$line =~ s/^\/\*//;
|
||||
&trimline;
|
||||
&closeitemizedlist;
|
||||
|
||||
# question
|
||||
#
|
||||
} elsif ($line =~ /^Q:/) {
|
||||
&closelists;
|
||||
&closeqandaentry;
|
||||
$line =~ s/^Q://;
|
||||
&trimline;
|
||||
&splittitle;
|
||||
if ($id eq '') {
|
||||
$line = "<question><para>" . $title . "</para></question>\n";
|
||||
} else {
|
||||
$line = "<question id='$id'><para>" . $title . "</para></question>\n";
|
||||
}
|
||||
unless ($qandaentry == 1) {
|
||||
$line = "<qandaentry>\n" . $line;
|
||||
$qandaentry = 1;
|
||||
}
|
||||
if ($qandaset == 0) {
|
||||
$line = "<qandaset defaultlabel='qanda'>\n". $line;
|
||||
$qandaset = 1;
|
||||
}
|
||||
|
||||
# answer
|
||||
#
|
||||
} elsif ($line =~ /^A:/) {
|
||||
$line =~ s/^A://;
|
||||
&trimline;
|
||||
&closeanswer;
|
||||
$line = "<answer><para>" . $line;
|
||||
$answer = 1;
|
||||
$para = 1;
|
||||
|
||||
} elsif ($line =~ /^\s*----\s*$/) {
|
||||
$line = '';
|
||||
|
||||
# para
|
||||
#
|
||||
} else {
|
||||
if (($para == 0) and ($noparatag eq '')) {
|
||||
$line = "<para>" . $line;
|
||||
$para = 1;
|
||||
} else {
|
||||
$line .= " ";
|
||||
}
|
||||
}
|
||||
|
||||
$buf .= "$line ";
|
||||
}
|
||||
# close nesting
|
||||
#
|
||||
&close1;
|
||||
|
||||
if ($noparadepth > 0) {
|
||||
$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub close1 {
|
||||
&close2;
|
||||
if ($level1 == 1) {
|
||||
$buf .= "</sect1>\n";
|
||||
$level1 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub close2 {
|
||||
&close3;
|
||||
if ($level2 == 1) {
|
||||
$buf .= "</sect2>\n";
|
||||
$level2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub close3 {
|
||||
&closeorderedlist;
|
||||
&closeitemizedlist;
|
||||
&closepara;
|
||||
&closeqandaset;
|
||||
if ($level3 == 1) {
|
||||
$buf .= "</sect3>\n";
|
||||
$level3 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closenonsect {
|
||||
&closepara;
|
||||
# &closeorderedlist;
|
||||
# &closeitemizedlist;
|
||||
}
|
||||
|
||||
sub closelistitem {
|
||||
&closepara;
|
||||
if ($listitem == 1 ) {
|
||||
$buf .= "</listitem>\n";
|
||||
$listitem = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeorderedlist {
|
||||
&closepara;
|
||||
&closelistitem;
|
||||
if ($orderedlist == 1 ) {
|
||||
$buf .= "</orderedlist>\n";
|
||||
$orderedlist = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeitemizedlist {
|
||||
&closepara;
|
||||
&closelistitem;
|
||||
if ($itemizedlist == 1 ) {
|
||||
$buf .= "</itemizedlist>\n";
|
||||
$itemizedlist = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closelists {
|
||||
&closeitemizedlist;
|
||||
&closeorderedlist;
|
||||
}
|
||||
|
||||
sub closeanswer {
|
||||
&closepara;
|
||||
if ($answer == 1) {
|
||||
$buf .= "</answer>\n";
|
||||
$answer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeqandaentry {
|
||||
&closeanswer;
|
||||
if ($qandaentry == 1) {
|
||||
$buf .= "</qandaentry>\n";
|
||||
$qandaentry = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closeqandaset {
|
||||
&closeqandaentry;
|
||||
if ($qandaset == 1) {
|
||||
$buf .= "</qandaset>\n";
|
||||
$qandaset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub closepara {
|
||||
if ($para == 1) {
|
||||
$buf .= "</para>\n";
|
||||
$para = 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub trimline {
|
||||
$line =~ s/\s+$//;
|
||||
$line =~ s/^\s+//;
|
||||
}
|
||||
|
||||
sub splittitle {
|
||||
$line =~ s/^=+//;
|
||||
$line =~ s/=+$//;
|
||||
$title = $line;
|
||||
$id = "";
|
||||
if ($line =~ /\|/) {
|
||||
$title =~ s/\|.+//;
|
||||
$id = $line;
|
||||
$id =~ s/^.+\|//;
|
||||
}
|
||||
$title =~ s/\s+$//;
|
||||
$title =~ s/^\s+//;
|
||||
$id =~ s/\s+$//;
|
||||
$id =~ s/^\s+//;
|
||||
}
|
||||
|
||||
sub usage {
|
||||
print "Usage: txt2db [-v] [-h|-o <sgml file>] <text file>\n";
|
||||
print "-o, --output-to write to the specified file.\n";
|
||||
print "-v, --verbose show diagnostic output.\n";
|
||||
print "-h, --help show this usage message.\n";
|
||||
exit($error);
|
||||
}
|
||||
|
||||
$WT2DB->ProcessFile($txtfile, $dbfile, $verbose);
|
||||
|
|
Loading…
Reference in New Issue