mirror of https://github.com/tLDP/LDP
added texi2db, texinfo to docbook converter nearing initial release
This commit is contained in:
parent
abd7408625
commit
1872402c7f
|
@ -28,8 +28,9 @@ scrollserver/ python web application server front end to scrollkeeper
|
|||
stylesheets/ xsl stylesheets for xml -> html conversion
|
||||
www/ www.scrollserver.org website
|
||||
test/ to test your cvs without disturbing things, use this
|
||||
txt2db/ utility to convert text files into docbook
|
||||
texi2db/ utility to convert Texinfo files into docbook
|
||||
users/ individual users' areas
|
||||
wt2db/ utility to convert WikiText files into docbook
|
||||
www/ websites
|
||||
db./ ldp database website
|
||||
cgi-bin/ perl scripts for the ldp database
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,69 +0,0 @@
|
|||
This is a utility to convert text files in a specific format into valid
|
||||
DocBook. Just pass it the input filename on the command line and you'll
|
||||
get a .sgml file out. It won't be a complete valid document, as it will
|
||||
have no header information or dtd specification. It's just a DocBook
|
||||
fragment, not a complete document.
|
||||
|
||||
The following constructs are currently supported. If you need support for
|
||||
an additional construct, write discuss@linuxdoc.org if you're subscribed,
|
||||
or feedback@linuxdoc.org if you're not.
|
||||
|
||||
Or just add it in the cvs. :-)
|
||||
|
||||
Foo <para>Foo</para>
|
||||
|
||||
=Title= <sect1><title>Title</title>
|
||||
</sect1>
|
||||
|
||||
=Title|id= <sect1 id='id'><title>Title</title>
|
||||
</sect1>
|
||||
|
||||
works for other sect levels as well, and many other
|
||||
tags. It is either the "id" value, or the "title"
|
||||
value, depending on the semantics of the particular
|
||||
tag. Usage should be obvious in context.
|
||||
|
||||
==Title== <sect2><title>Title</title>
|
||||
</sect2>
|
||||
|
||||
===Title=== <sect3><title>Title</title>
|
||||
</sect3>
|
||||
|
||||
|
||||
#Foo <orderedlist>
|
||||
#Bar <listitem><para>Foo</para></listitem>
|
||||
#Baz <listitem><para>Bar</para></listitem>
|
||||
/# <listitem><para>Baz</para></listitem>
|
||||
</orderedlist>
|
||||
|
||||
*Foo <itemizedlist>
|
||||
*Bar <listitem><para>Foo</para></listitem>
|
||||
*Baz <listitem><para>Bar</para></listitem>
|
||||
/* <listitem><para>Baz</para></listitem>
|
||||
</itemizedlist>
|
||||
|
||||
[[http://foo.org]] <ulink url='http://foo.org'>
|
||||
<citetitle>http://foo.org</citetitle>
|
||||
</ulink>
|
||||
|
||||
[[http://foo.org Foo]] <ulink url='http://foo.org'>
|
||||
<citetitle>Foo</citetitle>
|
||||
</ulink>
|
||||
|
||||
[[http://foo.org|Foo]] You can also delimit with the pipe character "|".
|
||||
This works on any of these [[]] tags.
|
||||
|
||||
[[file:Foo]] <filename>Foo</filename>
|
||||
|
||||
'''Foo''' <emphasis role='bold'>Foo</emphasis>
|
||||
|
||||
A few DocBook structures do not have <para> tags wrapped around them. They
|
||||
are <para> itself (duh!), <sect?>, <screen>, <literallayout> and <programlisting>. If you insert anything
|
||||
using these tags, no <para> tags will be wrapped around it or inserted into it.
|
||||
So if you want fine control over your <para> tags, insert them yourself.
|
||||
|
||||
These tags include:
|
||||
|
||||
<programlisting/>
|
||||
<screen/>
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
=Introduction|intro-to-the-program=
|
||||
|
||||
The following list should be rendered as a qandaset:
|
||||
|
||||
Q: Why?|why-id
|
||||
A: Why
|
||||
not?
|
||||
|
||||
Multiple questions and multiple answers:
|
||||
|
||||
Q: Why?
|
||||
A: Why not?
|
||||
A: Why not2?
|
||||
|
||||
Q: Why?
|
||||
A: Why not?
|
||||
|
||||
Simple List
|
||||
|
||||
*item
|
||||
*item
|
||||
*item
|
||||
/*
|
||||
|
||||
This tests arbitrary DocBook. It should be passed right on to the output file
|
||||
with no changes. It can be nested arbitrarily deep.
|
||||
|
||||
<informaltable>test
|
||||
<foo>test some more
|
||||
</foo>
|
||||
<informaltable>This is the second level!
|
||||
</informaltable>
|
||||
</informaltable>
|
||||
|
||||
This document is from the [[http://www.linuxdoc.org Linux Documentation Project]].
|
||||
|
||||
Numbered List
|
||||
|
||||
This is an '''important''' [file].
|
||||
|
||||
#item
|
||||
#item
|
||||
|
||||
#item
|
||||
/#
|
||||
|
||||
Another to make sure the numbers restart at one.
|
||||
|
||||
#item
|
||||
#item
|
||||
#item
|
||||
/#
|
||||
|
||||
=Bar=
|
||||
|
||||
Just another section.
|
||||
|
||||
==Level 2|level2==
|
||||
|
||||
===Level 3|level3===
|
||||
|
||||
=Conclusion|conclusion=
|
||||
|
||||
All previous sections should be properly closed.
|
||||
|
||||
|
||||
=test again=
|
||||
|
||||
[[ldp:INFO-SHEET]]
|
||||
[[ldp:Distributions-HOWTO]]
|
||||
|
||||
|
|
@ -1,523 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
#Converts txt files into docbook.
|
||||
#
|
||||
# Requirements:
|
||||
#
|
||||
# If you use the "ldp:" namespace, you must have wget installed.
|
||||
# Wget is used to request an xml record from the LDP database,
|
||||
# http://db.linuxdoc.org.
|
||||
#
|
||||
|
||||
use File::Basename;
|
||||
use HTML::Entities;
|
||||
|
||||
# Input and output file names. Both start out empty; a scalar assigned to a
# list would only initialise the first variable, so use a list on the right.
my ($txtfile, $dbfile) = ('', '');

# These keep track of which constructs we're in the middle of.
# Each flag is 1 while the construct is open and 0 otherwise.
my ($level1,
    $level2,
    $level3,
    $orderedlist,
    $listitem,
    $itemizedlist,
    $para,
    $qandaset,
    $qandaentry,
    $answer) = (0) x 10;

# The line currently being converted; shared with trimline and splittitle.
my ($line);

# Results of the most recent splittitle call.
my ($id, $title);

# Incremented once for every -v / --verbose on the command line.
my ($verbose);

# Exit status handed to exit() by usage.
my ($error) = (0);
|
||||
|
||||
# Walk the command line one option at a time. The loop body always runs
# once, and parsing stops as soon as the next argument is missing or empty.
#
do {
    my $arg = $ARGV[0];

    if ($arg eq "-h" or $arg eq "--help") {
        # usage exits, so nothing is consumed here
        &usage;
    } elsif ($arg eq "-v" or $arg eq "--verbose") {
        shift(@ARGV);
        $verbose++;
    } elsif ($arg eq "-o" or $arg eq "--output-to") {
        # drop the switch, then take the following argument as the file name
        shift(@ARGV);
        $dbfile = shift(@ARGV);
    } else {
        # anything else is the input file; the last one given wins
        $txtfile = shift(@ARGV);
    }
} until ($ARGV[0] eq '');
|
||||
|
||||
# Abort with the usage message if no input file was given or if it cannot
# be read. usage exits with the status stored in $error.
#
if (!defined($txtfile) or $txtfile eq '') {
    print "txt2db: ERROR text file not specified.\n\n";
    $error = 1;
    usage();
} elsif (!(-r $txtfile)) {
    # Name the file that failed the readability test.
    print "txt2db: ERROR cannot read $txtfile ($!)\n\n";
    $error = 1;
    usage();
}
|
||||
|
||||
# Without -o, name the output after the input: its base name with everything
# from the first dot replaced by ".sgml", written to the current directory.
unless ($dbfile) {
    my ($basename) = fileparse($txtfile);
    $dbfile = $basename;
    # An input without any extension still needs the suffix; otherwise the
    # output name would equal the input's base name and could overwrite the
    # input when it lives in the current directory.
    $dbfile .= '.sgml' unless ($dbfile =~ s/\..*?$/\.sgml/);
}

# proc_txt and the close* helpers accumulate the converted document here.
$buf = '';

proc_txt($txtfile);

open(my $db, '>', $dbfile) || die "txt2db: cannot write to $dbfile ($!)\n";
print {$db} $buf, "\n";
# Buffered write errors only surface when the handle is closed.
close($db) || die "txt2db: cannot finish writing $dbfile ($!)\n";

exit(0);
|
||||
|
||||
# -----------------------------------------------------------
|
||||
|
||||
# Convert the text file named $f into a DocBook fragment.
#
# The file is read line by line. Each line is classified by its leading
# marker (=, #, *, /#, /*, Q:, A:, ----) and the resulting markup is appended
# to the global $buf. Which constructs are currently open is tracked in the
# file-level flags ($para, $orderedlist, ...) and unwound through the close*
# helpers. Dies if $f cannot be opened.
#
# Lines starting with <para>, <sect..., <screen>, <literallayout> or
# <programlisting> open a pass-through block: everything up to the matching
# close tag is copied to the output unchanged and is not broken up into
# paragraphs on empty lines.
sub proc_txt {
    my ($f) = @_;

    my $linenumber  = 0;
    my $noparatag   = '';    # element name of the open pass-through block
    my $noparadepth = 0;     # nesting depth of that element
    my $noparaline  = 0;     # line on which the block started

    open(my $txt, '<', $f) || die "txt2db: cannot open $f ($!)\n";
    while (my $originalline = <$txt>) {
        $line = $originalline;
        $linenumber++;

        trimline();

        # A blank line ends the current paragraph, except inside a
        # pass-through block where it is part of the content.
        if ($line eq '' and $noparadepth == 0) {
            closenonsect();
            next;
        }

        # Pass-through detection must see the text before entity encoding
        # turns "<" into "&lt;".
        my $rawline = $line;

        # forget about nopara
        $noparatag = '' if ($noparadepth == 0);

        # start a new nopara section
        #
        if ($noparadepth == 0 and
            $rawline =~ /^<(?:(?:para|screen|literallayout|programlisting)>|sect)/) {
            closepara();
            ($noparatag) = $rawline =~ /^<([^\s>]+)/;
            $noparaline = $linenumber;
            if ($rawline =~ /^<screen>/) {
                # <screen> is wrapped in a paragraph. The line itself is
                # copied verbatim below, so the opening tag goes straight
                # into the buffer to balance the later </para>.
                $buf .= "<para>";
                $para = 1;
            }
        }

        # count noparadepth
        #
        if ($noparatag ne '') {
            # Open tags may carry attributes; self-closing tags do not nest.
            my $opened = () = $rawline =~ m{<\Q$noparatag\E(?:\s[^>]*)?(?<!/)>}g;
            my $closed = () = $rawline =~ m{</\Q$noparatag\E\s*>}g;
            $noparadepth += $opened - $closed;
            $noparadepth = 0 if ($noparadepth < 0);

            # runon protection: give up on a block still open after 100 lines
            #
            if ($noparadepth > 0 and $linenumber >= ($noparaline + 100)) {
                $buf .= "ERROR: runon block starting on line $noparaline\n";
                last;
            }

            # emit the original line -- no whitespace modifiers
            #
            $buf .= $originalline;
            next;
        }

        # capitalize hints that can be entered in lowercase
        #
        $line =~ s/^q:/Q:/;
        $line =~ s/^a:/A:/;

        # encode entities
        #
        encode_entities($line);
        # Current HTML::Entities releases also encode the apostrophe, which
        # would hide the ''' emphasis markers and the quote handling in links.
        # A literal "&#39;" typed by the author is "&amp;#39;" by now, so this
        # only undoes what the encoder just did.
        $line =~ s/&#39;/'/g;

        # inline docbook: links, then emphasis
        #
        _expand_links($linenumber);

        while ($line =~ /'''.*'''/) {
            $line =~ s/'''/<emphasis role='bold'>/;
            $line =~ s/'''/<\/emphasis>/;
        }

        # sect1 - sect3; four or more "=" are treated like three
        #
        if ($line =~ /^(=+)/) {
            my $depth = length($1) > 3 ? 3 : length($1);
            if ($depth == 3) {
                close3();
            } elsif ($depth == 2) {
                close2();
            } else {
                close1();
            }
            splittitle();
            if ($id eq '') {
                $line = "<sect$depth><title>$title</title>\n";
            } else {
                $line = "<sect$depth id='$id'><title id='${id}-title'>$title</title>\n";
            }
            if ($depth == 3) {
                $level3 = 1;
            } elsif ($depth == 2) {
                $level2 = 1;
            } else {
                $level1 = 1;
            }

        # orderedlist
        #
        } elsif ($line =~ /^#/) {
            closeitemizedlist();
            if ($orderedlist == 0) {
                $buf .= "\n<orderedlist>\n";
                $orderedlist = 1;
            }
            closelistitem();
            $line =~ s/^#//;
            trimline();
            $line = "<listitem><para>" . $line;
            $listitem = 1;
            $para = 1;
        } elsif ($line =~ /^\/#/) {
            $line =~ s/^\/#//;
            trimline();
            closeorderedlist();

        # itemizedlist
        #
        } elsif ($line =~ /^\*/) {
            closeorderedlist();
            if ($itemizedlist == 0) {
                $buf .= "\n<itemizedlist>\n";
                $itemizedlist = 1;
            }
            closelistitem();
            $line =~ s/^\*//;
            trimline();
            $line = "<listitem><para>" . $line;
            $listitem = 1;
            $para = 1;
        } elsif ($line =~ /^\/\*/) {
            # Anchored like the other markers, so "/*" in running text does
            # not end a list.
            $line =~ s/^\/\*//;
            trimline();
            closeitemizedlist();

        # question
        #
        } elsif ($line =~ /^Q:/) {
            closelists();
            closeqandaentry();
            $line =~ s/^Q://;
            trimline();
            splittitle();
            if ($id eq '') {
                $line = "<question><para>" . $title . "</para></question>\n";
            } else {
                $line = "<question id='$id'><para>" . $title . "</para></question>\n";
            }
            unless ($qandaentry == 1) {
                $line = "<qandaentry>\n" . $line;
                $qandaentry = 1;
            }
            if ($qandaset == 0) {
                $line = "<qandaset defaultlabel='qanda'>\n" . $line;
                $qandaset = 1;
            }

        # answer
        #
        } elsif ($line =~ /^A:/) {
            $line =~ s/^A://;
            trimline();
            closeanswer();
            $line = "<answer><para>" . $line;
            $answer = 1;
            $para = 1;

        # a horizontal rule produces no output
        #
        } elsif ($line =~ /^\s*----\s*$/) {
            $line = '';

        # para
        #
        } else {
            if ($para == 0) {
                $line = "<para>" . $line;
                $para = 1;
            } else {
                $line .= " ";
            }
        }

        $buf .= "$line ";
    }
    close($txt);

    # close nesting
    #
    close1();

    if ($noparadepth > 0) {
        $buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
    }
}

# Replace every [[target]] / [[target|name]] tag in $line with DocBook markup.
# $linenumber is only used in the error appended to $buf for an unterminated
# tag.
sub _expand_links {
    my ($linenumber) = @_;

    while ($line =~ /\[\[/) {
        my ($link) = $line =~ /\[\[(.*?)\]\]/;
        unless (defined($link)) {
            $buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
            # Nothing below can consume the "[[", so stop instead of
            # reporting the same error forever.
            last;
        }

        # separate link url from link name
        #
        my $linkname = $link;
        if ($link =~ /\|/) {
            $link     =~ s/\|.+$//;
            $linkname =~ s/^\S+\|//;
        } elsif ($link =~ /^(https?:\S+)\s+(.+)$/) {
            # "[[http://foo.org Foo]]": a URL cannot contain whitespace, so
            # the first blank separates it from the name.
            ($link, $linkname) = ($1, $2);
        }

        # kill quotes, they would end the url='...' attribute
        #
        $link =~ s/'/%27/g;

        print "$link\n" if ($verbose);

        # namespaces are handled differently
        #
        my $markup;
        if ($link =~ /^https?:/) {
            $markup = _ulink($link, $linkname);
        } elsif ($link =~ /^mailto:/) {
            $linkname =~ s/^mailto://;
            $markup = _ulink($link, $linkname);
        } elsif ($link =~ /^wiki:/) {
            $linkname =~ s/^wiki://;
            $link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
            $link =~ s/\ /+/g;
            $markup = _ulink($link, $linkname);
        } elsif ($link =~ /^ldp:/) {
            $linkname =~ s/^ldp://;
            $link =~ s/^ldp://;
            $link = _lookup_ldp_url($link);
            if ($link eq '') {
                $linkname = "ERROR: LDP namespace resolution failure on $linkname";
            }
            $markup = _ulink($link, $linkname);
        } else {
            # "file:" and anything without a known namespace is a file name
            $linkname =~ s/^file://;
            $markup = "<filename>$linkname</filename>";
        }

        $line =~ s/\[\[.*?\]\]/$markup/;
    }
}

# Build a <ulink> element pointing at $url and titled $name.
sub _ulink {
    my ($url, $name) = @_;
    return "<ulink url='$url'><citetitle>$name</citetitle></ulink>";
}

# Ask the LDP database for the document called $name and return the content
# of its <identifier> element, or '' if the reply has none. Dies if wget
# cannot be started.
sub _lookup_ldp_url {
    my ($name) = @_;

    # Read wget's output through a pipe opened in list form: no shell sees
    # the document name and no temporary file is needed.
    my $query = "http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$name";
    open(my $wget, '-|', 'wget', '-q', '-O', '-', $query)
        || die "txt2db: cannot run wget ($!)\n";

    my $identifier = '';
    while (my $url_line = <$wget>) {
        $url_line =~ s/\n//;
        $identifier .= $url_line if ($url_line =~ /identifier/);
    }
    close($wget);

    $identifier =~ s/^.*?<identifier>//;
    $identifier =~ s/<\/identifier>.*?$//;
    return $identifier;
}
|
||||
|
||||
# End the current <sect1>. Everything nested inside it (sect2, sect3 and
# their contents) is closed first via close2.
sub close1 {
    close2();
    return unless ($level1 == 1);
    $level1 = 0;
    $buf .= "</sect1>\n";
}
|
||||
|
||||
# End the current <sect2>, closing any open sect3 and its contents first.
sub close2 {
    close3();
    return unless ($level2 == 1);
    $level2 = 0;
    $buf .= "</sect2>\n";
}
|
||||
|
||||
# End the current <sect3>. Open lists, the open paragraph and the open
# qandaset are closed first, in that order.
sub close3 {
    for my $closer (\&closeorderedlist, \&closeitemizedlist,
                    \&closepara, \&closeqandaset) {
        $closer->();
    }
    return unless ($level3 == 1);
    $level3 = 0;
    $buf .= "</sect3>\n";
}
|
||||
|
||||
# Called on a blank line: ends the open paragraph only. Lists are left open
# on purpose; they end at their explicit /# or /* marker.
sub closenonsect {
    closepara();
}
|
||||
|
||||
# End the open <listitem>, closing its paragraph first.
sub closelistitem {
    closepara();
    return unless ($listitem == 1);
    $listitem = 0;
    $buf .= "</listitem>\n";
}
|
||||
|
||||
# End the open <orderedlist> together with its last paragraph and item.
sub closeorderedlist {
    closepara();
    closelistitem();
    return unless ($orderedlist == 1);
    $orderedlist = 0;
    $buf .= "</orderedlist>\n";
}
|
||||
|
||||
# End the open <itemizedlist> together with its last paragraph and item.
sub closeitemizedlist {
    closepara();
    closelistitem();
    return unless ($itemizedlist == 1);
    $itemizedlist = 0;
    $buf .= "</itemizedlist>\n";
}
|
||||
|
||||
# End whichever list is open: the itemized list first, then the ordered one.
sub closelists {
    $_->() for (\&closeitemizedlist, \&closeorderedlist);
}
|
||||
|
||||
# End the open <answer>, closing its paragraph first.
sub closeanswer {
    closepara();
    return unless ($answer == 1);
    $answer = 0;
    $buf .= "</answer>\n";
}
|
||||
|
||||
# End the open <qandaentry>, closing its last answer first.
sub closeqandaentry {
    closeanswer();
    return unless ($qandaentry == 1);
    $qandaentry = 0;
    $buf .= "</qandaentry>\n";
}
|
||||
|
||||
# End the open <qandaset>, closing its last entry first.
sub closeqandaset {
    closeqandaentry();
    return unless ($qandaset == 1);
    $qandaset = 0;
    $buf .= "</qandaset>\n";
}
|
||||
|
||||
# Append </para> to $buf if a paragraph is open and mark it closed.
sub closepara {
    return unless ($para == 1);
    $para = 0;
    $buf .= "</para>\n";
}
|
||||
|
||||
# Strip trailing, then leading, whitespace from the shared $line in place.
sub trimline {
    for ($line) {
        s/\s+$//;
        s/^\s+//;
    }
}
|
||||
|
||||
# Split the shared $line of the form "=Title|id=" into $title and $id.
#
# Leading and trailing "=" are removed from $line itself. Without a "|" the
# whole remainder is the title and $id is empty. Otherwise the title is the
# text before the first "|" and the id is the text after the last one. Both
# results have surrounding whitespace removed.
sub splittitle {
    $line =~ s/^=+//;
    $line =~ s/=+$//;

    $title = $line;
    $id    = "";
    if ($line =~ /\|/) {
        # ".*" rather than ".+": a pipe with nothing after it ("Title|") or
        # nothing before it ("|id") must still be cut off.
        $title =~ s/\|.*//;
        $id = $line;
        $id =~ s/^.*\|//;
    }

    for ($title, $id) {
        s/\s+$//;
        s/^\s+//;
    }
}
|
||||
|
||||
# Print the command-line synopsis to standard output and leave the program
# with the exit status currently stored in $error.
sub usage {
    print "Usage: txt2db [-v] [-h|-o <sgml file>] <text file>\n",
          "-o, --output-to write to the specified file.\n",
          "-v, --verbose show diagnostic output.\n",
          "-h, --help show this usage message.\n";
    exit($error);
}
|
||||
|
Loading…
Reference in New Issue