203 lines
6.3 KiB
HTML
203 lines
6.3 KiB
HTML
<!-- MHonArc v2.5.0b2 -->
|
|
<!--X-Subject: FW: DOCBOOK-APPS: sgml auto-indenter -->
|
|
<!--X-From-R13: Uertbel Zroynap <UZroynapNph-cbegynaq.rqh> -->
|
|
<!--X-Date: Mon, 27 Nov 2000 19:35:13 -0500 (EST) -->
|
|
<!--X-Message-Id: 025836EFF856D411A6660090272811E61D0841@EMAIL -->
|
|
<!--X-Content-Type: text/plain -->
|
|
<!--X-Head-End-->
|
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML//EN">
|
|
<html>
|
|
<head>
|
|
<title>FW: DOCBOOK-APPS: sgml auto-indenter</title>
|
|
<link rev="made" href="mailto:GLeblanc@cu-portland.edu">
|
|
</head>
|
|
<body>
|
|
<!--X-Body-Begin-->
|
|
<!--X-User-Header-->
|
|
<!--X-User-Header-End-->
|
|
<!--X-TopPNI-->
|
|
<hr>
|
|
[<a href="msg04497.html">Date Prev</a>][<a href="msg04499.html">Date Next</a>][<a href="msg04497.html">Thread Prev</a>][<a href="msg04499.html">Thread Next</a>][<a href="maillist.html#04498">Date Index</a>][<a href="threads.html#04498">Thread Index</a>]
|
|
<!--X-TopPNI-End-->
|
|
<!--X-MsgBody-->
|
|
<!--X-Subject-Header-Begin-->
|
|
<h1>FW: DOCBOOK-APPS: sgml auto-indenter</h1>
|
|
<hr>
|
|
<!--X-Subject-Header-End-->
|
|
<!--X-Head-of-Message-->
|
|
<ul>
|
|
<li><em>To</em>: "Gnome Doc List (E-mail)" &lt;<A HREF="mailto:gnome-doc-list@gnome.org">gnome-doc-list@gnome.org</A>&gt;, "Ldp Discuss List (E-mail)" &lt;<A HREF="mailto:ldp-discuss@lists.linuxdoc.org">ldp-discuss@lists.linuxdoc.org</A>&gt;</li>
|
|
<li><em>Subject</em>: FW: DOCBOOK-APPS: sgml auto-indenter</li>
|
|
<li><em>From</em>: Gregory Leblanc &lt;<A HREF="mailto:GLeblanc@cu-portland.edu">GLeblanc@cu-portland.edu</A>&gt;</li>
|
|
<li><em>Date</em>: Mon, 27 Nov 2000 16:37:54 -0800</li>
|
|
<li><em>Resent-date</em>: Mon, 27 Nov 2000 19:35:13 -0500 (EST)</li>
|
|
<li><em>Resent-from</em>: <A HREF="mailto:ldp-discuss@lists.debian.org">ldp-discuss@lists.debian.org</A></li>
|
|
<li><em>Resent-message-id</em>: &lt;PITH_D.A.uiE.p5vI6@murphy&gt;</li>
|
|
<li><em>Resent-sender</em>: <A HREF="mailto:ldp-discuss-request@lists.debian.org">ldp-discuss-request@lists.debian.org</A></li>
|
|
</ul>
|
|
<!--X-Head-of-Message-End-->
|
|
<!--X-Head-Body-Sep-Begin-->
|
|
<hr>
|
|
<!--X-Head-Body-Sep-End-->
|
|
<!--X-Body-of-Message-->
|
|
<pre>
|
|
-----Original Message-----
|
|
From: Kevin M. Dunn [<A HREF="mailto:kdunn@hsc.edu">mailto:kdunn@hsc.edu</A>]
|
|
Sent: Sunday, November 26, 2000 12:22 PM
|
|
To: docbook-apps@lists.oasis-open.org
|
|
Subject: DOCBOOK-APPS: sgml auto-indenter
|
|
|
|
|
|
Several people have discussed the use of tidy to indent sgml and xml
|
|
sources. It didn't work for my documents, as
|
|
tidy did not recognize my entities. Rather than fix tidy, I just wrote a
|
|
perl script to indent anything with sgml-type
|
|
tags. Only non-empty tags are indented, and text is justified at 80
|
|
characters/line (easily changed). Try it out, if you
|
|
like, and let me know what needs fixing. I am running perl under redhat 6.1.
|
|
|
|
Known problems: will break line-specific environments. So far, the script is
|
|
quite general--it does not recognize
|
|
specific tags and so could be used for any xml or sgml, not just docbook. Is
|
|
there any way to recognize literal text
|
|
independent of DTD? Leading whitespace, for example? Trailing whitespace? Or
|
|
I could indent tags only, and leave
|
|
all non-tag text unjustified and unindented.
|
|
----Cut Here------
|
|
#!/usr/bin/perl -w
|
|
#
|
|
# sb: the sgml beautifier
|
|
# indents non-empty sgml tags
|
|
# usage: sb filename or sb < filename or | sb
|
|
# author: Kevin M. Dunn (kdunn@hsc.edu)
|
|
# license: anyone is free to use this for any purpose whatever
|
|
#
|
|
$jl = 80; # justification width: text is wrapped at 80 characters/line
|
|
$nl = 0; # extra length justify() adds to its running line length; set to $jl to force a line break
|
|
$sp = 0; # current nesting depth; used as the index into @space
|
|
$newline = ""; # hack to prevent extraneous blank first line
|
|
$space[0] = ""; # indentation string for depth 0 (none); deeper levels are built in indent_tags()
|
|
separate_tags(); # copy the input to $$.tmp with a line break before each "<" and after each ">"
|
|
get_tags(); # record in %tag1/%tag2 the tags for which a closing tag exists
|
|
indent_tags(); # print the indented tags and justified text to standard output
|
|
unlink ("$$.tmp"); # remove temporary file
|
|
print "\n"; # add final newline to output
|
|
# separate_tags: copy the input (the files named on the command line, or
# standard input) into the temporary file "$$.tmp", inserting a newline
# before every "<" and after every ">" so that each tag ends up on a line
# of its own.
# Takes no arguments.  Dies with a diagnostic if the temporary file cannot
# be created, written or closed, instead of silently producing no output.
sub separate_tags {
    open(FILETMP, ">$$.tmp")
        or die "sb: cannot create temporary file $$.tmp: $!\n";
    while (<>){
        s/</\n</g;      # start every tag on a fresh line
        s/>/>\n/g;      # break the line right after every tag
        print FILETMP $_
            or die "sb: cannot write to temporary file $$.tmp: $!\n";
    }
    # close() is where buffered write errors (e.g. disk full) surface
    close(FILETMP)
        or die "sb: cannot close temporary file $$.tmp: $!\n";
}
|
|
# get_tags: first pass over the temporary file "$$.tmp".
# For every line that begins with a closing tag ("</name ...") it records
#   $tag2{"</name"} = 1   (the closing form) and
#   $tag1{"<name"}  = 1   (the matching opening form),
# so that later code can tell which tags have a closing tag.
# Takes no arguments.  Dies if the temporary file cannot be opened.
sub get_tags {
    my $word;   # scratch copy of the current line, reduced to "<name" or "</name"
    open(FILETMP, "$$.tmp")
        or die "sb: cannot read temporary file $$.tmp: $!\n";
    while (<FILETMP>){
        $word = $_;
        $word =~ s/[> ].*//;    # drop everything from the first ">" or space onward
        chomp($word);
        if ( $word =~ /^<\// ){
            $tag2{$word} = 1;
            $word =~ s/\///;    # "</name" -> "<name"
            $tag1{$word} = 1;
        }
    }
    close(FILETMP);             # release the handle once the pass is finished
}
|
|
# indent_tags: second pass over the temporary file "$$.tmp"; prints the
# re-indented document to standard output.
#   - a tag listed in %tag1 (an opening tag that has a closing tag) is
#     printed at the current depth, then the depth $sp is increased;
#   - a tag listed in %tag2 (a closing tag) decreases the depth and is
#     printed at the new depth;
#   - any other line starting with "<" is printed at the current depth;
#   - any other non-empty line is handed to justify().
# After each tag $nl is set to $jl so that justify() starts a new line.
# Takes no arguments.  Dies if the temporary file cannot be opened.
sub indent_tags {
    my $word;   # current line reduced to "<name" or "</name"
    open(FILETMP, "$$.tmp")
        or die "sb: cannot read temporary file $$.tmp: $!\n";
    while (<FILETMP>){
        chomp($_);
        $word = $_;
        $word =~ s/[> ].*//;    # drop everything from the first ">" or space onward
        if ( $tag1{$word} ){
            print "\n$space[$sp]$_";
            $nl = $jl; # force new line on next line of input
            $sp++;
            if ( ! $space[$sp] ){
                # first visit to this depth: one more space than the parent
                $space[$sp] = $space[$sp-1] . " ";
            }
        }
        elsif ( $tag2{$word} ){
            # Never let the depth go below zero.  An unmatched closing tag
            # would otherwise make $sp negative; $space[-1] is the LAST
            # element of @space, and the next $sp++ would then overwrite
            # the depth-0 indentation string.
            $sp-- if $sp > 0;
            print "\n$space[$sp]$_";
            $nl = $jl; # force new line on next line of input
        }
        elsif ( $word =~ /</ ) {
            print "$newline$space[$sp]$_";
            $newline = "\n"; # hack to prevent extraneous blank first line
            $nl = $jl; # force new line on next line of input
        }
        elsif ( length($_) > 0 ) {
            justify();
        }
    }
    close(FILETMP);             # release the handle before the caller removes the file
}
|
|
# justify: print the whitespace-separated words of the current line ($_),
# each followed by one space, wrapping at $jl characters.
# The running line length is kept in the global $ll, which carries over
# from one call to the next.  $nl is added to the length for the first
# word and then cleared; the caller sets it to $jl to force a line break.
# Wrapped lines start with the indentation string of the current depth,
# $space[$sp].
sub justify {
    foreach my $token (split) {
        $ll += length($token) + 1 + $nl;    # length if this word is appended
        $nl = 0;                            # a forced break applies only once
        if ($ll >= $jl) {
            # too long: start a new, indented line with this word
            print "\n$space[$sp]$token ";
            $ll = length($space[$sp] . $token) + 1;
            next;
        }
        print "$token ";                    # still fits on the current line
    }
}
|
|
----Cut Here------
|
|
--
|
|
Kevin M. Dunn
|
|
kdunn@hsc.edu
|
|
Department of Chemistry
|
|
Hampden-Sydney College
|
|
HSC, VA 23943
|
|
(804) 223-6181
|
|
(804) 223-6374 (Fax)
|
|
|
|
|
|
|
|
--
|
|
To UNSUBSCRIBE, email to ldp-discuss-request@lists.debian.org
|
|
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org
|
|
|
|
</pre>
|
|
|
|
<!--X-Body-of-Message-End-->
|
|
<!--X-MsgBody-End-->
|
|
<!--X-Follow-Ups-->
|
|
<hr>
|
|
<!--X-Follow-Ups-End-->
|
|
<!--X-References-->
|
|
<!--X-References-End-->
|
|
<!--X-BotPNI-->
|
|
<ul>
|
|
<li>Prev by Date:
|
|
<strong><a href="msg04497.html">Re: Linux NetMeeting HOWTO</a></strong>
|
|
</li>
|
|
<li>Next by Date:
|
|
<strong><a href="msg04499.html">tag for translators?</a></strong>
|
|
</li>
|
|
<li>Previous by thread:
|
|
<strong><a href="msg04497.html">Re: Linux NetMeeting HOWTO</a></strong>
|
|
</li>
|
|
<li>Next by thread:
|
|
<strong><a href="msg04499.html">tag for translators?</a></strong>
|
|
</li>
|
|
<li>Index(es):
|
|
<ul>
|
|
<li><a href="maillist.html#04498"><strong>Date</strong></a></li>
|
|
<li><a href="threads.html#04498"><strong>Thread</strong></a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
|
|
<!--X-BotPNI-End-->
|
|
<!--X-User-Footer-->
|
|
<!--X-User-Footer-End-->
|
|
</body>
|
|
</html>
|