#!/usr/bin/perl
#
# ldp_mk - create all output forms needed for the LDP from SGML/XML file
# Copyright (C) 2002-2000 - Greg Ferguson (gferg@metalab.unc.edu)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# usage:
#
#   ldp_mk -style <stylesheet> -txt <txt_filter> -mk_index <file>.sgml
#
# where:
#   <stylesheet> - fullpath to a DSSSL stylesheet
#   <txt_filter> - one of the following: lynx, w3m, html2text
#
# The option parser also accepts:
#   -html  - stop once the HTML output has been created
#   -ldpwn - use the ldpwn.dsl stylesheet (unless -style is given) and stop
#            after the single-file HTML and the text output
#

# Called without arguments (or with an empty first argument): print the
# help text and leave with a success status.
if (!@ARGV || $ARGV[0] eq '') {
    usage();
    exit(0);
}

# Root directory of the tool installation; the SGML_TOOLROOT environment
# variable overrides the built-in default.
my($_toolroot) = $ENV{'SGML_TOOLROOT'} || '/export/sunsite/users/gferg/toolroot';
my($_jade)     = 'jade-1.2.1';   # jade directory name below $_toolroot
my($dtd)       = 'SGML';         # set to 'XML' when the DOCTYPE mentions xml
my($dcl)       = '';             # SGML declaration handed to jade (XML only)
my($linuxdoc)  = 1;              # 1 = LinuxDoc source, 0 = DocBook source

# Switches set by the option parser.  Every variable gets its own initial
# value: assigning a single scalar to a my(...) list would initialise only
# the first variable and leave the others undefined.
my($create_index, $html_only, $ldpwn) = (0, 0, 0);

# Working strings shared by the rest of the script, all starting out empty.
my($cmd, $fname, $fname_wo_ext, $txt_filter, $style, $db_style, $s, $db_v, $x)
    = ('') x 9;

# Line buffer reused by the text and HTML patching steps.
my(@flines) = ();

# Consume the leading switches from @ARGV.  The first argument that is not
# a recognised switch (or running out of arguments) ends option parsing.
while (@ARGV) {
    my $opt = $ARGV[0];

    if ($opt eq "-style") {
        # -style <stylesheet>
        shift(@ARGV);
        $style = shift(@ARGV);
    }
    elsif ($opt eq "-txt") {
        # -txt <txt_filter>
        shift(@ARGV);
        $txt_filter = shift(@ARGV);
    }
    elsif ($opt eq "-mk_index") {
        shift(@ARGV);
        $create_index = 1;
    }
    elsif ($opt =~ /^\-html/) {
        # any argument beginning with -html
        shift(@ARGV);
        $html_only = 1;
    }
    elsif ($opt =~ /^\-ldpwn/) {
        # any argument beginning with -ldpwn
        shift(@ARGV);
        $ldpwn = 1;
    }
    else {
        last;
    }
}

# The document to process is the last remaining command-line argument.
$fname        = @ARGV ? $ARGV[-1] : '';
$fname_wo_ext = $fname;

if( $fname =~ /\.[\w]+ml$/ ) {
    # An extension ending in "ml" was given: derive the base name from it.
    $fname_wo_ext =~ s/\.[\w]+$//;
} elsif( -e "$fname.sgml" ) {
    # No extension given: prefer an existing <name>.sgml ...
    $fname .= "\.sgml";
} else {
    # ... and fall back to <name>.xml otherwise.
    $fname .= "\.xml";
}

if( !(-e "$fname") ) {
    print "\nldp_mk: ERROR - cannot find/read $fname\n";
    &usage();
    # A missing input file is a failure, so report it through the exit
    # status as well (this used to exit with 0).
    exit(1);
}

# Without an explicit -style, fall back to the stylesheet that belongs to
# the selected mode (ldpwn or regular LDP HTML).
if ($style eq '') {
    $style = ($ldpwn == 1)
           ? "$_toolroot/dsssl/docbook/html/ldpwn.dsl"
           : "$_toolroot/dsssl/docbook/html/ldp.dsl#html";
}

# Stylesheet handed to jade when the index is generated.
$db_style = "$_toolroot/dsssl/docbook/html/docbook.dsl";

# determine DTD
#
# Find the first line among the first 100 lines of the document that
# mentions "!doctype" (in any letter case) and keep it in $s.  The file is
# read directly rather than through a "head | grep" shell pipeline, so file
# names containing blanks or shell metacharacters are handled correctly.
$s = '';
if (open(my $doc_fh, '<', $fname)) {
    my $lines_seen = 0;
    while (defined(my $doc_line = <$doc_fh>)) {
        last if ++$lines_seen > 100;
        if (index(lc($doc_line), '!doctype') >= 0) {
            $s = $doc_line;
            last;
        }
    }
    close($doc_fh);
} else {
    die "\nldp_mk: ERROR - cannot determine DTD for $fname\n";
}

# No DOCTYPE line found: the document type is unknown, give up.
if( $s eq '' ) {
    die "\nldp_mk: cannot determine DTD for $fname\n";
}

# Classify the document from its DOCTYPE line ($s) and prepare the
# environment for the matching tool chain.
if ($s =~ /linuxdoc/i) {

    $linuxdoc = 1;
    print "\nldp_mk: $fname is LinuxDoc SGML\n";

    # default text filter for LinuxDoc
    $txt_filter = 'lynx' if $txt_filter eq '';

} else {

    # " xml " in the DOCTYPE line selects the XML flavour
    if ($s =~ /\ xml\ /i) {
        $dtd = 'XML';
        $x   = 'x';
        $dcl = "${_toolroot}/${_jade}/pubtext/xml.dcl";
    }

    # determine docbook version
    #
    # Each entry: version marker in the DOCTYPE line, version label for the
    # progress message, DTD directory holding the catalog.  The first
    # matching entry wins.
    my @known_versions = (
        [ qr/V3\./,  "3.x",   "docbook_31"     ],
        [ qr/V4\.1/, "4.1.2", "docbook${x}_41" ],
        [ qr/V4\.2/, "4.2",   "docbook${x}_42" ],
    );
    my $dtd_catalog = '';
    foreach my $entry (@known_versions) {
        my ($marker, $label, $dtd_dir) = @$entry;
        next unless $s =~ $marker;
        $db_v        = $label;
        $dtd_catalog = "${_toolroot}/dtd/${dtd_dir}/catalog:";
        last;
    }
    if ($dtd_catalog eq '') {
        die "\nldp_mk: cannot determine DTD version for $fname\n";
    }
    $s = $dtd_catalog;

    # do not override any various SGML catalog file settings
    #
    if ($ENV{'SGML_CATALOG_FILES'} eq '') {
        $s .= "${_toolroot}/dsssl/docbook/catalog:" .
              "${_toolroot}/${_jade}/dsssl/catalog";
        $ENV{'SGML_CATALOG_FILES'} = $s;
    }

    # output directory named after the document (not created in ldpwn mode)
    unless ((-d "$fname_wo_ext") || $ldpwn != 0) {
        mkdir("$fname_wo_ext", 0755);
    }

    $linuxdoc = 0;
    print "\nldp_mk: $fname is DocBook (vers. ${db_v}) $dtd\n";

    # default text filter for DocBook
    $txt_filter = 'w3m' if $txt_filter eq '';
}

# create the index...
#
# Only done for DocBook sources, and only when -mk_index was given.
if ($create_index == 1 && $linuxdoc == 0) {

    print "\nldp_mk: creating index from $fname...\n";

    my $index_file = "index." . ($dtd eq 'XML' ? "xml" : "sgml");
    my $collate    = "$_toolroot/mkindex/collateindex.pl";

    # One shell command line: start a new index file, let jade write
    # HTML.index, collate that into the index file, remove HTML.index.
    $cmd = join('',
        "$collate -N -o $index_file;",
        "jade -t sgml -V html-index -d $db_style $dcl $fname; ",
        "$collate -g -t Index -i doc-index ",
        "-o $index_file HTML.index;",
        "rm -f HTML.index",
    );

    system($cmd);
}

# create HTML version
#
print "\nldp_mk: creating HTML from $fname...\n";

if ($linuxdoc == 1) {

    ## $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2html -c latin $fname";
    $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2html -c ascii $fname";

} elsif ($ldpwn == 1) {

    # ldpwn mode: a single jade run redirected into 00_<name>.html
    $cmd = "jade -t sgml -i html -d $style $dcl $fname > 00_${fname_wo_ext}.html ";

} else {

    # regular DocBook: the first jade run's *.htm* files are moved into
    # <name>/, then a second run with -V nochunks writes 00_<name>.html
    $cmd = "jade -t sgml -i html -d $style $dcl $fname; " .
           "mv -f \*.htm\* $fname_wo_ext/;" .
           "jade -t sgml -i html -V nochunks " .
           "-d $style $dcl $fname > 00_${fname_wo_ext}.html";
}

system($cmd);

# The file that must exist afterwards depends on the source type.
if (   ($linuxdoc == 1 && !(-e "$fname_wo_ext.html"))
    || ($linuxdoc == 0 && !(-e "00_${fname_wo_ext}.html")) ) {
    print "\nldp_mk: WARNING - could not create HTML: $fname_wo_ext\n";
}

# -html: nothing beyond HTML was requested; drop the index files and stop.
if ($html_only == 1) {
    system("rm -f index.sgml index.xml");
    exit(0);
}

# create PLAIN TEXT version
#
print "\nldp_mk: creating plain text from $fname...\n";

if( $linuxdoc == 1 ) {

    $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2txt -c latin -f $fname";

} else {

    my $single_html = "00_${fname_wo_ext}.html";
    my $txt_html    = "TXT_${fname_wo_ext}.html";

    # Rewrite the single-page HTML in place without the lines that start
    # with WIDTH="<digit>"; the filtered content stays in $s.  A file that
    # cannot be opened is reported instead of being silently ignored.
    $s = '';
    if (open(F_IN, '+<', $single_html)) {
        while(<F_IN>) {
            if( $_ =~ /^WIDTH=\"\d\"/ ) {
                next;
            }
            $s .= $_;
        }
        seek(F_IN,0,0);
        print F_IN $s;
        truncate(F_IN, tell(F_IN));
        close(F_IN);
    } else {
        print "\nldp_mk: WARNING - cannot open $single_html: $!\n";
    }

    # fix to place URLs in-line for text variant
    #
    # Anchors carrying HREF and TARGET become "<TT>[url]</TT> link text" in
    # a temporary copy that is fed to the text filter.
    $s =~ s/<A\s*HREF=\"(.*?)\"\s*TARGET=\"(.*?)\"\s*>(.*?)<\/A\s*>/<TT>[$1]<\/TT>\ $3/gm;
    if (open(F_OUT, '>', $txt_html)) {
        print F_OUT $s;
        close(F_OUT);
    } else {
        print "\nldp_mk: WARNING - cannot write $txt_html: $!\n";
    }

    # choose the HTML-to-text converter
    if( $txt_filter =~ /lynx/i ) {
        $cmd = "lynx -dump ";

    } elsif( $txt_filter =~ /w3m/i ) {
        $cmd = "$_toolroot/w3m/w3m -S -cols 78 -dump ";

    } elsif( $txt_filter =~ /html2text/ ) {
        $cmd = "$_toolroot/html2text/bin/html2text -style pretty -nobs ";

    } else {
        print "\nldp_mk: txt_filter($txt_filter) unrecognized, using lynx\n";
        $cmd = "lynx -dump ";
    }

    # convert the temporary copy, then remove it
    $cmd .= "TXT_${fname_wo_ext}.html > $fname_wo_ext.txt;" .
            "rm -f TXT_${fname_wo_ext}.html";
}

system($cmd);

# patch the text file; especially needed w/lynx
#
# Rewrites <name>.txt in place: runs of more than $def_blanks empty lines
# are cut short, "file://localhost<cwd>/" prefixes are removed and
# references to 00_<name> are renamed to <name>.
if( -e "$fname_wo_ext.txt" ) {

    my($def_blanks) = 3;   # longest run of consecutive empty lines kept
    my($count)      = 0;   # length of the current run of empty lines
    @flines = ();

    # Current directory, taken from the core Cwd module instead of an
    # unchecked "pwd" pipe.
    require Cwd;
    $s = Cwd::cwd();

    # Both strings are matched literally.  quotemeta() escapes every regex
    # metacharacter (previously only "+" was escaped), and the replacement
    # below uses the unescaped name so no backslashes end up in the text.
    my($pat)  = quotemeta('file://localhost' . $s . '/');
    my($pat2) = quotemeta($fname_wo_ext);

    open(HFILE, '+<', "$fname_wo_ext.txt") ||
        die "\nldp_mk: cannot open: $fname_wo_ext.txt";
    while(<HFILE>) {
        # a line holding just the newline counts as empty
        if (length($_) == 1) {
            $count++;
        } else {
            $count = 0;
        }
        if ($count <= $def_blanks) {
            $_ =~ s/$pat//g;
            $_ =~ s/00_$pat2/$fname_wo_ext/g;
            push(@flines, $_);
        }
    }

    # write the kept lines back over the original content
    seek(HFILE,0,0);
    print HFILE @flines;
    truncate(HFILE, tell(HFILE));
    close(HFILE);

} else {

    print "\nldp_mk: WARNING - could not create TXT: $fname_wo_ext.txt\n";
}

# ldpwn mode: finish the single-file HTML and stop; no PDF/PS, packaging
# or plucker output is produced in this mode.
if ($ldpwn == 1) {

    system("mv -f 00_${fname_wo_ext}.html ${fname_wo_ext}.html");

    # create rss feed
    #
    # Wrap the content of every list item in <span class="rss:item">.  The
    # patterns expect each tag's closing ">" at the start of the next line.
    open(NFILE, '+<', "${fname_wo_ext}.html") ||
        die "ldp_mk: cannot open ${fname_wo_ext}.html $!\n";
    @flines = <NFILE>;

    for (my $i = 0; $i < @flines; $i++) {

        if ($flines[$i] =~ /<LI/i) {

            # open the span right after the list-item tag ...
            $flines[$i] =~ s/<LI/<LI><span class=\"rss:item\"/;
            # ... and drop a "><p" line that directly follows it
            if ($i + 1 < @flines && $flines[($i+1)] =~ /^><p/i) {
                $i++;
                $flines[$i] = '';
            }

        } elsif ($flines[$i] =~ /^><\/li/i) {

            # close the span in front of the closing list-item tag and
            # remove the "</p" from the line before it; the $i > 0 test
            # keeps index -1 from wrapping around to the last line
            $flines[$i] = "</span></li\n";
            if ($i > 0 && $flines[($i-1)] =~ /<\/p/i) {
                $flines[($i-1)] =~ s/<\/p//i;
            }
        }

        # A substitution on $PAT_STR/$CHG_STR used to follow here.  Neither
        # variable is set anywhere in this script, and an empty pattern
        # makes Perl reuse the last successfully matched regex, so it could
        # delete unrelated text; it has been removed.
    }

    # write the modified lines back over the original content
    seek(NFILE,0,0);
    print NFILE @flines;
    truncate(NFILE, tell(NFILE));
    close(NFILE);

    # let tidy clean up the result in place (-m)
    $cmd = "$_toolroot/tidy/bin/tidy -ascii -c -wrap 200 -f /dev/null " .
           "-m ${fname_wo_ext}.html";
    system($cmd);

    exit;
}

# create PDF/PS versions
#
# Note that we use the single-page HTML variant
#
print "\nldp_mk: creating PDF/PS from $fname...\n";

my($print_str) = '';

if ($linuxdoc == 1) {

    # patch the linuxdoc-source single HTML file
    #
    system("$_toolroot/sgml_ld_1html $fname");
    $print_str = "00_${fname_wo_ext}.html";

    # Two htmldoc runs, one per output format, chained with "; ".
    my $htmldoc = "$_toolroot/htmldoc/bin/htmldoc --size universal";
    my $datadir = "--datadir $_toolroot/htmldoc/share/htmldoc ";
    my @runs    = ();
    foreach my $format ('pdf', 'ps') {
        push(@runs, "$htmldoc -t $format " . $datadir .
                    "--firstpage p1 -f $fname_wo_ext.$format $print_str");
    }
    $cmd = join('; ', @runs);

    # only run when the single-page HTML is really there
    system($cmd) if -e "00_$fname_wo_ext.html";

} elsif (-e "00_$fname_wo_ext.html") {

    # create new files from DocBook-source single HTML file to use for print
    #
    $cmd = "$_toolroot/ldp_print/ldp_print --toolroot ${_toolroot}/htmldoc/bin " .
           "--postscript 00_${fname_wo_ext}.html";
    system($cmd);

    # the results carry the 00_ prefix; rename them to the final names
    foreach my $format ('pdf', 'ps') {
        system("mv -f 00_${fname_wo_ext}.$format ${fname_wo_ext}.$format");
    }
}

# Check the print formats: warn about a missing PDF, try pdf2ps when the
# PostScript file is missing, and gzip the PostScript file if there is one.
unless (-e "$fname_wo_ext.pdf") {
    print "\nldp_mk: WARNING - could not create $fname_wo_ext.pdf\n";
}

unless (-e "$fname_wo_ext.ps") {
    print "\nldp_mk: WARNING - could not create $fname_wo_ext.ps..trying pdf2ps\n";
    system("pdf2ps ${fname_wo_ext}.pdf ${fname_wo_ext}.ps")
        if -e "${fname_wo_ext}.pdf";
}

if (-e "$fname_wo_ext.ps") {
    $cmd = "gzip -f $fname_wo_ext.ps";
    system($cmd);
}

#
# perform the packaging steps
#

print "\nldp_mk: creating HTML package...\n";

# LinuxDoc HTML files sit next to the source as <name>*.html; DocBook HTML
# was moved into the <name>/ directory.
my $html_members = ($linuxdoc == 1) ? "$fname_wo_ext\*.html"
                                    : "$fname_wo_ext/\*";
$cmd = "tar -cvf $fname_wo_ext-html.tar $html_members; " .
       "gzip -f $fname_wo_ext-html.tar";
system($cmd);

unless (-e "$fname_wo_ext-html.tar.gz") {
    print "\nldp_mk: WARNING - could not create $fname_wo_ext-html.tar.gz\n";
}

# print "\nldp_mk: creating SGML package...\n";
#
# $cmd = "cp $fname ldp_mk_tmp; gzip -f ldp_mk_tmp; " .
# "mv -f ldp_mk_tmp.gz $fname_wo_ext." . lc($dtd) . ".gz";
# system($cmd);

# if LinuxDoc, create DocBook SGML...
#
if ($linuxdoc == 1) {

    print "\nldp_mk: creating DocBook from LinuxDoc...\n";

    # catalogs for this conversion (replaces any earlier setting)
    $ENV{'SGML_CATALOG_FILES'} =
        "$_toolroot/dtd/docbook.cat:$_toolroot/$_jade/dsssl/catalog";

    # sgmlnorm writes EX_<file>, jade turns that into DB_<file> using the
    # ld2db stylesheet, and the intermediate EX_<file> is removed again.
    $cmd = join('',
        "sgmlnorm $_toolroot/ld2db/docbook.dcl $fname > EX_$fname ;",
        "jade -t sgml -c $_toolroot/ld2db/catalog ",
        "-d $_toolroot/ld2db/ld2db.dsl#db EX_$fname > DB_$fname ;",
        "rm -f EX_$fname",
    );
    system($cmd);

    if (-e "DB_$fname") {
        $cmd = "gzip -f DB_$fname";
        system($cmd);
    } else {
        print "\nldp_mk: WARNING - could not create DocBook: DB_$fname\n";
    }
}

# cleanup
system("rm -f index.sgml index.xml body.html title.html");

# make plucker version

# Options common to both source types; the -H <file> argument is appended
# below.
$plucker_cmd = join('',
    "$_toolroot/plucker/bin/plucker-build --zlib-compression ",
    "-M999 -N \"${fname_wo_ext}\" -f ${fname_wo_ext} ",
    "--category=LDP --stayonhost --pluckerdir=./ ",
);

if ($linuxdoc == 1) {
    # LinuxDoc: build from <name>.html in the current directory
    $plucker_cmd .= "-H ${fname_wo_ext}.html";
    system("${plucker_cmd}");
} else {
    # DocBook: build from index.html inside <name>/ and move the resulting
    # .pdb up next to the other output files
    $plucker_cmd .= "-H index.html";
    system("cd ${fname_wo_ext} ; ${plucker_cmd}; mv ${fname_wo_ext}.pdb ../${fname_wo_ext}.pdb");
}

print "\nldp_mk: completed...\n";

exit(0);

# usage - print the command-line synopsis and a summary of the options to
# STDOUT.  Takes no arguments.  Every switch the option parser accepts is
# listed, and the brackets mark the switches as optional.
sub usage {

    print "\n\n",
          "usage: ldp_mk ",
          "[-style <stylesheet>] [-txt <txt_filter>] [-mk_index] [-html] [-ldpwn] <file>.sgml\n\n",
          " where:\n",
          " <stylesheet> - fullpath to a DSSSL stylesheet\n",
          " <txt_filter> - one of the following: lynx, w3m, html2text\n",
          " -mk_index - create the document index first (DocBook sources only)\n",
          " -html - stop once the HTML output has been created\n",
          " -ldpwn - use the ldpwn stylesheet; stop after the single-file HTML and text\n";
}