#!/usr/bin/perl
#
#  ldp_mk - create all output forms needed for the LDP from SGML/XML file
#  Copyright (C) 2002-2000 - Greg Ferguson (gferg@metalab.unc.edu)
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#  usage:
#
#    ldp_mk -style <stylesheet> -txt <txt_filter> -mk_index <file>.sgml
#
#  where:
#    <stylesheet> - fullpath to a DSSSL stylesheet
#    <txt_filter> - one of the following: lynx, w3m, html2text
#
#  (The placeholder names above are reconstructed; the angle-bracket text
#  was missing from the copy this header was restored from.)
#
#  The option loop below also accepts:
#    -mk_index  build an index (only acted on for non-LinuxDoc input)
#    -html...   any option starting with "-html": sets $html_only
#    -ldpwn...  any option starting with "-ldpwn": sets $ldpwn, which
#               selects the ldpwn.dsl stylesheet when -style is not given
#

# No arguments at all: print the synopsis and stop.
if( !defined($ARGV[0]) || $ARGV[0] eq '' ) {
    &usage();
    exit(0);
}

my($_toolroot) = $ENV{'SGML_TOOLROOT'} ||
                 '/export/sunsite/users/gferg/toolroot';
my($_jade)     = 'jade-1.2.1';
my($dtd)       = 'SGML';
my($dcl)       = '';
my($linuxdoc)  = 1;

# A scalar on the right-hand side of a list assignment initialises only
# the first variable, so give every variable its own initial value.
my($create_index, $html_only, $ldpwn) = (0, 0, 0);
my($cmd, $fname, $fname_wo_ext, $txt_filter, $style,
   $db_style, $s, $db_v, $x) = ('') x 9;
my(@flines) = ();

# Consume leading options; stop at the first argument that is not one.
while( @ARGV ) {

    if( $ARGV[0] eq "-style" ) {
        shift(@ARGV);
        $style = shift(@ARGV);
        $style = '' unless defined($style);      # option given without a value

    } elsif( $ARGV[0] eq "-txt" ) {
        shift(@ARGV);
        $txt_filter = shift(@ARGV);
        $txt_filter = '' unless defined($txt_filter);

    } elsif( $ARGV[0] eq "-mk_index" ) {
        shift(@ARGV);
        $create_index = 1;

    } elsif( $ARGV[0] =~ /^\-html/ ) {
        shift(@ARGV);
        $html_only = 1;

    } elsif( $ARGV[0] =~ /^\-ldpwn/ ) {
        shift(@ARGV);
        $ldpwn = 1;

    } else {
        last;
    }
}

# The document is the LAST remaining argument.  A name ending in
# ".<something>ml" is taken as-is; otherwise try <name>.sgml and fall
# back to <name>.xml.
$fname        = @ARGV ? $ARGV[-1] : '';
$fname_wo_ext = $fname;

if( $fname =~ /\.[\w]+ml$/ ) {
    $fname_wo_ext =~ s/\.[\w]+$//;
} elsif( -e "$fname.sgml" ) {
    $fname .= ".sgml";
} else {
    $fname .= ".xml";
}

if( !(-e "$fname") ) {
    print "\nldp_mk: ERROR - cannot find/read $fname\n";
    &usage();
    # Exit status left at 0, as before, so existing callers see no change.
    exit(0);
}

# Default stylesheet when -style was not supplied.
if( $style eq '' ) {
    if( $ldpwn == 1 ) {
        $style = "$_toolroot/dsssl/docbook/html/ldpwn.dsl";
    } else {
        $style = "$_toolroot/dsssl/docbook/html/ldp.dsl#html";
    }
}
$db_style = "$_toolroot/dsssl/docbook/html/docbook.dsl";


# determine DTD
#
# Take the first line, among the first 100 lines of the file, that
# contains "!doctype" in any letter case.  The file is read directly so
# that its name is never interpolated into a shell command here.
$s = '';
open(my $fp_in, '<', $fname) ||
    die "\nldp_mk: ERROR - cannot determine DTD for $fname\n";
my($lines_seen) = 0;
while( defined(my $line = <$fp_in>) ) {
    last if ++$lines_seen > 100;
    if( $line =~ /!doctype/i ) {
        $s = $line;
        last;
    }
}
close($fp_in);

if( $s eq '' ) {
    die "\nldp_mk: cannot determine DTD for $fname\n";
}

if( $s =~ /linuxdoc/i ) {

    $linuxdoc = 1;
    print "\nldp_mk: $fname is LinuxDoc SGML\n";
    if( $txt_filter eq '' ) {
        $txt_filter = 'lynx';
    }

} else {

    # " xml " (surrounded by spaces) on the doctype line selects XML mode
    # and the XML declaration file for jade.
    if( $s =~ /\ xml\ /i ) {
        $dtd = 'XML';
        $x   = 'x';
        $dcl = "${_toolroot}/${_jade}/pubtext/xml.dcl";
    }

    # determine docbook version
    #
    # From here on $s no longer holds the doctype line; it is reused to
    # build the catalog search path.
    if( $s =~ /V3\./ ) {
        $db_v = "3.x";
        $s    = "${_toolroot}/dtd/docbook_31/catalog:";
    } elsif( $s =~ /V4\.1/ ) {
        $db_v = "4.1.2";
        $s    = "${_toolroot}/dtd/docbook${x}_41/catalog:";
    } elsif( $s =~ /V4\.2/ ) {
        $db_v = "4.2";
        $s    = "${_toolroot}/dtd/docbook${x}_42/catalog:";
    } else {
        die "\nldp_mk: cannot determine DTD version for $fname\n";
    }

    # do not override any various SGML catalog file settings
    #
    if( !defined($ENV{'SGML_CATALOG_FILES'}) ||
        $ENV{'SGML_CATALOG_FILES'} eq '' ) {

        $s .= "${_toolroot}/dsssl/docbook/catalog:" .
              "${_toolroot}/${_jade}/dsssl/catalog";
        $ENV{'SGML_CATALOG_FILES'} = $s;
    }

    # Output directory named after the document (not created in ldpwn mode).
    if( !(-d "$fname_wo_ext") && $ldpwn == 0 ) {
        mkdir("$fname_wo_ext", 0755) ||
            print "\nldp_mk: WARNING - could not create directory: " .
                  "$fname_wo_ext ($!)\n";
    }

    $linuxdoc = 0;
    print "\nldp_mk: $fname is DocBook (vers. ${db_v}) $dtd\n";
    if( $txt_filter eq '' ) {
        $txt_filter = 'w3m';
    }
}


# create the index...
#
# The command string is a ';'-separated shell sequence; its final piece
# and the system() call follow immediately after this section.
if( $linuxdoc == 0 && $create_index == 1 ) {

    print "\nldp_mk: creating index from $fname...\n";

    $cmd = "$_toolroot/mkindex/collateindex.pl -N -o index." .
           ($dtd eq 'XML' ? "xml;" : "sgml;") .
           "jade -t sgml -V html-index -d $db_style $dcl $fname; " .
           "$_toolroot/mkindex/collateindex.pl -g -t Index -i doc-index " .
           "-o index." . ($dtd eq 'XML' ? "xml" : "sgml") .
           " HTML.index;" .
# (final piece of the index command string begun just above)
"rm -f HTML.index";
    system($cmd);
}


# create HTML version
#
print "\nldp_mk: creating HTML from $fname...\n";

if( $linuxdoc == 1 ) {

    ## $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2html -c latin $fname";
    $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2html -c ascii $fname";

} elsif( $ldpwn == 1 ) {

    # ldpwn mode: one jade run, output captured in a single file.
    $cmd = "jade -t sgml -i html -d $style $dcl $fname > 00_${fname_wo_ext}.html ";

} else {

    # Two jade runs: the first one's *.htm* output is moved into the
    # document directory; the second (-V nochunks) is captured in the
    # single file 00_<name>.html.
    $cmd = "jade -t sgml -i html -d $style $dcl $fname; " .
           "mv -f \*.htm\* $fname_wo_ext/;" .
           "jade -t sgml -i html -V nochunks " .
           "-d $style $dcl $fname > 00_${fname_wo_ext}.html";
}
system($cmd);

if( ($linuxdoc == 1 && !(-e "$fname_wo_ext.html")) ||
    ($linuxdoc == 0 && !(-e "00_${fname_wo_ext}.html")) ) {

    print "\nldp_mk: WARNING - could not create HTML: $fname_wo_ext\n";
}

# HTML-only run: remove the generated index files and stop here.
if( $html_only == 1 ) {
    system("rm -f index.sgml index.xml");
    exit(0);
}


# create PLAIN TEXT version
#
print "\nldp_mk: creating plain text from $fname...\n";

if( $linuxdoc == 1 ) {

    $cmd = "$_toolroot/linuxdoc-tools/bin/sgml2txt -c latin -f $fname";

} else {

    # Rewrite 00_<name>.html in place without the lines that start with
    # WIDTH="<one digit>".  The filtered text stays in $s for the next
    # step.  A failed open is reported instead of being ignored; the
    # script then carries on as it did before.
    $s = '';
    if( open(my $f_in, '+<', "00_${fname_wo_ext}.html") ) {
        while( defined(my $line = <$f_in>) ) {
            if( $line =~ /^WIDTH=\"\d\"/ ) {
                next;
            }
            $s .= $line;
        }
        seek($f_in, 0, 0);
        print $f_in $s;
        truncate($f_in, tell($f_in));
        close($f_in);
    } else {
        print "\nldp_mk: WARNING - cannot open 00_${fname_wo_ext}.html: $!\n";
    }

    # fix to place URLs in-line for text variant
    #
    # NOTE(review): this substitution uses $3 although the pattern has a
    # single capture group, and the replacement contains a closing </TT>
    # with no opening tag.  Part of the expression appears to be missing.
    # It is kept exactly as found; check it against a pristine copy.
    $s =~ s/(.*?)<\/A\s*>/[$1]<\/TT>\ $3/gm;

    if( open(my $f_out, '>', "TXT_${fname_wo_ext}.html") ) {
        print $f_out $s;
        close($f_out);
    } else {
        print "\nldp_mk: WARNING - cannot write TXT_${fname_wo_ext}.html: $!\n";
    }

    # Pick the HTML-to-text converter; anything unrecognised means lynx.
    if( $txt_filter =~ /lynx/i ) {

        $cmd = "lynx -dump ";

    } elsif( $txt_filter =~ /w3m/i ) {

        $cmd = "$_toolroot/w3m/w3m -S -cols 78 -dump ";

    } elsif( $txt_filter =~ /html2text/ ) {

        $cmd = "$_toolroot/html2text/bin/html2text -style pretty -nobs ";

    } else {

        print "\nldp_mk: txt_filter($txt_filter) unrecognized, using lynx\n";
        $cmd = "lynx -dump ";
    }

    # The rest of this command string follows immediately after this
    # section.
    $cmd .= "TXT_${fname_wo_ext}.html > $fname_wo_ext.txt;" .
# (final piece of the text-conversion command string begun just above)
"rm -f TXT_${fname_wo_ext}.html";
}
system($cmd);


# patch the text file; especially needed w/lynx
#
# Two clean-ups are applied to <name>.txt in place:
#   - runs of blank lines are cut down to at most $def_blanks lines;
#   - "file://localhost<cwd>/" prefixes are removed and "00_<name>" is
#     turned back into "<name>".
if( -e "$fname_wo_ext.txt" ) {

    my($def_blanks) = 3;
    my($count)      = 0;
    @flines = ();

    # Current directory as reported by the pwd command.
    $s = '';
    if( open(my $cpipe, '-|', 'pwd') ) {
        $s = <$cpipe>;
        $s = '' unless defined($s);
        chomp($s);                       # drop the newline only, nothing else
        close($cpipe);
    }

    # Escape EVERY regex metacharacter in the two search strings.  The
    # replacement text below uses the raw name, so no escaping backslash
    # can leak into the output.
    my($pat)  = quotemeta('file://localhost' . $s . '/');
    my($pat2) = quotemeta($fname_wo_ext);

    open(my $hfile, '+<', "$fname_wo_ext.txt") ||
        die "\nldp_mk: cannot open: $fname_wo_ext.txt";

    while( defined(my $line = <$hfile>) ) {

        # A line of length 1 holds only its line terminator: count it as
        # blank.  Any other line resets the run.
        if( length($line) == 1 ) {
            $count++;
        } else {
            $count = 0;
        }

        if( $count <= $def_blanks ) {
            $line =~ s/$pat//g;
            $line =~ s/00_$pat2/$fname_wo_ext/g;
            push(@flines, $line);
        }
    }

    # Overwrite the file with the kept lines and cut off any leftover tail.
    seek($hfile, 0, 0);
    print $hfile @flines;
    truncate($hfile, tell($hfile));
    close($hfile);

} else {
    print "\nldp_mk: WARNING - could not create TXT: $fname_wo_ext.txt\n";
}


if( $ldpwn == 1 ) {

    system("mv -f 00_${fname_wo_ext}.html ${fname_wo_ext}.html");

    # create rss feed
    #
    # NFILE stays a bareword handle and $i stays undeclared here, because
    # the code that continues this section may still refer to both.
    open(NFILE, "+< ${fname_wo_ext}.html") ||
        die "ldp_mk: cannot open ${fname_wo_ext}.html $!\n";
    @flines = <NFILE>;

    # NOTE(review): the match pattern opened on the last line below is
    # not completed within this section; it is reproduced exactly as
    # found, including the line break that follows the opening slash.
    for ($i=0; $i < (@flines + 0); $i++) {
        if ($flines[$i] =~ /
  • <\/li/i) {
            # NOTE(review): text appears to be missing around this point.
            # The pattern above is only the tail of a longer expression,
            # and the string assigned below reads like the end of a shell
            # command (" DB_$fname ;" followed by "rm -f EX_$fname")
            # rather than a value meant for $flines[$i].  The braces
            # opened by the enclosing `for` loop and `if ($ldpwn == 1)`
            # are also never closed in the visible text.  Restore this
            # section from a pristine copy before relying on it.
            $flines[$i] = " DB_$fname ;" . "rm -f EX_$fname";
            system($cmd);

            # Compress DB_<file> if it exists; otherwise only warn.
            if( !(-e "DB_$fname") ) {
                print "\nldp_mk: WARNING - could not create DocBook: DB_$fname\n";
            } else {
                $cmd = "gzip -f DB_$fname";
                system($cmd);
            }
        }

# cleanup
# Remove the generated index files and the body/title HTML files;
# `rm -f` stays quiet when a file is not there.
system("rm -f index.sgml index.xml body.html title.html");

# make plucker version
# Build the plucker-build command line; the -N and -f values are both
# $fname_wo_ext.
$plucker_cmd = "$_toolroot/plucker/bin/plucker-build --zlib-compression " .
               "-M999 -N \"${fname_wo_ext}\" -f ${fname_wo_ext} " .
               "--category=LDP --stayonhost --pluckerdir=./ ";

if( $linuxdoc == 1 ) {
    # LinuxDoc: run in the current directory with <name>.html passed to -H.
    $plucker_cmd .= "-H ${fname_wo_ext}.html";
    system("${plucker_cmd}");
} else {
    # Otherwise: run inside the <name> directory with index.html passed
    # to -H, then move the resulting <name>.pdb up one level.
    $plucker_cmd .= "-H index.html";
    system("cd ${fname_wo_ext} ; ${plucker_cmd}; mv ${fname_wo_ext}.pdb ../${fname_wo_ext}.pdb");
}

print "\nldp_mk: completed...\n";

exit(0);


# usage - print the command-line synopsis on standard output.
# Takes no arguments.
# NOTE(review): the strings below look as if their option placeholders
# were stripped (e.g. "-style" is followed directly by "-txt", and the
# two "where:" entries start with " - ").  They are left untouched here
# because they are runtime output; confirm the intended text.
sub usage {
    print "\n\n",
          "usage: ldp_mk ",
          "-style -txt -mk_index .sgml\n\n",
          " where:\n",
          " - fullpath to a DSSSL stylesheet\n",
          " - one of the following: lynx, w3m, html2text\n";
}