updated

2002-08-09 15:37:26 +00:00 · 2002-08-09 15:37:26 +00:00 · b079e94034
parent c988cca28a
commit b079e94034
4 changed files with 344 additions and 50 deletions
--- a/LDP/builder/ldp_mk
+++ b/LDP/builder/ldp_mk
@ -20,6 +20,7 @@ my($dtd)          = 'SGML';
 my($dcl)          = '';
 my($linuxdoc)     = 1;
 my($create_index) = 1;
+my($html_only)    = 0;
 my($cmd, $fname, $fname_wo_ext, $txt_filter, $style, $s, $db_v, $x) = '';
 my(@flines)       = ();

@ -36,6 +37,9 @@ while(1) {
     } elsif ($ARGV[0] eq "-no_index") {
         shift(@ARGV);
         $create_index = 0;
+     } elsif ($ARGV[0] eq "-html_only") {
+         shift(@ARGV);
+         $html_only = 1;
     } else {
         last;
     }
@ -143,7 +147,8 @@ print "\nldp_mk: creating HTML from $fname...\n";

 if( $linuxdoc == 1 ) {

-    $cmd = "sgml2html -c latin $fname";
+    ## $cmd = "sgml2html -c latin $fname";
+    $cmd = "sgml2html -c ascii $fname";

 } else {

@ -161,6 +166,11 @@ if( ($linuxdoc == 1 && !(-e "$fname_wo_ext.html"))
   print "\nldp_mk: WARNING - could not create HTML: $fname_wo_ext\n";
 }

+if( $html_only == 1 ) {
+    system("rm -f index.sgml index.xml");
+    exit(0);
+}
+

 # create PLAIN TEXT version
 #
@ -258,7 +268,7 @@ if( -e "$fname_wo_ext.txt" ) {
 #
 # Note that we use the single-page HTML variant
 #
-print "\nldp_mk: creating PDF from $fname...\n";
+print "\nldp_mk: creating PDF/PS from $fname...\n";

 my($print_str) = '';

@ -269,38 +279,34 @@ if( $linuxdoc == 1 ) {
    system("$_toolroot/sgml_ld_1html $fname");
    $print_str = "00_${fname_wo_ext}.html";

-} else {
-
-    # create new files from DocBook-source single HTML file to use for print
-    #
-    require "$_toolroot/lib/fix_print_html.lib";
-    &fix_print_html("00_${fname_wo_ext}.html", 'body.html', 'title.html');
-
-    $print_str = "--titlefile title.html body.html";
-}
-
-
-if( -e "00_$fname_wo_ext.html" ) {
-
    $cmd = "$_toolroot/htmldoc/bin/htmldoc --size universal -t pdf " .
           "--firstpage p1 -f $fname_wo_ext.pdf $print_str; " .
           "$_toolroot/htmldoc/bin/htmldoc --size universal -t ps  " .
           "--firstpage p1 -f $fname_wo_ext.ps  $print_str";
+
+    if( -e "00_$fname_wo_ext.html" ) {
+        system($cmd);
+    }
+
+} elsif( -e "00_$fname_wo_ext.html" ) {
+
+    # create new files from DocBook-source single HTML file to use for print
+    #
+    $cmd = "$_toolroot/ldp_print/ldp_print --toolroot ${_toolroot}/htmldoc/bin " .
+           "--postscript 00_${fname_wo_ext}.html";
    system($cmd);
+    system("mv -f 00_${fname_wo_ext}.pdf ${fname_wo_ext}.pdf");
+    system("mv -f 00_${fname_wo_ext}.ps  ${fname_wo_ext}.ps");
+}

-    if( !(-e "$fname_wo_ext.pdf") ) {
-          print "\nldp_mk: WARNING - could not create $fname_wo_ext.pdf\n";
-    }
-    if( !(-e "$fname_wo_ext.ps") ) {
-          print "\nldp_mk: WARNING - could not create $fname_wo_ext.ps\n";
-    } else {
-          $cmd = "gzip -f $fname_wo_ext.ps";
-          system($cmd);
-    }
-
+if( !(-e "$fname_wo_ext.pdf") ) {
+      print "\nldp_mk: WARNING - could not create $fname_wo_ext.pdf\n";
+}
+if( !(-e "$fname_wo_ext.ps") ) {
+      print "\nldp_mk: WARNING - could not create $fname_wo_ext.ps\n";
 } else {
-    print "\nldp_mk: WARNING - HTML file 00_$fname_wo_ext.html not found; ",
-          "skipping PDF/PS creation\n";
+      $cmd = "gzip -f $fname_wo_ext.ps";
+      system($cmd);
 }


@ -358,6 +364,12 @@ if( $linuxdoc == 1 ) {
 # cleanup
 system("rm -f index.sgml index.xml body.html title.html");

+if( $linuxdoc == 1 ) {
+    system("mk_pluckerdb ${fname_wo_ext} ${fname_wo_ext}.html");
+} else {
+    system("cd ${fname_wo_ext} ; mk_pluckerdb ${fname_wo_ext} index.html");
+}
+
 print "\nldp_mk: completed...\n";

 exit(0);
--- a/LDP/builder/ldp_print/README
+++ b/LDP/builder/ldp_print/README
@ -3,7 +3,8 @@
    ldp_print -  print tool/script for DocBook SGML/XML documents 
 ######################################################################

- Copyright (C) 2002-2000  - Greg Ferguson (gferg@metalab.unc.edu)
+ Copyright (C) 2002-2000  - Greg Ferguson (gferg@metalab.unc.edu) and
+                            David A. Wheeler (dwheeler@dwheeler.com)

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -39,6 +40,12 @@ file into 'title.html' and 'body.html' and send each to htmldoc (as
 the corresponding title page and body of the document).


+TO INSTALL
+==========
+
+Type "make install"
+
+
 CAVEATS
 =======

@ -47,15 +54,12 @@ o  Assumes perl is in /usr/bin; adjust if necessary
 o  You may need to specify where the htmldoc executable resides.
   The script assumes it's within your $PATH.

-o  If you want Postscript as an output variant, uncomment the
-   appropriate lines (see below).
-
 o  Relies on output from a DocBook instance created via DSSSL/{open}jade!

 o  Cleans up (removes) the intermediate files it creates (but not the
   PDF or Postscript files, obviously!)

-o  Works silently; PDF (PostScript) will be created in the same directory
+o  Works silently; PDF or PostScript will be created in the same directory
   as was specified for the input (single-file HTML) file.

 o  Provided without warranty or support!
@ -74,6 +78,10 @@ o  I ran into a problem with htmldoc v1.8.8 which required a source
   UPDATE (2001-10-10): It appears that later versions of htmldoc
   have this problem corrected. The patch is not required.

+o  htmldoc version 1.8.19 has a bug that causes it to NOT generate
+   title pages correctly.  Use a different version of htmldoc
+   (such as 1.8.18) instead.
+
 ====
 Greg Ferguson / gferg (at) metalab.unc.edu
 11 Jan 2000
--- a/LDP/builder/ldp_print/VERSION
+++ b/LDP/builder/ldp_print/VERSION
@ -1 +1 @@
-0.7.0, 2002-04-04
+0.90, 2002-08-09
--- a/LDP/builder/ldp_print/ldp_print
+++ b/LDP/builder/ldp_print/ldp_print
@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 #
-# usage: ldp_print <single_file.html>
+# usage: ldp_print [options] <single_file.html>
 #
 # Creates a PDF variant of a single-file HTML representation of a
 # DocBook SGML (or XML) instance. This simple wrapper assumes that
@ -12,6 +12,12 @@
 # the file into 'title.html' and 'body.html' and send each to
 # htmldoc (as the corresponding title page and body of the document).
 #
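+# OPTIONS:
+#  --postscript        Generate PostScript (off by default)
+#  --nopostscript      Don't generate PostScript (the default)
+#  --pdf               Generate PDF (the default)
+#  --nopdf             Don't generate PDF
+#  --size X            Set output page size (default "universal").
+#                      X can be "A4", "Letter", or "WIDTHxLENGTHunits",
+#                      where units can be in, mm, or cm.
+#  --toolroot DIR      Directory containing the htmldoc executable
+#                      (by default, htmldoc is found via $PATH)
+#  --                  End of options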
 #
 # CAVEATS:
 #
@ -33,35 +39,302 @@
 #
 # Provided without warranty or support!
 #
-#	gferg@sgi.com / Ferg (used as part of the LDP production env)
+#	<gferg (at) metalab.unc.edu> / Ferg
+#       <dan.scott (at) acm.org> / Dan Scott
+#       <dwheeler (at) dwheeler.com> / David A. Wheeler
+#
+# Licensed under the GNU GPL version 2.
+#
+# ChangeLog:
+#     16Oct2000 - 0.1   - initial entry <gferg (at) sgi.com>
+#     03Apr2001 - 0.2   - fix for <preface>
+#     05Jul2001 - 0.3   - fix for <tt> and -f
+#     12Oct2001 - 0.4   - fix for sections; loop thru both files (body/title)
+#     27Nov2001 - 0.5   - fixed bug in determining where doc-index lies
+#     18Jan2002 - 0.5.1 - entity fix (822*)
+#     02Apr2002 - 0.6   - misc fixes (bibliography/appendix, etc).
+#     04Apr2002 - 0.7   - fix for newer DSSSL
+#     27May2002 - 0.8   - Merged library and driver, greatly simplifying
+#                         installation, and added options to driver.
+#     09Aug2002 - 0.9   - Some minor clean-up
 #

 use strict;
-push(@INC, "./");
-require 'fix_print_html.lib';

-if( $ARGV[0] eq '' || !(-r $ARGV[0]) ) {
-    die "\nusage: ldp_print <single_file.html>\n\n";
+# fix_print_html($in, $out, $ttl)
+#
+# Splits the single-file HTML document $in into a title-page file ($ttl)
+# and a body file ($out), passing each through fix_html() before it is
+# written.
+#
+# The input is scanned line by line with a small state machine ($indx):
+#   -1  before the CLASS="TITLEPAGE" line: lines go to both buffers
+#    0  title page: lines go to the title buffer only; the CLASS="TOC"
+#       line ends the title page, which is then written to $ttl
+#    1  table of contents: lines are dropped until a CLASS="CHAP...",
+#       CLASS="PREF..." or CLASS="SECT..." line is seen
+#    2  body: every remaining line goes to the body buffer
+#
+# Note that $ttl is only written when a CLASS="TOC" line is found.
+#
+# Returns 1 on success, or 0 (after printing a message) when a file
+# cannot be opened or written.
+#
+sub fix_print_html {
+
+   my($in,$out,$ttl) = @_;
+
+   # three-argument open with lexical handles: the file name is never
+   # interpreted as part of the open mode, and no global handle leaks
+   #
+   open(my $in_fh, '<', $in) || do {
+        print "fix_print_html: cannot open $in: $!\n";
+        return 0;
+   };
+
+   my($buf, $ttl_buf) = ('', '');
+   my($indx) = -1;
+   my($is_article) = 1;
+   while(<$in_fh>) {
+
+         if( $indx == 1 ) {
+
+             # ignore everything until we see the chapter or sect
+             #
+             if( $_ =~ /CLASS="CHAP/i || $_ =~ /CLASS="PREF/i
+                 ||
+                 $_ =~ /CLASS="SECT/i )  {
+
+                 $buf .= $_;
+                 $indx++;
+
+             } else {
+                 next;
+             }
+
+         } elsif( $indx == 0 ) {
+
+             # write out the title page file
+             #
+             if( $_ =~ /CLASS="TOC"/ ) {
+
+                 # close the open tags, add spacing after the first H1
+                 # and drop the doubled rule before running fix_html
+                 #
+                 $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
+                 $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
+                 $ttl_buf =~ s/<HR><\/DIV\n><HR>/<HR><\/DIV\n>/ms;
+                 fix_html(\$ttl_buf, 1);
+
+                 open(my $ttl_fh, '>', $ttl) || do {
+                      print "fix_print_html: cannot open $ttl: $!\n";
+                      close($in_fh);
+                      return 0;
+                 };
+                 print $ttl_fh $ttl_buf;
+
+                 # buffered write errors only surface at close
+                 #
+                 close($ttl_fh) || do {
+                      print "fix_print_html: cannot write $ttl: $!\n";
+                      close($in_fh);
+                      return 0;
+                 };
+                 $ttl_buf = '';
+                 $indx++;
+
+             } else {
+                $ttl_buf .= $_;
+             }
+
+         } elsif( $indx < 0 ) {
+
+             # a CLASS="BOOK" line marks the document as a book; this
+             # selects the non-article header handling in fix_html
+             #
+             if( $_ =~ /CLASS="BOOK"/i ) {
+                 $is_article = 0;
+             }
+
+             # up to this point, both buffers get the line
+             #
+             if( $_ =~ /CLASS="TITLEPAGE"/ ) {
+
+                 $ttl_buf .= $_ . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
+                 $indx++;
+
+             } else {
+                 $buf .= $_;
+                 $ttl_buf .= $_;
+             }
+
+         } else {
+
+             $buf .= $_;
+         }
+   }
+   close($in_fh);
+
+
+   # fix body file
+   #
+   open(my $out_fh, '>', $out) || do {
+        print "fix_print_html: cannot open $out: $!\n";
+        return 0;
+   };
+
+   fix_html(\$buf, $is_article);
+
+   print $out_fh $buf;
+   close($out_fh) || do {
+        print "fix_print_html: cannot write $out: $!\n";
+        return 0;
+   };
+
+
+   return 1;
 }

-my($fname_wo_ext) = $ARGV[0];
+
+# fix_html($buf, $is_article)
+#
+# Rewrites, in place, the HTML text referenced by $buf (a reference to a
+# scalar) so that it is better suited as input to htmldoc.
+#
+#   $buf         reference to the string holding the HTML
+#   $is_article  0 selects the non-article (book) header renumbering;
+#                any other value leaves chapter-level headers alone
+#
+# Returns nothing; the result is left in $$buf.
+#
+sub fix_html {
+   
+   my($buf, $is_article) = @_;
+   my($indx) = -1;
+
+
+   # make corrections and write out the file
+   #
+
+   # unwrap a <P> that directly follows <LI> (plain, followed by a
+   # FORMALPARA <DIV>, or carrying a STYLE attribute)
+   #
+   $$buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+   $$buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+   $$buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+   if( $is_article == 0 ) {
+       # section-level H1 becomes the placeholder H0; the non-article
+       # renumbering loop further down maps H0 to H2
+       #
+       $$buf =~ 
+         s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
+       $$buf =~ 
+         s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
+   }
+   $$buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gims;
+
+   # cut the document off at the last DOC-INDEX heading and close it
+   #
+   if( ($indx = rindex($$buf, "<H1\n><A\nNAME=\"DOC-INDEX\"")) > -1 ) {
+       $$buf = substr($$buf, 0, $indx);
+       $$buf .= "\n<\/BODY>\n<\/HTML>\n\n";
+   } elsif( ($indx = rindex($$buf, "<H1\n><A\nNAME=\"doc-index\"")) > -1 ) {
+       $$buf = substr($$buf, 0, $indx);
+       $$buf .= "\n<\/BODY>\n<\/HTML>\n\n";
+   }
+
+   # replace or drop numeric character references and a few attributes
+   # and table tags
+   #
+   $$buf =~ s/\&\#13;//g;
+   $$buf =~ s/\&\#60;/\&lt;/g;
+   $$buf =~ s/\&\#62;/\&gt;/g;
+   $$buf =~ s/\&\#8211;/\-/g;
+   $$buf =~ s/\&\#8220;/\"/g;
+   $$buf =~ s/\&\#8221;/\"/g;
+   $$buf =~ s/WIDTH=\"\d\"//g;
+   $$buf =~ s/><[\/]*TBODY//g;
+   $$buf =~ s/><[\/]*THEAD//g;
+   $$buf =~ s/TYPE=\"1\"\n//gim;
+
+   $$buf =~ s/<P\nCLASS="LITERALLAYOUT"(.*?)<\/P/<P CLASS="LITERALLAYOUT"><FONT FACE=\"courier\"$1<\/FONT><\/P/gms;
+
+   my($cnt, $j) = (0, 0);
+
+   if( $$buf !~ /<H1/ ) {
+       
+       # for newer docbook styles, set h2 to h1, etc.
+       #
+       for($cnt=2; $cnt < 7; $cnt++ ) {
+           $j = $cnt - 1;
+           $$buf =~ s/<H${cnt}/<H${j}/g;
+           $$buf =~ s/<\/H${cnt}/<\/H${j}/g;
+       }
+
+   } elsif( $is_article == 0 ) {
+
+       # decrement the headers by 1 and then re-set the
+       # chapter level only to H1...
+       # (working from H5 down so a renamed header is not renamed
+       # again; H0 and H1 both end up as H2)
+       #
+       for($cnt=5; $cnt >= 0; $cnt--) {
+           $j = $cnt + 1;
+           if( $cnt == 0 ) {
+               $j = 2;
+           }
+           $$buf =~ s/<H${cnt}/<H${j}/g;
+           $$buf =~ s/<\/H${cnt}/<\/H${j}/g;
+       }
+
+       # $j is reused as a flag: it is set to 1 by a line that opens a
+       # chapter-like element, and while it is 1 every H2 on the
+       # following lines is turned back into H1, until a line
+       # containing <DIV clears the flag
+       #
+       my(@l) = split(/\n/, $$buf);
+       for( $cnt=0; $cnt < (@l + 0); $cnt++ ) {
+
+            if( $j == 1 ) {
+                if( $l[$cnt] =~ /<DIV/ ) {
+                    $j = 0;
+                    next;
+                }
+                $l[$cnt] =~ s/<H2/<H1/g;
+                $l[$cnt] =~ s/<\/H2/<\/H1/g;
+            }
+
+            if( $l[$cnt] =~ /^CLASS=\"CHAP/i
+                ||
+                $l[$cnt] =~ /^NAME=\"BIBL/i
+                ||
+                $l[$cnt] =~ /^CLASS=\"APPENDIX/i
+                ||
+                $l[$cnt] =~ /^CLASS=\"GLOSSARY/i
+                ||
+                $l[$cnt] =~ /^CLASS=\"PREF/i
+                ||
+                $l[$cnt] =~ /^CLASS=\"TITLE/i ) {
+                $j = 1;
+            }
+       }
+
+       $$buf = join("\n", @l);
+
+   }
+   $$buf =~ s/><DIV\nCLASS="\w+"\n//gms;
+   $$buf =~ s/><\/DIV\n//gms;
+
+   # strip SPAN tags; these must operate on the referenced string
+   # ($$buf) -- matching against the reference itself never changes
+   # the HTML
+   #
+   $$buf =~ s/<SPAN\n[^>]*?>//gms;
+   $$buf =~ s/<\/SPAN\n>//gms;
+
+   $$buf =~ s/(><LI\n)><P\n(.*?)<\/P\n>(<\/LI\n)/$1$2$3/gms;
+
+   return;
+}
+
+
+########### MAIN DRIVER ##############
+#
+# Parses the command line, splits the input HTML into title.html and
+# body.html with fix_print_html(), then runs htmldoc once for each
+# requested output format.
+
+# Default values for options:
+my($generate_pdf) = 1;
+my($generate_ps) = 0;
+my($pagesize) = "universal";
+my($pth) = '';
+
+
+# Process options.
+my($arg);
+while (($#ARGV >= 0) && ($ARGV[0] =~ m/^-/)) {
+  $arg = shift;
+  if ($arg eq "--") {last;}
+  elsif ($arg eq "--postscript") {$generate_ps = 1;}
+  elsif ($arg eq "--nopostscript") {$generate_ps = 0;}
+  elsif ($arg eq "--pdf") {$generate_pdf = 1;}
+  elsif ($arg eq "--nopdf") {$generate_pdf = 0;}
+  elsif ($arg eq "--size") {
+    # the option needs a value; shift yields undef when none is left
+    $pagesize = shift;
+    die "\nldp_print: option --size requires a value\n"
+        unless defined($pagesize);
+  }
+  elsif ($arg eq "--toolroot") {
+    $pth = shift;
+    die "\nldp_print: option --toolroot requires a value\n"
+        unless defined($pth);
+    $pth .= "/";
+  }
+  else {die "\nldp_print: unknown option $arg\n";}
+}
+
+# @ARGV may be empty at this point, so test for definedness first
+# (the script runs with -w).
+if( !defined($ARGV[0]) || $ARGV[0] eq '' || !(-r $ARGV[0]) ) {
+    die "\nusage: ldp_print [options] <single_file.html>\n\n";
+}
+
+# Compute htmldoc options.  They are kept as a list so that htmldoc
+# can be started without going through a shell.
+my(@htmldoc_options) = ('--size', $pagesize,
+                        '--firstpage', 'p1', '--footer', 'c.1');
+
+
+# Now get filename and check it.  Don't allow whitespace, since a
+# filename with whitespace will cause trouble.
+# NOTE: If the filename can be controlled by an untrusted user,
+# the filename (and options!) need to be filtered further to forbid
+# metacharacters, control characters, etc. as well.
+
+my($filename) = $ARGV[0];
+
+if ($filename =~ m/ /) {
+  die "\nldp_print: filenames cannot contain spaces.\n";
+}
+
+if ($filename =~ m/[\t\n]/) {
+  die "\nldp_print: filenames cannot contain whitespace.\n";
+}
+
+my($fname_wo_ext) = $filename;
 $fname_wo_ext =~ s/\.[\w]+$//;


 # create new files from single HTML file to use for print
 #
-&fix_print_html($ARGV[0], 'body.html', 'title.html');
+# fix_print_html returns 0 when a file could not be opened; stop here
+# rather than running htmldoc on missing or stale files
+#
+fix_print_html($filename, 'body.html', 'title.html')
+    || die "\nldp_print: could not split $filename into title.html and body.html\n";

-my($cmd) = "htmldoc --size universal -t pdf -f ${fname_wo_ext}.pdf " .
-           "--firstpage p1 --titlefile title.html body.html --footer c.1";

-# For postscript output; append onto the above cmd string:
-#
-#          "; htmldoc --size universal -t ps -f ${fname_wo_ext}.ps " .
-#          "--firstpage p1 --titlefile title.html body.html";
-#
-system($cmd);
-die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?);
+# list-form system: the arguments are passed to htmldoc as they are,
+# without shell interpretation
+#
+if ($generate_pdf) {
+   system("${pth}htmldoc", @htmldoc_options, '-t', 'pdf',
+          '-f', "${fname_wo_ext}.pdf",
+          '--titlefile', 'title.html', 'body.html');
+   die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?);
+};
+
+if ($generate_ps) {
+   system("${pth}htmldoc", @htmldoc_options, '-t', 'ps',
+          '-f', "${fname_wo_ext}.ps",
+          '--titlefile', 'title.html', 'body.html');
+   die "\nldp_print: could not create ${fname_wo_ext}.ps ($!)\n" if ($?);
+};

 # cleanup
 #
@ -69,3 +342,4 @@ system("rm -f body.html title.html");

 exit(0);

+