new entry; print lib works w/htmldoc

2001-01-25 22:15:41 +00:00 · 2001-01-25 22:15:41 +00:00 · 0e15f02677
parent 88b1b2483c
commit 0e15f02677
3 changed files with 259 additions and 0 deletions
--- a/LDP/builder/ldp_print/README
+++ b/LDP/builder/ldp_print/README
@ -0,0 +1,60 @@
+
+######################################################################
+    ldp_print -  print tool/script for DocBook SGML/XML documents 
+######################################################################
+
+This process/script is used in the production environment for the
+LDP.  It relies on the HTMLDOC software package (GPL'ed) which can be
+obtained from the Easy Software Products (c) web site:
+
+        http://www.easysw.com/htmldoc/
+
+This process creates a PDF variant from the single-file HTML
+representation of a DocBook SGML (or XML) instance. The simple
+wrapper script (ldp_print) assumes that the file was created using
+{open}jade in a manner similar to:
+
+        jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
+
+Give the script the filename as an argument. It will then parse the
+file into 'title.html' and 'body.html' and send each to htmldoc (as
+the corresponding title page and body of the document).
+
+
+CAVEATS
+=======
+
+o  Assumes perl is in /usr/bin; adjust if necessary
+
+o  You may need to specify where the htmldoc executable resides.
+   The script assumes it's within your $PATH.
+
+o  If you want Postscript as an output variant, enable the Postscript
+   lines that are commented out near the end of the ldp_print script.
+
+o  Relies on output from a DocBook instance created via DSSSL/{open}jade!
+
+o  Cleans up (removes) the intermediate files it creates (but not the
+   PDF or Postscript files, obviously!)
+
+o  Works silently; PDF (PostScript) will be created in the same directory
+   as was specified for the input (single-file HTML) file.
+
+o  Provided without warranty or support!
+
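+o  I ran into a problem with htmldoc v1.8.8 which required a source
+   code change (I was getting a core dump from the htmldoc process).
+   Here is the change required. The diff reads from the patched file
+   back to the original, so the lines marked '<' are the ones to add
+   (after line 3661 of the original file):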
+
+	htmldoc/ps-pdf.cxx :
+	3662,3665d3661
+	<      /* gjf = 11Oct2000 */
+	<      if( temprow == NULL )
+	<          break;
+	< 
+
+
+====
+gferg (at) sgi.com / Ferg
+11 Jan 2001
+
--- a/LDP/builder/ldp_print/fix_print_html.lib
+++ b/LDP/builder/ldp_print/fix_print_html.lib
@ -0,0 +1,128 @@
+#
+# fix_print_html.lib
+#
+#   Dan Scott  / <dan.scott (at) acm.org>
+#   Ferg       / <gferg (at) sgi.com>
+#
+#   Used to prepare single-file HTML variant for PDF/Postscript creation
+#   thru htmldoc.
+#
+# log:
+#     16Oct2000 - initial entry <gferg (at) sgi.com>
+#
+#
+
+# fix_print_html($in, $out, $ttl)
+#
+#   Splits the single-file HTML read from $in into the two files handed
+#   to htmldoc: the title page ($ttl) and the document body ($out).
+#
+#   The input is scanned line by line through these stages:
+#
+#     -1  before the line containing CLASS="TITLEPAGE"; every line goes
+#         to both the body buffer and the title buffer
+#      0  inside the title page; lines go to the title buffer until a
+#         line containing CLASS="TOC" is seen, at which point the title
+#         file is written
+#      1  after the title page; lines are dropped until the first line
+#         containing CLASS="CHAP (book) or CLASS="SECT / CLASS="sect
+#         (article)
+#      2  rest of the document; every line goes to the body buffer
+#
+#   The body buffer is then cleaned up (see the comments below) and
+#   written to $out.
+#
+#   Returns 1 on success.  Prints a message and returns 0 when a file
+#   cannot be opened or written.
+#
+sub fix_print_html {
+
+   my($in,$out,$ttl) = @_;
+
+   # Three-argument open on lexical handles: nothing in a file name can
+   # be taken as part of the open mode, and no global handle is reused.
+   #
+   open(my $in_fh, '<', $in) || do {
+        print "fix_print_html: cannot open $in: $!\n";
+        return 0;
+   };
+
+   my $buf        = '';    # body of the document
+   my $ttl_buf    = '';    # title page
+   my $stage      = -1;    # see the stage table above
+   my $is_article = 0;     # set when a section is seen before a chapter
+
+   # A lexical $line keeps the caller's $_ untouched.
+   #
+   while( my $line = <$in_fh> ) {
+
+         if( $stage < 0 ) {
+
+             # up to this point, both buffers get the line
+             #
+             if( $line =~ /CLASS="TITLEPAGE"/ ) {
+
+                 # the title page line itself only goes to the title
+                 # buffer, followed by some vertical padding
+                 #
+                 $ttl_buf .= $line . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
+                 $stage = 0;
+
+             } else {
+                 $buf .= $line;
+                 $ttl_buf .= $line;
+             }
+
+         } elsif( $stage == 0 ) {
+
+             # write out the title page file
+             #
+             if( $line =~ /CLASS="TOC"/ ) {
+
+                 # close the title page markup and add space after the
+                 # first H1; the TOC line itself is discarded
+                 #
+                 $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n"; 
+                 $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
+
+                 open(my $ttl_fh, '>', $ttl) || do {
+                      print "fix_print_html: cannot open $ttl: $!\n";
+                      close($in_fh);
+                      return 0;
+                 };
+
+                 # buffered write errors may only show up at close
+                 #
+                 unless( (print {$ttl_fh} $ttl_buf) && close($ttl_fh) ) {
+                      print "fix_print_html: cannot write $ttl: $!\n";
+                      close($in_fh);
+                      return 0;
+                 }
+                 $ttl_buf = '';
+                 $stage = 1;
+
+             } else {
+                 $ttl_buf .= $line;
+             }
+
+         } elsif( $stage == 1 ) {
+
+             # ignore everything until we see the chapter or sect
+             #
+             if( $line =~ /CLASS="CHAP/ ) {
+
+                 $buf .= $line;
+                 $stage = 2;
+
+             } elsif( $line =~ /CLASS="SECT/ || $line =~ /CLASS="sect/ )  {
+
+                 $buf .= $line;
+                 $stage = 2;
+                 $is_article = 1;
+             }
+
+         } else {
+
+             $buf .= $line;
+         }
+   }
+   close($in_fh);
+
+   # make these corrections and write out the file
+   #
+
+   # drop the <P>...</P> wrapper that directly follows an <LI> (plain,
+   # with a FORMALPARA div, or with a STYLE attribute)
+   #
+   $buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+   $buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+   $buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+
+   # when no section was seen before the first chapter, turn the H1
+   # that opens a section into an H2
+   #
+   if( $is_article == 0 ) {
+       $buf =~ s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
+       $buf =~ s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
+   }
+   $buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gms;
+
+   # cut the document off at the last DOC-INDEX heading and close the
+   # markup again
+   #
+   my $doc_index = rindex($buf, "<H1\n><A\nNAME=\"DOC-INDEX\"");
+   if( $doc_index > -1 ) {
+       $buf = substr($buf, 0, $doc_index);
+       $buf .= "\n<\/BODY>\n<\/HTML>\n\n";
+   }
+
+   # numeric entities become named entities (or a plain dash), and
+   # TBODY open/close tags are removed
+   #
+   $buf =~ s/\&\#60;/\&lt;/g;
+   $buf =~ s/\&\#62;/\&gt;/g;
+   $buf =~ s/\&\#8211;/\-/g;
+   $buf =~ s/><[\/]*TBODY//g;
+
+   open(my $out_fh, '>', $out) || do {
+        print "fix_print_html: cannot open $out: $!\n";
+        return 0;
+   };
+
+   unless( (print {$out_fh} $buf) && close($out_fh) ) {
+        print "fix_print_html: cannot write $out: $!\n";
+        return 0;
+   }
+
+   return 1;
+}
+
+
+# Return true from package include
+#
+1;
+
--- a/LDP/builder/ldp_print/ldp_print
+++ b/LDP/builder/ldp_print/ldp_print
@ -0,0 +1,71 @@
+#!/usr/bin/perl -w
+#
+# usage: ldp_print <single_file.html>
+#
+# Creates a PDF variant of a single-file HTML representation of a
+# DocBook SGML (or XML) instance. This simple wrapper assumes that
+# the file was created using {open}jade in a manner similar to:
+#
+#	jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
+#
+# Give this script the filename as an argument. It will then parse
+# the file into 'title.html' and 'body.html' and send each to
+# htmldoc (as the corresponding title page and body of the document).
+#
+#
+# CAVEATS:
+#
+# Assumes perl is in /usr/bin; adjust if necessary
+#
+# You may need to specify where the htmldoc executable resides.
+# The script assumes it's within your $PATH.
+#
+# If you want Postscript as an output variant, uncomment the 
+# appropriate lines (see below).
+#
+# Relies on output from a DocBook instance created via DSSSL/{open}jade!
+#
+# Cleans up (removes) the intermediate files it creates (but not the
+# PDF or Postscript files, obviously!)
+#
+# Works silently; PDF (PostScript) will be created in the same directory
+# as was specified for the input (single-file HTML) file.
+#
+# Provided without warranty or support!
+#
+#	gferg@sgi.com / Ferg (used as part of the LDP production env)
+#
+
+use strict;
+push(@INC, "./");
+require 'fix_print_html.lib';
+
+# Runs htmldoc once to turn title.html/body.html into $outfile.
+#
+#   $type    - htmldoc output type, 'pdf' or 'ps'
+#   $outfile - name of the file htmldoc should create
+#
+# Returns '' on success, otherwise a short description of the failure.
+# The command is passed to system() as a list so that the file name
+# never goes through the shell.
+#
+sub run_htmldoc {
+
+    my($type,$outfile) = @_;
+
+    my(@cmd) = ('htmldoc', '--size', 'universal', '-t', $type,
+                '-f', $outfile, '--firstpage', 'p1',
+                '--titlefile', 'title.html', 'body.html');
+
+    return '' if( system(@cmd) == 0 );
+
+    # $! is only meaningful when the program could not be started
+    #
+    return "cannot run htmldoc: $!" if( $? == -1 );
+    return "htmldoc killed by signal " . ($? & 127) if( $? & 127 );
+    return "htmldoc exit status " . ($? >> 8);
+}
+
+# With no argument at all $ARGV[0] is undefined; check that first so
+# that -w does not print a warning ahead of the usage message.
+#
+if( !defined($ARGV[0]) || $ARGV[0] eq '' || !(-r $ARGV[0]) ) {
+    die "\nusage: ldp_print <single_file.html>\n\n";
+}
+
+my($fname_wo_ext) = $ARGV[0];
+$fname_wo_ext =~ s/\.[\w]+$//;
+
+
+# create new files from single HTML file to use for print
+#
+fix_print_html($ARGV[0], 'body.html', 'title.html') || do {
+    unlink('body.html', 'title.html');
+    die "\nldp_print: could not prepare $ARGV[0] for htmldoc\n";
+};
+
+my($target)  = "${fname_wo_ext}.pdf";
+my($failure) = run_htmldoc('pdf', $target);
+
+# For postscript output as well, uncomment the following lines:
+#
+# if( $failure eq '' ) {
+#     $target  = "${fname_wo_ext}.ps";
+#     $failure = run_htmldoc('ps', $target);
+# }
+
+# cleanup (also done when htmldoc failed)
+#
+unlink('body.html', 'title.html');
+
+die "\nldp_print: could not create $target ($failure)\n" if( $failure ne '' );
+
+exit(0);
+