new entry; print lib works w/htmldoc

This commit is contained in:
gferg 2001-01-25 22:15:41 +00:00
parent 88b1b2483c
commit 0e15f02677
3 changed files with 259 additions and 0 deletions

View File

@ -0,0 +1,60 @@
ldp_print - print tool/script for DocBook SGML/XML documents
This process/script is used in the production environment for the
LDP. It relies on the HTMLDOC software package (GPL'ed) which can be
obtained from the Easy Software Products (c) web site
(http://www.easysw.com/htmldoc/).
This process creates a PDF variant from the single-file HTML
representation of a DocBook SGML (or XML) instance. The simple
wrapper script (ldp_print) assumes that the file was created using
{open}jade in a manner similar to:
jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
Give the script the filename as an argument. It will then parse the
file into 'title.html' and 'body.html' and send each to htmldoc (as
the corresponding title page and body of the document).
o Assumes perl is in /usr/bin; adjust if necessary
o You may need to specify where the htmldoc executable resides.
The script assumes it's within your $PATH.
o If you want Postscript as an output variant, uncomment the
appropriate lines (see below).
o Relies on output from a DocBook instance created via DSSSL/{open}jade!
o Cleans up (removes) the intermediate files it creates (but not the
PDF or Postscript files, obviously!)
o Works silently; PDF (PostScript) will be created in the same directory
as was specified for the input (single-file HTML) file.
o Provided without warranty or support!
o I ran into a problem with htmldoc v1.8.8 which required a source
code change (I was getting a core dump from the htmldoc process).
Here is the change required:
htmldoc/ps-pdf.cxx :
< /* gjf = 11Oct2000 */
< if( temprow == NULL )
< break;
gferg (at) / Ferg
11 Jan 2001

View File

@ -0,0 +1,128 @@
# fix_print_html.lib
# Dan Scott / <dan.scott (at)>
# Ferg / <gferg (at)>
# Used to prepare single-file HTML variant for PDF/Postscript creation
# thru htmldoc.
# log:
# 16Oct2000 - initial entry <gferg (at)>
# fix_print_html($in, $out, $ttl)
#
# Splits the single-file HTML document $in (as produced by {open}jade with
# -V nochunks) into two files suitable for htmldoc:
#
#   $ttl - the title page: everything up to the table of contents
#   $out - the document body: the HTML header plus everything from the
#          first chapter/section onwards (the table of contents itself
#          is dropped)
#
# Returns 1 on success.  Returns 0, after printing a message to STDOUT,
# when one of the files cannot be opened or written.
#
# NOTE: the title file is only written once a CLASS="TOC" line is seen;
# input without a table of contents produces no title file.
sub fix_print_html {

    my($in,$out,$ttl) = @_;

    open(my $in_fh, '<', $in) || do {
        print "fix_print_html: cannot open $in: $!\n";
        return 0;
    };

    my $buf        = '';    # body document under construction
    my $ttl_buf    = '';    # title page under construction
    my $is_article = 0;     # set when a SECT is seen before any CHAP

    # Parsing phase:
    #   -1  before the title page; lines go to both buffers
    #    0  inside the title page; lines go to the title buffer only
    #    1  after the title page; lines are skipped until the first
    #       chapter or section
    #    2  inside the body; lines go to the body buffer
    my $indx = -1;

    while( defined(my $line = <$in_fh>) ) {

        if( $indx == 1 ) {

            # ignore everything until we see the chapter or sect
            if( $line =~ /CLASS="CHAP/ ) {
                $buf .= $line;
                $indx++;
            } elsif( $line =~ /CLASS="SECT/ || $line =~ /CLASS="sect/ ) {
                $buf .= $line;
                $is_article = 1;
                $indx++;
            }

        } elsif( $indx == 0 ) {

            # write out the title page file
            if( $line =~ /CLASS="TOC"/ ) {
                $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
                $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
                open(my $ttl_fh, '>', $ttl) || do {
                    print "fix_print_html: cannot open $ttl: $!\n";
                    return 0;
                };
                print $ttl_fh $ttl_buf;
                # buffered write errors only surface at close
                close($ttl_fh) || do {
                    print "fix_print_html: cannot write $ttl: $!\n";
                    return 0;
                };
                $ttl_buf = '';
                $indx++;
            } else {
                $ttl_buf .= $line;
            }

        } elsif( $indx < 0 ) {

            # up to this point, both buffers get the line
            if( $line =~ /CLASS="TITLEPAGE"/ ) {
                $ttl_buf .= $line . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
                $indx++;
            } else {
                $buf .= $line;
                $ttl_buf .= $line;
            }

        } else {
            $buf .= $line;
        }
    }
    close($in_fh);

    open(my $out_fh, '>', $out) || do {
        print "fix_print_html: cannot open $out: $!\n";
        return 0;
    };

    # make these corrections and write out the file

    # unwrap the paragraph that directly follows a list item
    $buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;

    # in a book (chapters present) section headings are demoted to H2
    if( $is_article == 0 ) {
        $buf =~ s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
        $buf =~ s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
    }

    # NOTE(review): assumed to apply to books and articles alike; confirm
    # it was not meant to sit inside the $is_article test above.
    $buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gms;

    # drop the generated document index; the cut also removes the closing
    # tags, so put them back
    my $cut = rindex($buf, "<H1\n><A\nNAME=\"DOC-INDEX\"");
    if( $cut > -1 ) {
        $buf = substr($buf, 0, $cut);
        $buf .= "\n<\/BODY>\n<\/HTML>\n\n";
    }

    # replace numeric character references with named entities / ASCII
    $buf =~ s/\&\#60;/\&lt;/g;
    $buf =~ s/\&\#62;/\&gt;/g;
    $buf =~ s/\&\#8211;/\-/g;
    $buf =~ s/><[\/]*TBODY//g;

    print $out_fh $buf;
    close($out_fh) || do {
        print "fix_print_html: cannot write $out: $!\n";
        return 0;
    };

    return 1;
}

# Return true from package include
1;

LDP/builder/ldp_print/ldp_print Executable file
View File

@ -0,0 +1,71 @@
#!/usr/bin/perl -w
# usage: ldp_print <single_file.html>
# Creates a PDF variant of a single-file HTML representation of a
# DocBook SGML (or XML) instance. This simple wrapper assumes that
# the file was created using {open}jade in a manner similar to:
# jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
# Give this script the filename as an argument. It will then parse
# the file into 'title.html' and 'body.html' and send each to
# htmldoc (as the corresponding title page and body of the document).
# Assumes perl is in /usr/bin; adjust if necessary
# You may need to specify where the htmldoc executable resides.
# The script assumes it's within your $PATH.
# If you want Postscript as an output variant, uncomment the
# appropriate lines (see below).
# Relies on output from a DocBook instance created via DSSSL/{open}jade!
# Cleans up (removes) the intermediate files it creates (but not the
# PDF or Postscript files, obviously!)
# Works silently; PDF (PostScript) will be created in the same directory
# as was specified for the input (single-file HTML) file.
# Provided without warranty or support!
# / Ferg (used as part of the LDP production env)
use strict;
use warnings;

# fix_print_html.lib is expected to sit in the current directory.
push(@INC, "./");
require 'fix_print_html.lib';

# Exactly one readable input file is required.  Testing defined() first
# avoids an "uninitialized value" warning when no argument is given.
my $html_file = $ARGV[0];
if( !defined($html_file) || $html_file eq '' || !(-r $html_file) ) {
    die "\nusage: ldp_print <single_file.html>\n\n";
}

# The PDF is written next to the input file, with its extension replaced.
my($fname_wo_ext) = $html_file;
$fname_wo_ext =~ s/\.[\w]+$//;

# create new files from single HTML file to use for print
fix_print_html($html_file, 'body.html', 'title.html')
    or die "\nldp_print: could not split $html_file into title and body\n";

# The command is passed as a list so that no shell is involved and file
# names containing spaces or shell metacharacters are handed over intact.
my @cmd = ('htmldoc', '--size', 'universal', '-t', 'pdf',
           '-f', "${fname_wo_ext}.pdf",
           '--firstpage', 'p1', '--titlefile', 'title.html', 'body.html');

# For postscript output; run this as well:
# my @ps_cmd = ('htmldoc', '--size', 'universal', '-t', 'ps',
#               '-f', "${fname_wo_ext}.ps",
#               '--firstpage', 'p1', '--titlefile', 'title.html', 'body.html');
# system(@ps_cmd) == 0
#     or die "\nldp_print: could not create ${fname_wo_ext}.ps\n";

my $rc = system(@cmd);
if( $rc == -1 ) {
    # htmldoc could not be started at all; only then is $! meaningful
    die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n";
} elsif( $rc != 0 ) {
    die "\nldp_print: could not create ${fname_wo_ext}.pdf " .
        "(htmldoc exit status " . ($rc >> 8) . ")\n";
}

# cleanup
unlink('body.html', 'title.html');