mirror of https://github.com/tLDP/LDP
new entry; print lib works w/htmldoc
This commit is contained in:
parent
88b1b2483c
commit
0e15f02677
|
@ -0,0 +1,60 @@
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
ldp_print - print tool/script for DocBook SGML/XML documents
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
This process/script is used in the production environment for the
|
||||||
|
LDP. It relies on the HTMLDOC software package (GPL'ed) which can be
|
||||||
|
obtained from the Easy Software Products (c) web site:
|
||||||
|
|
||||||
|
http://www.easysw.com/htmldoc/
|
||||||
|
|
||||||
|
This process creates a PDF variant from the single-file HTML
|
||||||
|
representation of a DocBook SGML (or XML) instance. The simple
|
||||||
|
wrapper script (ldp_print) assumes that the file was created using
|
||||||
|
{open}jade in a manner similar to:
|
||||||
|
|
||||||
|
jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
|
||||||
|
|
||||||
|
Give the script the filename as an argument. It will then parse the
|
||||||
|
file into 'title.html' and 'body.html' and send each to htmldoc (as
|
||||||
|
the corresponding title page and body of the document).
|
||||||
|
|
||||||
|
|
||||||
|
CAVEATS
|
||||||
|
=======
|
||||||
|
|
||||||
|
o Assumes perl is in /usr/bin; adjust if necessary
|
||||||
|
|
||||||
|
o You may need to specify where the htmldoc executable resides.
|
||||||
|
The script assumes it's within your $PATH.
|
||||||
|
|
||||||
|
o If you want PostScript as an output variant, uncomment the
|
||||||
|
appropriate lines in the ldp_print script.
|
||||||
|
|
||||||
|
o Relies on output from a DocBook instance created via DSSSL/{open}jade!
|
||||||
|
|
||||||
|
o Cleans up (removes) the intermediate files it creates (but not the
|
||||||
|
PDF or PostScript files, obviously!)
|
||||||
|
|
||||||
|
o Works silently; PDF (PostScript) will be created in the same directory
|
||||||
|
as was specified for the input (single-file HTML) file.
|
||||||
|
|
||||||
|
o Provided without warranty or support!
|
||||||
|
|
||||||
|
o I ran into a problem with htmldoc v1.8.8 which required a source
|
||||||
|
code change (I was getting a core dump from the htmldoc process).
|
||||||
|
Here is the change required:
|
||||||
|
|
||||||
|
htmldoc/ps-pdf.cxx :
|
||||||
|
3662,3665d3661
|
||||||
|
< /* gjf = 11Oct2000 */
|
||||||
|
< if( temprow == NULL )
|
||||||
|
< break;
|
||||||
|
<
|
||||||
|
|
||||||
|
|
||||||
|
====
|
||||||
|
gferg (at) sgi.com / Ferg
|
||||||
|
11 Jan 2000
|
||||||
|
|
|
@ -0,0 +1,128 @@
|
||||||
|
#
|
||||||
|
# fix_print_html.lib
|
||||||
|
#
|
||||||
|
# Dan Scott / <dan.scott (at) acm.org>
|
||||||
|
# Ferg / <gferg (at) sgi.com>
|
||||||
|
#
|
||||||
|
# Used to prepare single-file HTML variant for PDF/Postscript creation
|
||||||
|
# thru htmldoc.
|
||||||
|
#
|
||||||
|
# log:
|
||||||
|
# 16Oct2000 - initial entry <gferg (at) sgi.com>
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# _fph_write_file($path, $content)
#
# Private helper for fix_print_html: create/truncate $path and write
# $content to it.  Reports problems on STDOUT using the same
# "fix_print_html: ..." message prefix as the rest of this library.
#
# Returns 1 on success, 0 if the file could not be opened or written.
#
sub _fph_write_file {

    my($path, $content) = @_;

    # three-arg open: $path is always treated as a literal file name
    my $fh;
    unless( open($fh, '>', $path) ) {
        print "fix_print_html: cannot open $path: $!\n";
        return 0;
    }

    # buffered write errors only surface at close, so check both
    unless( (print {$fh} $content) && close($fh) ) {
        print "fix_print_html: cannot write $path: $!\n";
        return 0;
    }

    return 1;
}


# fix_print_html($in, $out, $ttl)
#
# Splits the single-file HTML document $in into a title page file
# ($ttl) and a body file ($out), applying a set of markup corrections
# to the body before it is written.
#
#   $in  - path of the single-file HTML input
#   $out - path of the body file to create
#   $ttl - path of the title page file to create
#
# The input is scanned line by line in four phases:
#
#   phase -1 : before the CLASS="TITLEPAGE" line; lines go to both the
#              body and the title buffer
#   phase  0 : title page; lines go to the title buffer only, until the
#              CLASS="TOC" line, at which point $ttl is written
#   phase  1 : lines are dropped until the first CLASS="CHAP... line or
#              CLASS="SECT.../CLASS="sect... line
#   phase  2 : remainder of the document; lines go to the body buffer
#
# Note that $ttl is only written when a CLASS="TOC" line is seen after
# a CLASS="TITLEPAGE" line.
#
# Returns 1 on success, 0 (after printing a message) if a file could
# not be opened or written.
#
sub fix_print_html {

    my($in,$out,$ttl) = @_;

    my $in_fh;
    unless( open($in_fh, '<', $in) ) {
        print "fix_print_html: cannot open $in: $!\n";
        return 0;
    }

    my $buf        = '';
    my $ttl_buf    = '';
    my $phase      = -1;
    my $is_article = 0;

    while( my $line = <$in_fh> ) {

        if( $phase < 0 ) {

            # up to this point, both buffers get the line
            #
            if( $line =~ /CLASS="TITLEPAGE"/ ) {

                # the TITLEPAGE line itself goes to the title buffer only,
                # followed by some vertical padding
                $ttl_buf .= $line . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
                $phase = 0;

            } else {
                $buf     .= $line;
                $ttl_buf .= $line;
            }

        } elsif( $phase == 0 ) {

            # write out the title page file
            #
            if( $line =~ /CLASS="TOC"/ ) {

                # close the title page markup and add spacing after
                # the first heading
                $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
                $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;

                unless( _fph_write_file($ttl, $ttl_buf) ) {
                    close($in_fh);
                    return 0;
                }
                $ttl_buf = '';
                $phase = 1;

            } else {
                $ttl_buf .= $line;
            }

        } elsif( $phase == 1 ) {

            # ignore everything until we see the chapter or sect
            #
            if( $line =~ /CLASS="CHAP/ ) {

                $buf .= $line;
                $phase = 2;

            } elsif( $line =~ /CLASS="SECT/ || $line =~ /CLASS="sect/ ) {

                # a section before any chapter: flag the document as an
                # article, which disables the SECT heading demotion below
                $buf .= $line;
                $phase = 2;
                $is_article = 1;
            }

        } else {

            $buf .= $line;
        }
    }
    close($in_fh);


    # make these corrections and write out the file
    #

    # drop the <P> wrapper directly inside list items
    $buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;

    # when the body starts with a chapter, demote section headings
    # from H1 to H2
    if( $is_article == 0 ) {
        $buf =~ s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
        $buf =~ s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gms;
    }
    $buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gms;

    # cut the document at the last DOC-INDEX heading and re-close it
    my $cut = rindex($buf, "<H1\n><A\nNAME=\"DOC-INDEX\"");
    if( $cut > -1 ) {
        $buf = substr($buf, 0, $cut);
        $buf .= "\n<\/BODY>\n<\/HTML>\n\n";
    }

    # replace these numeric character references with literal characters
    $buf =~ s/\&\#60;/\</g;
    $buf =~ s/\&\#62;/\>/g;
    $buf =~ s/\&\#8211;/\-/g;

    # strip TBODY open/close tags
    $buf =~ s/><[\/]*TBODY//g;

    return _fph_write_file($out, $buf);
}
|
||||||
|
|
||||||
|
|
||||||
|
# Return true from package include
|
||||||
|
#
|
||||||
|
1;
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
#!/usr/bin/perl -w
|
||||||
|
#
|
||||||
|
# usage: ldp_print <single_file.html>
|
||||||
|
#
|
||||||
|
# Creates a PDF variant of a single-file HTML representation of a
|
||||||
|
# DocBook SGML (or XML) instance. This simple wrapper assumes that
|
||||||
|
# the file was created using {open}jade in a manner similar to:
|
||||||
|
#
|
||||||
|
# jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
|
||||||
|
#
|
||||||
|
# Give this script the filename as an argument. It will then parse
|
||||||
|
# the file into 'title.html' and 'body.html' and send each to
|
||||||
|
# htmldoc (as the corresponding title page and body of the document).
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CAVEATS:
|
||||||
|
#
|
||||||
|
# Assumes perl is in /usr/bin; adjust if necessary
|
||||||
|
#
|
||||||
|
# You may need to specify where the htmldoc executable resides.
|
||||||
|
# The script assumes it's within your $PATH.
|
||||||
|
#
|
||||||
|
# If you want Postscript as an output variant, uncomment the
|
||||||
|
# appropriate lines (see below).
|
||||||
|
#
|
||||||
|
# Relies on output from a DocBook instance created via DSSSL/{open}jade!
|
||||||
|
#
|
||||||
|
# Cleans up (removes) the intermediate files it creates (but not the
|
||||||
|
# PDF or Postscript files, obviously!)
|
||||||
|
#
|
||||||
|
# Works silently; PDF (PostScript) will be created in the same directory
|
||||||
|
# as was specified for the input (single-file HTML) file.
|
||||||
|
#
|
||||||
|
# Provided without warranty or support!
|
||||||
|
#
|
||||||
|
# gferg@sgi.com / Ferg (used as part of the LDP production env)
|
||||||
|
#
|
||||||
|
|
||||||
|
use strict;
use FindBin;

# Look for the library in the current directory (as before) and also
# next to this script, so the script can be run from any directory.
push(@INC, "./", $FindBin::Bin);
require 'fix_print_html.lib';

# Require a readable input file; test definedness first so a missing
# argument does not trigger "uninitialized value" warnings under -w.
my $src = $ARGV[0];
if( !defined($src) || $src eq '' || !(-r $src) ) {
  die "\nusage: ldp_print <single_file.html>\n\n";
}

# The PDF is created next to the input, with the extension replaced.
my($fname_wo_ext) = $src;
$fname_wo_ext =~ s/\.[\w]+$//;

# Intermediate files, created in the current directory.
my @tmp_files = ('body.html', 'title.html');


# create new files from single HTML file to use for print
#
unless( fix_print_html($src, 'body.html', 'title.html') ) {
  unlink(@tmp_files);
  die "\nldp_print: could not prepare $src for printing\n";
}

# List-form system() bypasses the shell, so file names containing
# spaces or shell metacharacters are passed to htmldoc unchanged.
my @cmd = ('htmldoc', '--size', 'universal', '-t', 'pdf',
           '-f', "${fname_wo_ext}.pdf",
           '--firstpage', 'p1', '--titlefile', 'title.html', 'body.html');

system(@cmd);

# Capture both immediately: $! is only meaningful when $? is -1, and
# the cleanup below may overwrite it.
my($status, $err) = ($?, $!);

# For PostScript output, uncomment the following to run htmldoc a
# second time (it must run before the cleanup below):
#
# system('htmldoc', '--size', 'universal', '-t', 'ps',
#        '-f', "${fname_wo_ext}.ps",
#        '--firstpage', 'p1', '--titlefile', 'title.html', 'body.html');
#

# cleanup (also done when htmldoc failed)
#
unlink(@tmp_files);

if( $status == -1 ) {
  die "\nldp_print: could not create ${fname_wo_ext}.pdf " .
      "(cannot run htmldoc: $err)\n";
} elsif( $status ) {
  die "\nldp_print: could not create ${fname_wo_ext}.pdf " .
      "(htmldoc exit status " . ($status >> 8) . ")\n";
}

exit(0);
|
||||||
|
|
Loading…
Reference in New Issue