This commit is contained in:
gferg 2002-08-09 15:37:26 +00:00
parent c988cca28a
commit b079e94034
4 changed files with 344 additions and 50 deletions

View File

@ -20,6 +20,7 @@ my($dtd) = 'SGML';
my($dcl) = ''; my($dcl) = '';
my($linuxdoc) = 1; my($linuxdoc) = 1;
my($create_index) = 1; my($create_index) = 1;
my($html_only) = 0;
my($cmd, $fname, $fname_wo_ext, $txt_filter, $style, $s, $db_v, $x) = ''; my($cmd, $fname, $fname_wo_ext, $txt_filter, $style, $s, $db_v, $x) = '';
my(@flines) = (); my(@flines) = ();
@ -36,6 +37,9 @@ while(1) {
} elsif ($ARGV[0] eq "-no_index") { } elsif ($ARGV[0] eq "-no_index") {
shift(@ARGV); shift(@ARGV);
$create_index = 0; $create_index = 0;
} elsif ($ARGV[0] eq "-html_only") {
shift(@ARGV);
$html_only = 1;
} else { } else {
last; last;
} }
@ -143,7 +147,8 @@ print "\nldp_mk: creating HTML from $fname...\n";
if( $linuxdoc == 1 ) { if( $linuxdoc == 1 ) {
$cmd = "sgml2html -c latin $fname"; ## $cmd = "sgml2html -c latin $fname";
$cmd = "sgml2html -c ascii $fname";
} else { } else {
@ -161,6 +166,11 @@ if( ($linuxdoc == 1 && !(-e "$fname_wo_ext.html"))
print "\nldp_mk: WARNING - could not create HTML: $fname_wo_ext\n"; print "\nldp_mk: WARNING - could not create HTML: $fname_wo_ext\n";
} }
if( $html_only == 1 ) {
system("rm -f index.sgml index.xml");
exit(0);
}
# create PLAIN TEXT version # create PLAIN TEXT version
# #
@ -258,7 +268,7 @@ if( -e "$fname_wo_ext.txt" ) {
# #
# Note that we use the single-page HTML variant # Note that we use the single-page HTML variant
# #
print "\nldp_mk: creating PDF from $fname...\n"; print "\nldp_mk: creating PDF/PS from $fname...\n";
my($print_str) = ''; my($print_str) = '';
@ -269,38 +279,34 @@ if( $linuxdoc == 1 ) {
system("$_toolroot/sgml_ld_1html $fname"); system("$_toolroot/sgml_ld_1html $fname");
$print_str = "00_${fname_wo_ext}.html"; $print_str = "00_${fname_wo_ext}.html";
} else {
# create new files from DocBook-source single HTML file to use for print
#
require "$_toolroot/lib/fix_print_html.lib";
&fix_print_html("00_${fname_wo_ext}.html", 'body.html', 'title.html');
$print_str = "--titlefile title.html body.html";
}
if( -e "00_$fname_wo_ext.html" ) {
$cmd = "$_toolroot/htmldoc/bin/htmldoc --size universal -t pdf " . $cmd = "$_toolroot/htmldoc/bin/htmldoc --size universal -t pdf " .
"--firstpage p1 -f $fname_wo_ext.pdf $print_str; " . "--firstpage p1 -f $fname_wo_ext.pdf $print_str; " .
"$_toolroot/htmldoc/bin/htmldoc --size universal -t ps " . "$_toolroot/htmldoc/bin/htmldoc --size universal -t ps " .
"--firstpage p1 -f $fname_wo_ext.ps $print_str"; "--firstpage p1 -f $fname_wo_ext.ps $print_str";
if( -e "00_$fname_wo_ext.html" ) {
system($cmd);
}
} elsif( -e "00_$fname_wo_ext.html" ) {
# create new files from DocBook-source single HTML file to use for print
#
$cmd = "$_toolroot/ldp_print/ldp_print --toolroot ${_toolroot}/htmldoc/bin " .
"--postscript 00_${fname_wo_ext}.html";
system($cmd); system($cmd);
system("mv -f 00_${fname_wo_ext}.pdf ${fname_wo_ext}.pdf");
system("mv -f 00_${fname_wo_ext}.ps ${fname_wo_ext}.ps");
}
if( !(-e "$fname_wo_ext.pdf") ) { if( !(-e "$fname_wo_ext.pdf") ) {
print "\nldp_mk: WARNING - could not create $fname_wo_ext.pdf\n"; print "\nldp_mk: WARNING - could not create $fname_wo_ext.pdf\n";
} }
if( !(-e "$fname_wo_ext.ps") ) { if( !(-e "$fname_wo_ext.ps") ) {
print "\nldp_mk: WARNING - could not create $fname_wo_ext.ps\n"; print "\nldp_mk: WARNING - could not create $fname_wo_ext.ps\n";
} else {
$cmd = "gzip -f $fname_wo_ext.ps";
system($cmd);
}
} else { } else {
print "\nldp_mk: WARNING - HTML file 00_$fname_wo_ext.html not found; ", $cmd = "gzip -f $fname_wo_ext.ps";
"skipping PDF/PS creation\n"; system($cmd);
} }
@ -358,6 +364,12 @@ if( $linuxdoc == 1 ) {
# cleanup # cleanup
system("rm -f index.sgml index.xml body.html title.html"); system("rm -f index.sgml index.xml body.html title.html");
if( $linuxdoc == 1 ) {
system("mk_pluckerdb ${fname_wo_ext} ${fname_wo_ext}.html");
} else {
system("cd ${fname_wo_ext} ; mk_pluckerdb ${fname_wo_ext} index.html");
}
print "\nldp_mk: completed...\n"; print "\nldp_mk: completed...\n";
exit(0); exit(0);

View File

@ -3,7 +3,8 @@
ldp_print - print tool/script for DocBook SGML/XML documents ldp_print - print tool/script for DocBook SGML/XML documents
###################################################################### ######################################################################
Copyright (C) 2002-2000 - Greg Ferguson (gferg@metalab.unc.edu) Copyright (C) 2002-2000 - Greg Ferguson (gferg@metalab.unc.edu) and
David A. Wheeler (dwheeler@dwheeler.com)
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -39,6 +40,12 @@ file into 'title.html' and 'body.html' and send each to htmldoc (as
the corresponding title page and body of the document). the corresponding title page and body of the document).
TO INSTALL
==========
Type "make install"
CAVEATS CAVEATS
======= =======
@ -47,15 +54,12 @@ o Assumes perl is in /usr/bin; adjust if necessary
o You may need to specify where the htmldoc executable resides. o You may need to specify where the htmldoc executable resides.
The script assumes it's within your $PATH. The script assumes it's within your $PATH.
o If you want Postscript as an output variant, uncomment the
appropriate lines (see below).
o Relies on output from a DocBook instance created via DSSSL/{open}jade! o Relies on output from a DocBook instance created via DSSSL/{open}jade!
o Cleans up (removes) the intermediate files it creates (but not the o Cleans up (removes) the intermediate files it creates (but not the
PDF or Postscript files, obviously!) PDF or Postscript files, obviously!)
o Works silently; PDF (PostScript) will be created in the same directory o Works silently; PDF or PostScript will be created in the same directory
as was specified for the input (single-file HTML) file. as was specified for the input (single-file HTML) file.
o Provided without warranty or support! o Provided without warranty or support!
@ -74,6 +78,10 @@ o I ran into a problem with htmldoc v1.8.8 which required a source
UPDATE (2001-10-10): It appears that later versions of htmldoc UPDATE (2001-10-10): It appears that later versions of htmldoc
have this problem corrected. The patch is not required. have this problem corrected. The patch is not required.
o htmldoc version 1.8.19 has a bug that causes it to NOT generate
title pages correctly. Use a different version of htmldoc
(such as 1.8.18) instead.
==== ====
Greg Ferguson / gferg (at) metalab.unc.edu Greg Ferguson / gferg (at) metalab.unc.edu
11 Jan 2000 11 Jan 2000

View File

@ -1 +1 @@
0.7.0, 2002-04-04 0.90, 2002-08-09

View File

@ -1,6 +1,6 @@
#!/usr/bin/perl -w #!/usr/bin/perl -w
# #
# usage: ldp_print <single_file.html> # usage: ldp_print [options] <single_file.html>
# #
# Creates a PDF variant of a single-file HTML representation of a # Creates a PDF variant of a single-file HTML representation of a
# DocBook SGML (or XML) instance. This simple wrapper assumes that # DocBook SGML (or XML) instance. This simple wrapper assumes that
@ -12,6 +12,12 @@
# the file into 'title.html' and 'body.html' and send each to # the file into 'title.html' and 'body.html' and send each to
# htmldoc (as the corresponding title page and body of the document). # htmldoc (as the corresponding title page and body of the document).
# #
# OPTIONS:
# --postscript Generate Postscript (by default, doesn't)
# --nopdf Don't generate PDF (by default, generates PDF)
# --size X Set output page size (default "universal")
# X can be "A4", "Letter", or "WIDTHxLENGTHunits".
# where units can be in, mm, or cm.
# #
# CAVEATS: # CAVEATS:
# #
@ -33,35 +39,302 @@
# #
# Provided without warranty or support! # Provided without warranty or support!
# #
# gferg@sgi.com / Ferg (used as part of the LDP production env) # <gferg (at) metalab.unc.edu> / Ferg
# <dan.scott (at) acm.org> / Dan Scott
# <dwheeler (at) dwheeler.com> / David A. Wheeler
#
# Licensed under the GNU GPL version 2.
#
# ChangeLog:
# 16Oct2000 - 0.1 - initial entry <gferg (at) sgi.com>
# 03Apr2001 - 0.2 - fix for <preface>
# 05Jul2001 - 0.3 - fix for <tt> and -f
# 12Oct2001 - 0.4 - fix for sections; loop thru both files (body/title)
# 27Nov2001 - 0.5 - fixed bug in determining where doc-index lies
# 18Jan2002 - 0.5.1 - entity fix (822*)
# 02Apr2002 - 0.6 - misc fixes (bibliography/appendix, etc).
# 04Apr2002 - 0.7 - fix for newer DSSSL
# 27May2002 - 0.8 - Merged library and driver, greatly simplifying
# installation, and added options to driver.
# 09Aug2002 - 0.9 - Some minor clean-up
# #
use strict; use strict;
push(@INC, "./");
require 'fix_print_html.lib';
# fix_print_html($in, $out, $ttl)
#
# Splits the single-file HTML document $in into two files for htmldoc:
#   $ttl - the title page (everything up to the table of contents)
#   $out - the document body (header lines plus everything from the first
#          chapter/preface/section onwards; the TOC itself is dropped)
# Both buffers are passed through fix_html() before being written.
#
# Returns 1 on success, 0 if any of the three files cannot be opened.
#
# The scan is a small state machine driven by $indx:
#   -1  before CLASS="TITLEPAGE" is seen; lines go to both buffers
#    0  inside the title page; lines go to the title buffer only
#    1  inside the TOC; lines are skipped until a CHAP/PREF/SECT class
#   >1  body; lines go to the body buffer
#
# NOTE(review): the `if( $ARGV[0] ... ) {` prefix on the next line, the
# `die` line after it and the extra closing brace on the last line look like
# left-column residue of a side-by-side diff (the same usage check appears
# again in the main driver) rather than part of this sub -- confirm against
# the real file.
if( $ARGV[0] eq '' || !(-r $ARGV[0]) ) { sub fix_print_html {
die "\nusage: ldp_print <single_file.html>\n\n";
my($in,$out,$ttl) = @_;
open(IN_FILE, "< $in") || do {
print "fix_print_html: cannot open $in: $!\n";
return 0;
};
# $buf collects the body, $ttl_buf the title page ($ttl_buf starts undef
# and is only ever appended to).
my($buf, $ttl_buf) = '';
my($indx) = -1;
# Assume an article until a CLASS="BOOK" line says otherwise; the flag is
# handed to fix_html() for the body.
my($is_article) = 1;
while(<IN_FILE>) {
if( $indx == 1 ) {
# ignore everything until we see the chapter or sect
#
if( $_ =~ /CLASS="CHAP/i || $_ =~ /CLASS="PREF/i
||
$_ =~ /CLASS="SECT/i ) {
$buf .= $_;
$indx++;
} else {
next;
}
} elsif( $indx == 0 ) {
# write out the title page file
#
if( $_ =~ /CLASS="TOC"/ ) {
# The TOC marks the end of the title page: close the markup, adjust
# spacing after the first </H1 and drop a doubled <HR>, then clean up
# and write the title file.  If no TOC line is ever seen, the title
# file is never written.
$ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
$ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
$ttl_buf =~ s/<HR><\/DIV\n><HR>/<HR><\/DIV\n>/ms;
# The title page is always processed with the article rules.
&fix_html(\$ttl_buf, 1);
open(TOC_FILE, "> $ttl") || do {
print "fix_print_html: cannot open $ttl: $!\n";
close(IN_FILE);
return 0;
};
print TOC_FILE $ttl_buf;
close(TOC_FILE);
$ttl_buf = '';
$indx++;
} else {
$ttl_buf .= $_;
}
} elsif( $indx < 0 ) {
if( $_ =~ /CLASS="BOOK"/i ) {
$is_article = 0;
}
# up to this point, both buffers get the line
#
if( $_ =~ /CLASS="TITLEPAGE"/ ) {
# The TITLEPAGE line itself goes to the title buffer only, followed
# by a spacer paragraph.
$ttl_buf .= $_ . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
$indx++;
} else {
$buf .= $_;
$ttl_buf .= $_;
}
} else {
$buf .= $_;
}
}
close(IN_FILE);
# fix body file
#
open(OUT_FILE, "> $out") || do {
print "fix_print_html: cannot open $out: $!\n";
return 0;
};
&fix_html(\$buf, $is_article);
print OUT_FILE $buf;
close(OUT_FILE);
return 1;
} }
my($fname_wo_ext) = $ARGV[0];
# fix_html(\$buf, $is_article)
#
# Rewrites, in place, the DSSSL/jade-style HTML held in the scalar that
# $buf refers to so that htmldoc renders it sensibly.  The markup is
# expected in the "tag name / newline / attributes / newline / >" layout,
# which is why most patterns contain literal "\n".
#
#   $buf         reference to the HTML text; modified through the reference
#   $is_article  0 for a book (chapter headings are re-levelled),
#                anything else for an article
#
# Returns nothing.
sub fix_html {

    my($buf, $is_article) = @_;
    my($indx) = -1;

    # Unwrap the <P> that directly follows an <LI> (plain, FORMALPARA and
    # styled list items).
    $$buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $$buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
    $$buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;

    # In a book, retag section-level H1 headings as H0; the book branch
    # further down maps H0 to H2.
    if( $is_article == 0 ) {
        $$buf =~
          s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
        $$buf =~
          s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
    }

    # Index divisions are demoted from H1 to H2.
    $$buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gims;

    # Cut the document off at the last doc-index heading, if there is one.
    # The upper-case anchor is tried first; only one truncation is done.
    for my $anchor ('DOC-INDEX', 'doc-index') {
        $indx = rindex($$buf, "<H1\n><A\nNAME=\"$anchor\"");
        next if $indx < 0;
        $$buf = substr($$buf, 0, $indx) . "\n<\/BODY>\n<\/HTML>\n\n";
        last;
    }

    # Replace or drop numeric entities and attributes.
    $$buf =~ s/\&\#13;//g;
    $$buf =~ s/\&\#60;/\&lt;/g;
    $$buf =~ s/\&\#62;/\&gt;/g;
    $$buf =~ s/\&\#8211;/\-/g;
    $$buf =~ s/\&\#8220;/\"/g;
    $$buf =~ s/\&\#8221;/\"/g;
    $$buf =~ s/WIDTH=\"\d\"//g;
    $$buf =~ s/><[\/]*TBODY//g;
    $$buf =~ s/><[\/]*THEAD//g;
    $$buf =~ s/TYPE=\"1\"\n//gim;
    $$buf =~ s/<P\nCLASS="LITERALLAYOUT"(.*?)<\/P/<P CLASS="LITERALLAYOUT"><FONT FACE=\"courier\"$1<\/FONT><\/P/gms;

    if( $$buf !~ /<H1/ ) {

        # for newer docbook styles, set h2 to h1, etc.
        #
        for my $level (2 .. 6) {
            my $up = $level - 1;
            $$buf =~ s/<H${level}/<H${up}/g;
            $$buf =~ s/<\/H${level}/<\/H${up}/g;
        }

    } elsif( $is_article == 0 ) {

        # decrement the headers by 1 and then re-set the
        # chapter level only to H1...
        # (walk from H5 down so a heading is never shifted twice;
        # H0 and H1 both end up as H2)
        #
        for my $level (reverse 0 .. 5) {
            my $down = ($level == 0) ? 2 : $level + 1;
            $$buf =~ s/<H${level}/<H${down}/g;
            $$buf =~ s/<\/H${level}/<\/H${down}/g;
        }

        # After a line that opens a chapter-like division, promote H2 back
        # to H1 on the following lines until the next line containing <DIV.
        # Note: split() discards trailing empty fields, so newlines at the
        # very end of the buffer are not restored by the join below.
        my $promote = 0;
        my @lines = split(/\n/, $$buf);
        for my $line (@lines) {

            if( $promote ) {
                if( $line =~ /<DIV/ ) {
                    $promote = 0;
                    next;
                }
                $line =~ s/<H2/<H1/g;
                $line =~ s/<\/H2/<\/H1/g;
            }

            if( $line =~ /^CLASS=\"CHAP/i
                ||
                $line =~ /^NAME=\"BIBL/i
                ||
                $line =~ /^CLASS=\"APPENDIX/i
                ||
                $line =~ /^CLASS=\"GLOSSARY/i
                ||
                $line =~ /^CLASS=\"PREF/i
                ||
                $line =~ /^CLASS=\"TITLE/i ) {
                $promote = 1;
            }
        }
        $$buf = join("\n", @lines);
    }

    # Drop DIV and SPAN wrappers.  The SPAN substitutions must go through
    # the reference ($$buf); applied to $buf itself they only ever see the
    # stringified reference and leave the document untouched.
    $$buf =~ s/><DIV\nCLASS="\w+"\n//gms;
    $$buf =~ s/><\/DIV\n//gms;
    $$buf =~ s/<SPAN\n[^>]*?>//gms;
    $$buf =~ s/<\/SPAN\n>//gms;
    $$buf =~ s/(><LI\n)><P\n(.*?)<\/P\n>(<\/LI\n)/$1$2$3/gms;

    return;
}
########### MAIN DRIVER ##############

# Default values for options:
my($generate_pdf) = 1;
my($generate_ps) = 0;
my($pagesize) = "universal";
my($pth) = '';

# Process options.  Leading arguments that start with "-" are consumed
# until "--" or the first non-option argument; what remains in @ARGV is
# the input file.
#   --postscript / --nopostscript   turn PostScript output on / off
#   --pdf / --nopdf                 turn PDF output on / off
#   --size X                        page size handed to htmldoc
#   --toolroot DIR                  directory prefixed to the htmldoc command
my($arg);
while (@ARGV && $ARGV[0] =~ m/^-/) {
    $arg = shift(@ARGV);
    if    ($arg eq "--")             { last; }
    elsif ($arg eq "--postscript")   { $generate_ps = 1; }
    elsif ($arg eq "--nopostscript") { $generate_ps = 0; }
    elsif ($arg eq "--pdf")          { $generate_pdf = 1; }
    elsif ($arg eq "--nopdf")        { $generate_pdf = 0; }
    elsif ($arg eq "--size" || $arg eq "--toolroot") {
        # Both options take a value; without this check a trailing option
        # would silently yield undef (and --toolroot would become "/").
        die "\nldp_print: option $arg requires a value\n" unless @ARGV;
        if ($arg eq "--size") {
            $pagesize = shift(@ARGV);
        } else {
            $pth = shift(@ARGV) . "/";
        }
    }
    else { die "\nldp_print: unknown option $arg\n"; }
}

# The defined() test keeps -w quiet when no file argument was given at all.
if( !defined($ARGV[0]) || $ARGV[0] eq '' || !(-r $ARGV[0]) ) {
    die "\nusage: ldp_print [options] <single_file.html>\n\n";
}

# Compute htmldoc options.
my($htmldoc_options) = "--size ${pagesize} --firstpage p1 --footer c.1";

# Now get filename and check it. Don't allow whitespace, since a
# filename with whitespace will cause trouble.
# NOTE: If the filename can be controlled by an untrusted user,
# the filename (and options!) need to be filtered further to forbid
# metacharacters, control characters, etc. as well.
my($filename) = $ARGV[0];
if ($filename =~ m/ /) {
    die "\nldp_print: filenames cannot contain spaces.\n";
}
if ($filename =~ m/[\t\n]/) {
    die "\nldp_print: filenames cannot contain whitespace.\n";
}

my($fname_wo_ext) = $filename;
$fname_wo_ext =~ s/\.[\w]+$//; $fname_wo_ext =~ s/\.[\w]+$//;
# create new files from single HTML file to use for print # create new files from single HTML file to use for print
# #
&fix_print_html($ARGV[0], 'body.html', 'title.html'); &fix_print_html($filename, 'body.html', 'title.html');
my($cmd) = "htmldoc --size universal -t pdf -f ${fname_wo_ext}.pdf " .
"--firstpage p1 --titlefile title.html body.html --footer c.1";
# For postscript output; append onto the above cmd string: if ($generate_pdf) {
# my($pdf_cmd) = "${pth}htmldoc ${htmldoc_options} -t pdf -f ${fname_wo_ext}.pdf " .
# "; htmldoc --size universal -t ps -f ${fname_wo_ext}.ps " . "--titlefile title.html body.html ";
# "--firstpage p1 --titlefile title.html body.html"; system($pdf_cmd);
# die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?);
system($cmd); };
die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?);
if ($generate_ps) {
my($ps_cmd) = "${pth}htmldoc ${htmldoc_options} -t ps -f ${fname_wo_ext}.ps " .
"--titlefile title.html body.html";
system($ps_cmd);
die "\nldp_print: could not create ${fname_wo_ext}.ps ($!)\n" if ($?);
};
# cleanup # cleanup
# #
@ -69,3 +342,4 @@ system("rm -f body.html title.html");
exit(0); exit(0);