#!/usr/bin/perl -w
#
# usage: ldp_print [options] HTML_FILE
#
# Creates a PDF variant of a single-file HTML representation of a
# DocBook SGML (or XML) instance. This simple wrapper assumes that
# the file was created using {open}jade in a manner similar to:
#
#     jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
#
# Give this script the filename as the argument following any options.
# It will then parse the file into 'title.html' and 'body.html' and send
# each to htmldoc (as the corresponding title page and body of the
# document).
#
# NOTE(review): this copy of the file appears to have been damaged in
# extraction. Line breaks have been collapsed (so '#' comment markers now
# swallow the code that follows them on the same physical line), and text
# that looked like markup seems to have been stripped: the read loop is
# 'while()' with no filehandle, both rindex() calls have lost their
# search string, several s/// patterns are unbalanced, and the usage
# message no longer names the file argument. The lost text cannot be
# recovered from this copy; compare against a pristine copy before
# running or editing the code below.
#
# NOTE(review): independent of the damage above, three things visible in
# the code look unintended and should be confirmed against a pristine copy:
#   - near the end of fix_html two substitutions bind to $buf (the
#     reference passed in) rather than $$buf (the text it refers to);
#   - the main driver ignores the return value of fix_print_html, so
#     htmldoc is still invoked when the input could not be split;
#   - the argument check compares $ARGV[0] with '' before testing
#     readability, which compares an undefined value when no file
#     argument is given.
#
# OPTIONS:
#   --postscript    Generate Postscript (by default, doesn't)
#   --nopostscript  Don't generate Postscript (the default)
#   --pdf           Generate PDF (the default)
#   --nopdf         Don't generate PDF (by default, generates PDF)
#   --size X        Set output page size (default "universal")
#                   X can be "A4", "Letter", or "WIDTHxLENGTHunits",
#                   where units can be in, mm, or cm.
#   --toolroot DIR  Prefix the htmldoc command with "DIR/"
#   --              Stop option processing
#
# CAVEATS:
#
#   Assumes perl is in /usr/bin; adjust if necessary
#
#   You may need to specify where the htmldoc executable resides
#   (see --toolroot). Otherwise the script assumes it's within your $PATH.
#
#   If you want Postscript as an output variant, pass --postscript.
#
#   Relies on output from a DocBook instance created via DSSSL/{open}jade!
#
#   Filenames containing spaces, tabs or newlines are rejected.
#
#   Writes the intermediate files 'body.html' and 'title.html' using
#   relative names and removes them afterwards (but not the PDF or
#   Postscript files, obviously!)
#
#   Works silently; the PDF (PostScript) file is named after the input
#   (single-file HTML) file with its final extension replaced, so it is
#   created in the same directory as was specified for the input.
#
#   Provided without warranty or support!
#
# / Ferg
# / Dan Scott
#
# 03Apr2001 - 0.2   - fix for [element name missing from this copy]
# 05Jul2001 - 0.3   - fix for [element name missing from this copy] and -f
# 12Oct2001 - 0.4   - fix for sections; loop thru both files (body/title)
# 27Nov2001 - 0.5   - fixed bug in determining where doc-index lies
# 18Jan2002 - 0.5.1 - entity fix (822*)
# 02Apr2002 - 0.6   - misc fixes (bibliography/appendix, etc).
# 04Apr2002 - 0.7   - fix for newer DSSSL
# 27May2002 - 0.8   - Merged library and driver, greatly simplifying
#                     installation, and added options to driver.
# 09Aug2002 - 0.9 - Some minor clean-up # use strict; sub fix_print_html { my($in,$out,$ttl) = @_; open(IN_FILE, "< $in") || do { print "fix_print_html: cannot open $in: $!\n"; return 0; }; my($buf, $ttl_buf) = ''; my($indx) = -1; my($is_article) = 1; while() { if( $indx == 1 ) { # ignore everything until we see the chapter or sect # if( $_ =~ /CLASS="CHAP/i || $_ =~ /CLASS="PREF/i || $_ =~ /CLASS="SECT/i ) { $buf .= $_; $indx++; } else { next; } } elsif( $indx == 0 ) { # write out the title page file # if( $_ =~ /CLASS="TOC"/ ) { $ttl_buf .= ">\n\n\n"; $ttl_buf =~ s/<\/H1\n/<\/H1\n>


<\/DIV\n>


/
<\/DIV\n>/ms; &fix_html(\$ttl_buf, 1); open(TOC_FILE, "> $ttl") || do { print "fix_print_html: cannot open $ttl: $!\n"; close(IN_FILE); return 0; }; print TOC_FILE $ttl_buf; close(TOC_FILE); $ttl_buf = ''; $indx++; } else { $ttl_buf .= $_; } } elsif( $indx < 0 ) { if( $_ =~ /CLASS="BOOK"/i ) { $is_article = 0; } # up to this point, both buffers get the line # if( $_ =~ /CLASS="TITLEPAGE"/ ) { $ttl_buf .= $_ . ">\n

\n



\n<\/P\n"; $indx++; } else { $buf .= $_; $ttl_buf .= $_; } } else { $buf .= $_; } } close(IN_FILE); # fix body file # open(OUT_FILE, "> $out") || do { print "fix_print_html: cannot open $out: $!\n"; return 0; }; &fix_html(\$buf, $is_article); print OUT_FILE $buf; close(OUT_FILE); return 1; } sub fix_html { my($buf, $is_article) = @_; my($indx) = -1; # make corrections and write out the file # $$buf =~ s/(\n>/$1$2\n/gms; $$buf =~ s/(\n>/$1$2\n/gms; $$buf =~ s/(\n>/$1$2\n/gms; if( $is_article == 0 ) { $$buf =~ s/(\nCLASS="SECT[TION\d]+"\n>)


) -1 ) { $$buf = substr($$buf, 0, $indx); $$buf .= "\n<\/BODY>\n<\/HTML>\n\n"; } elsif( ($indx = rindex($$buf, " -1 ) { $$buf = substr($$buf, 0, $indx); $$buf .= "\n<\/BODY>\n<\/HTML>\n\n"; } $$buf =~ s/\&\#13;//g; $$buf =~ s/\&\#60;/\</g; $$buf =~ s/\&\#62;/\>/g; $$buf =~ s/\&\#8211;/\-/g; $$buf =~ s/\&\#8220;/\"/g; $$buf =~ s/\&\#8221;/\"/g; $$buf =~ s/WIDTH=\"\d\"//g; $$buf =~ s/><[\/]*TBODY//g; $$buf =~ s/><[\/]*THEAD//g; $$buf =~ s/TYPE=\"1\"\n//gim; $$buf =~ s/<\/P/gms; my($cnt, $j) = 0; if( $$buf !~ /

= 0; $cnt--) { $j = $cnt + 1; if( $cnt == 0 ) { $j = 2; } $$buf =~ s/<\/DIV\n//gms; $buf =~ s/]*?>//gms; $buf =~ s/<\/SPAN\n>//gms; $$buf =~ s/(>(<\/LI\n)/$1$2$3/gms; return; } ########### MAIN DRIVER ############## # Default values for options: my($generate_pdf) = 1; my($generate_ps) = 0; my($pagesize) = "universal"; my($pth) = ''; # Process options. my($arg); while (($#ARGV >= 0) && ($ARGV[0] =~ m/^-/)) { $arg = shift; if ($arg eq "--") {last;} elsif ($arg eq "--postscript") {$generate_ps = 1;} elsif ($arg eq "--nopostscript") {$generate_ps = 0;} elsif ($arg eq "--pdf") {$generate_pdf = 1;} elsif ($arg eq "--nopdf") {$generate_pdf = 0;} elsif ($arg eq "--size") {$pagesize = shift;} elsif ($arg eq "--toolroot") {$pth = shift; $pth .= "/";} else {die "\nldp_print: unknown option $arg\n";} } if( $ARGV[0] eq '' || !(-r $ARGV[0]) ) { die "\nusage: ldp_print [options] \n\n"; } # Compute htmldoc options. my($htmldoc_options) = "--size ${pagesize} --firstpage p1 --footer c.1"; # Now get filename and check it. Don't allow whitespace, since a # filename with whitespace will cause trouble. # NOTE: If the filename can be controlled by an untrusted user, # the filename (and options!) need to be filtered further to forbid # metacharacters, control characters, etc. as well. my($filename) = $ARGV[0]; if ($filename =~ m/ /) { die "\nldp_print: filenames cannot contain spaces.\n"; } if ($filename =~ m/[\t\n]/) { die "\nldp_print: filenames cannot contain whitespace.\n"; } my($fname_wo_ext) = $filename; $fname_wo_ext =~ s/\.[\w]+$//; # create new files from single HTML file to use for print # &fix_print_html($filename, 'body.html', 'title.html'); if ($generate_pdf) { my($pdf_cmd) = "${pth}htmldoc ${htmldoc_options} -t pdf -f ${fname_wo_ext}.pdf " . "--titlefile title.html body.html "; system($pdf_cmd); die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?); }; if ($generate_ps) { my($ps_cmd) = "${pth}htmldoc ${htmldoc_options} -t ps -f ${fname_wo_ext}.ps " . 
"--titlefile title.html body.html"; system($ps_cmd); die "\nldp_print: could not create ${fname_wo_ext}.ps ($!)\n" if ($?); }; # cleanup # system("rm -f body.html title.html"); exit(0);