diff --git a/LDP/builder/db2omf/README b/LDP/builder/db2omf/README new file mode 100644 index 00000000..6e7ef803 --- /dev/null +++ b/LDP/builder/db2omf/README @@ -0,0 +1,33 @@ + #################################################################### + # db2omf - perl filter for producing OMF files from DocBook SGML # + # version 0.3 / June 2001 # + #################################################################### + + Copyright (C) 2001-2000 - Greg Ferguson (gferg@metalab.unc.edu) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + #################################################################### + + Execute script as : db2omf {-o <output-file>} <input-file>.sgml + Perl referenced as : /usr/bin/perl + + See the 'samples' directory for output examples derived from + actual DocBook (v3.x and v4.x) files. + + #################################################################### + + For more information, see the OMF site - http://www.ibiblio.org/osrt/omf/ + or contact me (Greg Ferguson - gferg@metalab.unc.edu) diff --git a/LDP/builder/db2omf/db2omf b/LDP/builder/db2omf/db2omf new file mode 100644 index 00000000..4a86f68f --- /dev/null +++ b/LDP/builder/db2omf/db2omf @@ -0,0 +1,498 @@ +#!/usr/bin/perl +# +# DESC: Script for creating an OMF (Open Source Metadata Framework) file +# from DocBook SGML. 
+# +# USAGE: db2omf {-o <output-file>} <input-file>.sgml +# +# Greg Ferguson +# +# 12Mar2001 0.1 initial release +# 26Mar2001 0.2 modified public identifier +# 06jun2001 0.3 various clean-up, rm'd some elements that are not clear +# + +# output stream flush +# +$| = 1; + +use Text::Wrap qw($columns &wrap); +$columns = 70; + +$_vrs = '0.3'; +$_res_template = ''; +$_creator_template = ''; + +# input (.sgml) and output (.omf) file names; both start out empty +# (assigning a single '' to the list only initialised $in_f) +# +my($in_f, $out_f) = ('', ''); + + +# read in cmd-line arguments +# +while(1) { + + if($ARGV[0] eq "-o") { + shift(@ARGV); + $out_f = $ARGV[0]; + shift(@ARGV); + } elsif($ARGV[0] eq "-h") { + &usage(); + } else { + $in_f = $ARGV[0]; + shift(@ARGV); + } + + if($ARGV[0] eq '') { + last; + } +} + +if( $in_f eq '' ) { + print "db2omf: ERROR .sgml not specified.\n\n"; + &usage(); +} elsif( !(-r $in_f) ) { + # name the file that could not be read; the old message used $f, + # which is never set at file scope, so the name came out blank + # + print "db2omf: ERROR cannot read $in_f ($!)\n\n"; + &usage(); +} + +# no -o given: derive the output name from the input's base name +# +if( $out_f eq '' ) { + $in_f =~ /([^\/]+)\.sgml$/i; + $out_f = $1 . ".omf"; +} + + +# get today's date; timestamp the template entries +# +my(@t_comps) = localtime(time()); +$i = (($t_comps[5])+0) + 1900; +my($y) = "$i"; +$i = (($t_comps[4])+0) + 1; +my($m) = "$i"; +my($d) = $t_comps[3]; +$_today = "${y}" . (length($m) == 1 ? '0' : '') . "${m}" . + (length($d) == 1 ? '0' : '') . "${d}"; + + +# read in the OMF resource template (the text following __END__, +# available through the DATA filehandle) +# +while(<DATA>){ + $_res_template .= $_; +} +close(DATA); + +# NOTE(review): the string fragments below carry only whitespace and the +# %%FIRSTNAME/%%LASTNAME/%%EMAIL placeholders; any markup they once held +# is not visible here -- confirm against the upstream script +# +$_creator_template =" \n" . + " \n" . + " \n" . + " %%FIRSTNAME\n" . + " \n" . + " \n" . + " %%LASTNAME\n" . + " \n" . + " \n" . + " %%EMAIL\n" . + " \n" . + " \n" . 
+ " "; + +$_creator_template =~ s/%%DATE/$_today/g; +$_res_template =~ s/%%DATE/$_today/g; + +&proc_sgml($in_f); + +$_res_template =~ s/%%CREATOR/$_creator_template/; +$_res_template =~ s/%%SGML/$in_f/; + +# write out the OMF file +# +open(OMF, "> $out_f") || die "db2omf: cannot write to $out_f ($!)\n"; +print OMF $_res_template, "\n"; +close(OMF); + + +print "db2omf: created $out_f from $in_f\n"; +exit(0); + + + +# +# proc_sgml(f) - read the info area of DocBook file f (up to the closing +# artheader/articleinfo/bookinfo tag) and fill in the global templates: +# %%TITLE, %%TYPE, %%KEYWORDS, %%DESC, %%VERSION, %%V_DATE and %%C_DATE in +# $_res_template; %%FIRSTNAME, %%LASTNAME and %%EMAIL in $_creator_template. +# Dies when f cannot be opened. The version defaults to '1.0(?)' and the +# date to $_today when nothing usable is found. Returns nothing. +# NOTE(review): several m#...# patterns and both readline operators in +# this sub show no <...> text; confirm them against the upstream script. +# +sub proc_sgml { + + my($f) = @_; + + my($buf,$vers,$auth,$tmp,$y,$m,$d,$s) = ''; + my($i, $notfound) = 0; + + # read in file (book/article info area only) + # + open(DBF, "$f") || die "db2omf: cannot open $f ($!)\n"; + while() { + + if($notfound == 0) { + if( $_ =~ /
\n$/)) { + chop; + } + if($_ =~ /<\/artheader/i || /<\/articleinfo/i || $_ =~ /<\/bookinfo/i) { + $buf .= $_; + last; + } + if($_ ne '') { + $buf .= $_; + } + } + close(DBF); + $buf =~ s/\r//g; + + # grab title; clean it up + # NOTE(review): the first pattern below shows no tag text; presumably + # it captured the text between the title tags -- confirm + # + ($s) = ($buf =~ m#\s*(.*?)\s*#is); + $s =~ s/\n//g; + $s =~ s/\s*(.*?)\s*<\/subtitle>//gi; + $s =~ s/<[^>]*>//gs; + $s =~ s/[\s\.\ ]+$//g; + $s =~ s/^[\s\.\ ]+//g; + $s =~ s/_/_/g; + $_res_template =~ s/%%TITLE/$s/; + + + # document type; based on title (or filename...) + # + if( $s =~ /how[\ ]*to/i || $f =~ /how[\ ]*to/i ) { + $_res_template =~ s/%%TYPE/HOWTO/; + } elsif( $s =~ /guide/i ) { + $_res_template =~ s/%%TYPE/guide/; + } else { + $_res_template =~ s/%%TYPE//; + } + + + # keywords; based on title + # + $s = lc($s); + $s =~ s/[\-\;\,\:\(\)\+\/\\]+/\ /g; + + my($wrd) = ''; + my(@wrds) = ('to', 'the', 'and', 'is', 'a', + 'of', 'for', 'from', 'but', 'brief', 'at', 'how', + 'with', 'on', 'off', 'via', 'list', 'la', 'le', 'mini'); + foreach $wrd (@wrds) { + $s =~ s/[\ ]+$wrd[\ ]+/\ /g; + } + $s =~ s/cd\ rom/cd-rom/g; + $s =~ s/i\ o/i\/o/g; + + @wrds = split(/\ /, $s); + %seen = (); + @wrds = grep(!$seen{$_}++, @wrds); + $s = ''; + foreach (sort(@wrds)) { + $s .= " " . $_ . "\n"; + } + $_res_template =~ s/%%KEYWORDS/$s/g; + + + # grab author/email + # XXX TODO: only grabbing one author... + # + ($auth) = ($buf =~ m#\s*(.*?)\s*#is); + ($s) = ($auth =~ m#\s*(.*?)\s*#is); + $_creator_template =~ s/%%FIRSTNAME/$s/; + ($s) = ($auth =~ m#\s*(.*?)\s*#is); + $_creator_template =~ s/%%LASTNAME/$s/; + ($s) = ($auth =~ m#\s*(.*?)\s*#is); + $_creator_template =~ s/%%EMAIL/$s/; + + + # grab description/abstract; clean it up + # + ($s) = ($buf =~ m#\s*(.*?)\s*#is); + $s =~ s/\s*(.*?)\s*<\/indexterm>//gim; + $s =~ s/<[^>]*>//gs; + $s =~ s/\n/\ /g; + $s =~ s/[\ ]{2,}/\ /g; + $tmp = wrap(' ', ' ', $s); + $_res_template =~ s/%%DESC/$tmp/; + + + # grab rev/date; take the last entry + # XXX TODO: sometimes resides outside the meta area... 
+ # + $vers = ''; + $tmp = ''; + + ($tmp) = ($buf =~ m#]*>\s*(.*?)\s*#is); + + if( $tmp eq '' || $buf =~ //i ) { + + # grab rev/date; take the first entry, or should it be the last?! + # + if( $buf =~ m#\s*(.*?)\s*#gis ) { + $vers = $1; + } + if( $buf =~ m#\s*(.*?)\s*#gis ) { + $tmp = $1; + } elsif( $buf =~ m#\s*(.*?)\s*#gis ) { + $tmp = $1; + } + } + + if( $vers eq '' && $tmp ne '' ) { + + # pull the version out of the <{pub}date> specification + # + if( $tmp =~ /[Rr]+evision:[\ ]+([\w\.\-]+)[,\ ]+/ ) { + $vers = $1; + } elsif( $tmp =~ /[Vv]+ersion[\ ]+([\w\.\-]+)[,\ ]+/ ) { + $vers = $1; + } elsif( $tmp =~ /[Vv]+([\w\.\-]+)[,\ ]+/ ) { + $vers = $1; + } elsif( $tmp =~ /[Vv\.\,]+\ ([\w\.\-]+)[,\ ]+/ ) { + $vers = $1; + } elsif( $tmp =~ /[Vv]+er[\.\ ]+([\w]+)\-([\w]+)\-([\w]+)\-([\w]+)/ ) { + $vers = $4; + } elsif( $tmp =~ /^([\d\.]+)[,\ ]+/ ) { + $vers = $1; + } + + if( $tmp =~ /[Vv]+[\,\ ]+[\d]+[\ ]+[A-Za-z]+[\ ]+[\d]{4}/ + || + $tmp =~ /draft[\,\ ]+/i ) { + # date is the version, so null it + # + $vers = ''; + } + } + + if( $vers eq '' || !($vers =~ /\d+/) || $vers =~/\d\d\d\d/ ) { + + # a fallback, rarely needed... 
+ # NOTE(review): $f is interpolated into a shell command line here, + # and the grep pattern is empty as shown -- confirm both against the + # upstream script + # + if( open(HTF_REV, "grep -i '' $f |") ) { + $vers = ; + chop($vers); + close(HTF_REV); + + $vers =~ s/<[^>]*>//gs; + $vers =~ s/^[Vv]//; + $vers =~ s/^[\s\.\,\;\ \"\>]+//g; + } + + if( $vers eq '' ) { + ($vers) = ($buf =~ m#\s*(.*?)\s*#is); + $vers =~ s/revision//i; + $vers =~ s/version//i; + $vers =~ s/ver//i; + $vers =~ s/v//i; + $vers =~ s/^[\s\.\,\;\ \"\>]+//g; + } + + if( $vers eq '' ) { + $vers = '1.0(?)'; + } + + } else { + $vers =~ s/^[Vv]//; + $vers =~ s/^\.//; + } + + $tmp =~ s/(?:rd|nd|st|th)[\ \,]+/,\ /g; + + if( $tmp =~ /(\d\d\d\d)[\/\-]+(\d\d)[\/\-]+(\d\d)/ ) { + + $y = $1; + $m = $2; + $d = $3; + + } elsif( $tmp =~ /\w+[\ ]+([\w]+)[\ ]+([\d]+)[\ ]+[\d\:]+[\ ]+\w+[\ ]+(\d\d\d\d)/ ) { + + $y = $3; + $m = &get_date($1); + $d = $2; + if( length($d) == 1 ) { + $d = "0$d"; + } + + } elsif( $tmp =~ /(\w+)[\.,\ ]+(\d+)[,\ ]+(\d\d\d\d)/ ) { + + $m = &get_date($1); + $d = $2; + $y = $3; + if( length($d) == 1 ) { + $d = "0$d"; + } + + } elsif( $tmp =~ /[\ ]+(\w+)[\,\.\ ]+(\d\d\d\d)/ ) { + + $y = $2; + $m = &get_date($1); + $d = '01'; + if( $tmp =~ /(\d+)[\ ]+[\w\.]+[\ ]+\d\d\d\d/ ) { + $d = $1; + if( length($d) == 1 ) { + $d = "0$d"; + } + } + + } elsif( $tmp =~ /(\d+)[\/\.]+(\d+)[\/\.]+(\d\d\d\d)/ ) { + $y = $3; + $m = $1; + if( length($m) == 1 ) { + $m = "0$m"; + } + $d = $2; + if( length($d) == 1 ) { + $d = "0$d"; + } + + } elsif( $tmp =~ /[Vv]+er[\.\ ]+([\w]+)\-([\w]+)\-([\w]+)\-([\w]+)/ ) { + + $y = $1; + $m = &get_date($2); + if( length($m) == 1 ) { + $m = "0$m"; + } + $d = $3; + if( length($d) == 1 ) { + $d = "0$d"; + } + + } elsif( $tmp =~ /(\d\d\d\d)\-([\w]+)\-(\d\d)/ ) { + + $y = $1; + $m = &get_date($2); + $d = $3; + if( length($d) == 1 ) { + $d = "0$d"; + } + + } elsif( $tmp =~ /([\d]+)[\ ]+([\w]+)[,\ ]+(\d\d\d\d)/ ) { + + $y = $3; + $m = &get_date($2); + $d = $1; + if( length($d) == 1 ) { + $d = "0$d"; + } + } elsif( $tmp =~ /([\w]+)[\ \,]+(\d\d\d\d)/ ) { + + $y = $2; + $m = &get_date($1); + $d = '01'; + } + + if( !($y =~ /^2/ || $y =~ /^1/) ) 
{ + $y = ''; + } + if( !($y =~ /^\d\d\d\d$/) ) { + $y = ''; + } + if( $m eq '00' ) { + $y = ''; + } + + $s = "${y}${m}${d}"; + if( $y eq '' ) { + $s = $_today; + } + + $_res_template =~ s/%%VERSION/$vers/; + $_res_template =~ s/%%V_DATE/$s/; + $_res_template =~ s/%%C_DATE/$s/; + return; +} + + +# +# get_date(str) - map a month name to its two-digit number. Only the +# first three letters are compared, ignoring case ('Jan' .. 'Dec' give +# '01' .. '12'); anything else gives '00'. +# +sub get_date { + + my($str) = @_; + + if($str =~ /^Jan/i ) { + return '01'; + } elsif($str =~ /^Feb/i ) { + return '02'; + } elsif($str =~ /^Mar/i ) { + return '03'; + } elsif($str =~ /^Apr/i ) { + return '04'; + } elsif($str =~ /^May/i ) { + return '05'; + } elsif($str =~ /^Jun/i ) { + return '06'; + } elsif($str =~ /^Jul/i ) { + return '07'; + } elsif($str =~ /^Aug/i ) { + return '08'; + } elsif($str =~ /^Sep/i ) { + return '09'; + } elsif($str =~ /^Oct/i ) { + return '10'; + } elsif($str =~ /^Nov/i ) { + return '11'; + } elsif($str =~ /^Dec/i ) { + return '12'; + } + + return '00'; +} + + +# +# usage() - print the command synopsis and the script version ($_vrs), +# then exit with status 0. +# +sub usage { + print "\ndb2omf {-o } .sgml\n\n", + "db2omf - version $_vrs\n\n"; + exit(0); +} + + + +# __END__ delimits the script from the data section; +# the following data is read in by the "DATA" filehandle + +__END__ + + + + + + + + + %%TITLE + %%C_DATE + +%%CREATOR + + + + + %%VERSION + + + %%V_DATE + + + + +%%KEYWORDS + +%%DESC + + + %%TYPE + + + + +