#!/usr/bin/perl
#
# DESC: Script for creating an OMF (Open Source Metadata Framework) file
# from DocBook SGML.
#
# USAGE: db2omf {-o <output_file>} <docbook_file>
#
# Greg Ferguson <gferg@sgi.com>
#
# See Changelog for the revision history.

# Script version; copied into $_vrs below.
$VERSION = '0.6-pre';

# output stream flush
#
$| = 1;

# Text::Wrap is used to fold the document description; $columns is the
# line width it wraps to.
use Text::Wrap qw($columns &wrap);
$columns = 70;

$_vrs = $VERSION;

# Templates for the generated OMF text; both start out empty.
$_res_template = '';
$_creator_template = '';

# Input (DocBook) and output (OMF) file names.  Both are initialized
# explicitly: assigning a single '' to the list would leave $out_f undefined.
my($in_f, $out_f) = ('', '');


# read in cmd-line arguments
#
#   -o <output_file>   name of the OMF file to write
#   -h                 print the usage message and exit
#   <docbook_file>     DocBook file to read (if several are given, the
#                      last one wins)
#
# Empty arguments are skipped.
#
while( @ARGV ) {

    my($arg) = shift(@ARGV);
    next if( $arg eq '' );

    if( $arg eq "-o" ) {
        # a trailing "-o" without a value leaves the name to be derived below
        $out_f = @ARGV ? shift(@ARGV) : '';
    } elsif( $arg eq "-h" ) {
        &usage();
    } else {
        $in_f = $arg;
    }
}

# the input file is mandatory and must be readable
#
if( $in_f eq '' ) {
    print "db2omf: ERROR <docbook_file> not specified.\n\n";
    &usage();
} elsif( !(-r $in_f) ) {
    # report the file that was actually tested ($in_f)
    print "db2omf: ERROR cannot read $in_f ($!)\n\n";
    &usage();
}

# no output file given: use the base name of the input file (directory
# part dropped, last extension removed) with an ".omf" suffix
#
if( $out_f eq '' ) {
    my($base) = ($in_f =~ m{([^/]+)\z});
    $base = $in_f unless( defined($base) );

    # strip only the last extension, and only if something precedes the dot
    $base =~ s/(.)\.[^.]*\z/$1/;
    $out_f = $base . ".omf";
}


# get today's date; timestamp the template entries
#
# $_today holds the local date as YYYYMMDD.  localtime() returns the year
# as an offset from 1900 and the month counted from 0, hence the
# adjustments; sprintf does the zero padding of month and day.
#
my(@t_comps) = localtime(time());
$_today = sprintf("%04d%02d%02d",
                  $t_comps[5] + 1900, $t_comps[4] + 1, $t_comps[3]);


# read in the OMF resource template
#
# The template text is read from the DATA filehandle and appended to
# $_res_template.
#
$_res_template .= join('', <DATA>);
close(DATA);

# Template for the <creator> element.  The lines are joined with newlines;
# there is no newline after the closing tag.
#
$_creator_template = join("\n",
    " <creator created=\"%%DATE\">",
    " <person created=\"%%DATE\">",
    " <firstName created=\"%%DATE\">",
    " %%FIRSTNAME",
    " </firstName>",
    " <lastName created=\"%%DATE\">",
    " %%LASTNAME",
    " </lastName>",
    " <email created=\"%%DATE\">",
    " %%EMAIL",
    " </email>",
    " </person>",
    " </creator>");

# stamp every "created" attribute with today's date
#
$_creator_template =~ s/%%DATE/$_today/g;
$_res_template =~ s/%%DATE/$_today/g;

# proc_file() is called before the creator block is inserted, so the
# %%CREATOR substitution below uses $_creator_template as it stands after
# that call
#
&proc_file($in_f);

$_res_template =~ s/%%CREATOR/$_creator_template/;
$_res_template =~ s/%%SGML/$in_f/;

# write out the OMF file
#
# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the open mode.  close() is checked as well,
# because buffered write errors only surface there.
#
open(my $omf_fh, '>', $out_f) || die "db2omf: cannot write to $out_f ($!)\n";
print $omf_fh $_res_template, "\n";
close($omf_fh) || die "db2omf: cannot write to $out_f ($!)\n";

print "db2omf: created $out_f from $in_f\n";
exit(0);



#
# proc_file($f)
#
# Reads the meta-information area of the DocBook file $f (from the
# <article>/<book> start tag up to the closing </artheader>,
# </articleinfo> or </bookinfo> tag) and fills in the placeholders of the
# global templates:
#
#   $_res_template      %%TITLE %%TYPE %%KEYWORDS %%DESC
#                       %%VERSION %%V_DATE %%C_DATE
#   $_creator_template  %%FIRSTNAME %%LASTNAME %%EMAIL
#
# Reads the global $_today (used for the keyword entries and as the date
# of last resort).  Dies if $f cannot be opened.  Returns nothing.
#
sub proc_file {

    my($f) = @_;

    my($buf, $vers, $auth, $tmp, $y, $m, $d, $s) = ('') x 8;
    my($found) = 0;

    # pads a one-character value with a leading zero; anything else is
    # returned unchanged
    my($pad2) = sub {
        my($n) = @_;
        return (length($n) == 1) ? "0$n" : $n;
    };

    # read in file (book/article info area only)
    #
    open(my $dbf, '<', $f) || die "db2omf: cannot open $f ($!)\n";
    while( my $line = <$dbf> ) {

        # skip everything before the <article>/<book> start tag
        if( !$found ) {
            next unless( $line =~ /<article/i || $line =~ /<book/i );
            $found = 1;
        }

        # a line that starts a tag but does not end with '>' is joined
        # with the following line (chomp only ever removes the newline)
        if( $line =~ /^</ && !($line =~ />\n$/) ) {
            chomp($line);
        }

        $buf .= $line;

        # the end of the info area ends the scan
        if( $line =~ /<\/artheader/i || $line =~ /<\/articleinfo/i
            || $line =~ /<\/bookinfo/i ) {
            last;
        }
    }
    close($dbf);
    $buf =~ s/\r//g;

    # grab title; clean it up
    #
    ($s) = ($buf =~ m#<title>\s*(.*?)\s*</title>#is);
    $s = '' unless( defined($s) );
    $s =~ s/<subtitle>\s*(.*?)\s*<\/subtitle>//gis;
    $s =~ s/<[^>]*>//gs;
    $s =~ s/\s+/ /g;             # line breaks become single spaces
    $s =~ s/[\s\.\ ]+$//g;
    $s =~ s/^[\s\.\ ]+//g;
    $_res_template =~ s/%%TITLE/$s/;

    # document type; based on title (or filename...)
    #
    my($type) = '';
    if( $s =~ /how[\ ]*to/i || $f =~ /how[\ ]*to/i ) {
        $type = 'HOWTO';
    } elsif( $s =~ /guide/i ) {
        $type = 'guide';
    }
    $_res_template =~ s/%%TYPE/$type/;

    # keywords; based on title
    #
    $s = lc($s);
    $s =~ s/[\-\;\,\:\(\)\+\/\\]+/\ /g;

    # re-join terms that the punctuation removal above split apart
    $s =~ s/\bcd\ rom\b/cd-rom/g;
    $s =~ s/\bi\ o\b/i\/o/g;

    # Stop words are filtered per word, so they are dropped wherever they
    # occur (including first/last position and when adjacent to each
    # other).  split(' ') never yields empty words.
    my(%stop) = map { ($_, 1) } ('to', 'the', 'and', 'is', 'a',
        'of', 'for', 'from', 'but', 'brief', 'at', 'how',
        'with', 'on', 'off', 'via', 'list', 'la', 'le', 'mini');
    my(%seen) = ();
    my(@wrds) = grep { !$stop{$_} && !$seen{$_}++ } split(' ', $s);

    $s = '';
    foreach my $wrd (sort(@wrds)) {
        $s .= " <keywords created=\"${_today}\">" . $wrd . "</keywords>\n";
    }
    $_res_template =~ s/%%KEYWORDS/$s/g;

    # grab author/email
    # XXX TODO: only grabbing one author...
    #
    ($auth) = ($buf =~ m#<author>\s*(.*?)\s*</author>#is);
    $auth = '' unless( defined($auth) );
    foreach my $pair (['firstname', '%%FIRSTNAME'],
                      ['surname',   '%%LASTNAME'],
                      ['email',     '%%EMAIL']) {
        my($tag, $mark) = @$pair;
        my($val) = ($auth =~ m#<$tag>\s*(.*?)\s*</$tag>#is);
        $val = '' unless( defined($val) );
        $_creator_template =~ s/$mark/$val/;
    }

    # grab description/abstract; clean it up
    #
    ($s) = ($buf =~ m#<abstract>\s*(.*?)\s*</abstract>#is);
    $s = '' unless( defined($s) );
    $s =~ s/<indexterm>\s*(.*?)\s*<\/indexterm>//gis;  # may span lines
    $s =~ s/<[^>]*>//gs;
    $s =~ s/\n/\ /g;
    $s =~ s/[\ ]{2,}/\ /g;
    $tmp = wrap(' ', ' ', $s);
    $_res_template =~ s/%%DESC/$tmp/;

    # grab rev/date; the first <revnumber> entry is used
    # XXX TODO: sometimes <revhistory> resides outside the meta area...
    #
    $vers = '';
    ($tmp) = ($buf =~ m#<pubdate[^>]*>\s*(.*?)\s*</pubdate>#is);
    $tmp = '' unless( defined($tmp) );

    if( $tmp eq '' || $buf =~ /<revhistory>/i ) {

        # NOTE: the /g modifier is deliberate.  In scalar context a
        # successful /g match leaves pos($buf) behind it, so the <date>
        # search starts after the <revnumber> that was just found; a
        # failed match resets pos($buf) and the next search starts at
        # the beginning again.
        #
        if( $buf =~ m#<revnumber>\s*(.*?)\s*</revnumber>#gis ) {
            $vers = $1;
        }
        if( $buf =~ m#<date>\s*(.*?)\s*</date>#gis ) {
            $tmp = $1;
        } elsif( $buf =~ m#<releaseinfo>\s*(.*?)\s*</releaseinfo>#gis ) {
            $tmp = $1;
        }
    }

    if( $vers eq '' && $tmp ne '' ) {

        # pull the version out of the <{pub}date> specification
        #
        if( $tmp =~ /[Rr]+evision:[\ ]+([\w\.\-]+)[,\ ]+/ ) {
            $vers = $1;
        } elsif( $tmp =~ /[Vv]+ersion[\ ]+([\w\.\-]+)[,\ ]+/ ) {
            $vers = $1;
        } elsif( $tmp =~ /[Vv]+([\w\.\-]+)[,\ ]+/ ) {
            $vers = $1;
        } elsif( $tmp =~ /[Vv\.\,]+\ ([\w\.\-]+)[,\ ]+/ ) {
            $vers = $1;
        } elsif( $tmp =~ /[Vv]+er[\.\ ]+([\w]+)\-([\w]+)\-([\w]+)\-([\w]+)/ ) {
            $vers = $4;
        } elsif( $tmp =~ /^([\d\.]+)[,\ ]+/ ) {
            $vers = $1;
        }

        if( $tmp =~ /[Vv]+[\,\ ]+[\d]+[\ ]+[A-Za-z]+[\ ]+[\d]{4}/
            ||
            $tmp =~ /draft[\,\ ]+/i ) {
            # date is the version, so null it
            #
            $vers = '';
        }
    }

    if( $vers eq '' || !($vers =~ /\d+/) || $vers =~ /\d\d\d\d/ ) {

        # a fallback, rarely needed...
        # Take the first line of the whole file that mentions <revnumber>.
        # The file is scanned directly, so no external command is run and
        # the file name never reaches a shell.
        #
        if( open(my $rev_fh, '<', $f) ) {
            $vers = '';
            while( my $line = <$rev_fh> ) {
                next unless( $line =~ /<revnumber>/i );
                chomp($line);
                $vers = $line;
                last;
            }
            close($rev_fh);

            $vers =~ s/<[^>]*>//gs;
            # trim first so that a leading "v" is found on indented lines
            $vers =~ s/^\s+//;
            $vers =~ s/\s+$//;
            $vers =~ s/^[Vv]//;
            $vers =~ s/^[\s\.\,\;\ \"\>]+//g;
        }

        if( $vers eq '' ) {
            ($vers) = ($buf =~ m#<edition>\s*(.*?)\s*</edition>#is);
            $vers = '' unless( defined($vers) );
            $vers =~ s/revision//i;
            $vers =~ s/version//i;
            $vers =~ s/ver//i;
            $vers =~ s/v//i;
            $vers =~ s/^[\s\.\,\;\ \"\>]+//g;
        }

        if( $vers eq '' ) {
            $vers = '1.0(?)';
        }

    } else {
        $vers =~ s/^[Vv]//;
        $vers =~ s/^\.//;
    }

    # Drop ordinal suffixes ("23rd, 2001" -> "23, 2001").  The suffix must
    # follow a digit, otherwise words such as "August" would be damaged.
    #
    $tmp =~ s/(?<=\d)(?:rd|nd|st|th)[\ \,]+/,\ /g;

    # Work out year/month/day from the date text; the first format that
    # matches wins, so the order of the tests matters.
    #
    my($mon) = '';
    if( $tmp =~ /(\d\d\d\d)[\/\-]+(\d\d)[\/\-]+(\d\d)/ ) {

        # yyyy-mm-dd or yyyy/mm/dd
        ($y, $m, $d) = ($1, $2, $3);

    } elsif( $tmp =~ /\w+[\ ]+([\w]+)[\ ]+([\d]+)[\ ]+[\d\:]+[\ ]+\w+[\ ]+(\d\d\d\d)/ ) {

        # e.g. "Thu Aug 23 12:00:00 EST 2001"
        ($mon, $d, $y) = ($1, $2, $3);
        $m = get_date($mon);
        $d = $pad2->($d);

    } elsif( $tmp =~ /(\w+)[\.,\ ]+(\d+)[,\ ]+(\d\d\d\d)/ ) {

        # e.g. "Aug. 23, 2001"
        ($mon, $d, $y) = ($1, $2, $3);
        $m = get_date($mon);
        $d = $pad2->($d);

    } elsif( $tmp =~ /[\ ]+(\w+)[\,\.\ ]+(\d\d\d\d)/ ) {

        # "... <month> yyyy"; the day defaults to 01 unless a number
        # precedes the month
        ($mon, $y) = ($1, $2);
        $m = get_date($mon);
        $d = '01';
        if( $tmp =~ /(\d+)[\ ]+[\w\.]+[\ ]+\d\d\d\d/ ) {
            $d = $pad2->($1);
        }

    } elsif( $tmp =~ /(\d+)[\/\.]+(\d+)[\/\.]+(\d\d\d\d)/ ) {

        # first number is taken as the month, second as the day
        ($m, $d, $y) = ($1, $2, $3);
        $m = $pad2->($m);
        $d = $pad2->($d);

    } elsif( $tmp =~ /[Vv]+er[\.\ ]+([\w]+)\-([\w]+)\-([\w]+)\-([\w]+)/ ) {

        # "ver. yyyy-<month>-dd-..."
        ($y, $mon, $d) = ($1, $2, $3);
        $m = get_date($mon);
        $d = $pad2->($d);

    } elsif( $tmp =~ /(\d\d\d\d)\-([\w]+)\-(\d\d)/ ) {

        # yyyy-<month>-dd
        ($y, $mon, $d) = ($1, $2, $3);
        $m = get_date($mon);

    } elsif( $tmp =~ /([\d]+)[\ ]+([\w]+)[,\ ]+(\d\d\d\d)/ ) {

        # dd <month> yyyy
        ($d, $mon, $y) = ($1, $2, $3);
        $m = get_date($mon);
        $d = $pad2->($d);

    } elsif( $tmp =~ /([\w]+)[\ \,]+(\d\d\d\d)/ ) {

        # <month> yyyy
        ($mon, $y) = ($1, $2);
        $m = get_date($mon);
        $d = '01';
    }

    # A date is only accepted with a four-digit year starting with 1 or 2
    # and a month that get_date() recognized ('00' means it did not);
    # otherwise today's date is used.
    #
    if( $y =~ /^[12]\d\d\d$/ && $m ne '00' ) {
        $s = "${y}${m}${d}";
    } else {
        $s = $_today;
    }

    $_res_template =~ s/%%VERSION/$vers/;
    $_res_template =~ s/%%V_DATE/$s/;
    $_res_template =~ s/%%C_DATE/$s/;
    return;
}


#
# get_date($str)
#
# Maps a month name to its two-digit number.  Only the first three
# letters of $str are looked at, case-insensitively, so "Jan", "january"
# and "JANUARY" all give '01'.  Returns '00' when $str is undefined or
# does not start with a known month abbreviation.
#
sub get_date {

    my($str) = @_;

    my(%month_no) = (
        'jan' => '01', 'feb' => '02', 'mar' => '03', 'apr' => '04',
        'may' => '05', 'jun' => '06', 'jul' => '07', 'aug' => '08',
        'sep' => '09', 'oct' => '10', 'nov' => '11', 'dec' => '12',
    );

    return '00' unless( defined($str) );

    my($key) = lc(substr($str, 0, 3));
    return exists($month_no{$key}) ? $month_no{$key} : '00';
}


#
# usage({$status})
#
# Prints the command synopsis and the script version ($_vrs) to STDOUT,
# then terminates the script.  $status is optional and is used as the
# exit status; without it the script exits with 0.
#
sub usage {

    my($status) = @_;
    $status = 0 unless( defined($status) );

    print "\ndb2omf {-o <output_file>} <docbook_file>\n\n",
          "db2omf - version $_vrs\n\n";
    exit($status);
}



# __END__ separates the script from the data section;
# the data that follows is read through the "DATA" filehandle

__END__
<?xml version='1.0'?>
<!DOCTYPE omf PUBLIC "-//Open Source Metadata Framework (OMF) //DTD OMF.dtd V1.1//EN"
"http://www.ibiblio.org/pub/Linux/metadata/OMF.dtd">

<omf xmlns="http://www.ibiblio.org/osrt/omf/" created="%%DATE" agent="db2omf">

<resource created="%%DATE">

<title created="%%DATE">%%TITLE</title>
<date created="%%DATE">%%C_DATE</date>

%%CREATOR

<versionGroup created="%%DATE">
<version created="%%DATE">
<id created="%%DATE">
%%VERSION
</id>
<date created="%%DATE">
%%V_DATE
</date>
</version>
</versionGroup>

%%KEYWORDS
<description created="%%DATE">
%%DESC
</description>

<type created="%%DATE">%%TYPE</type>
<format created="%%DATE" dtd="DocBook" mime="text/sgml" />
<identifier created="%%DATE" url="file:/%%SGML" />
</resource>
</omf>