mirror of https://github.com/tLDP/LDP
743 lines
19 KiB
Perl
Executable File
743 lines
19 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# Converts WikiText files into docbook.
|
|
#
|
|
# Copyright (c) 2001, 2002, 2003 David Merrill <david@lupercalia.net>.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
#
|
|
package Wt2Db;
|
|
|
|
use File::Basename;
|
|
use HTML::Entities;
|
|
use FileHandle;
|
|
use Exporter;
|
|
|
|
@ISA = qw(Exporter);
|
|
@EXPORT = qw(
|
|
new
|
|
ProcessFile
|
|
ProcessLine
|
|
ProcessEnd
|
|
Buffer
|
|
Reset
|
|
);
|
|
|
|
&Reset;
|
|
|
|
# Call this before rerunning ProcessLine to clear state.
|
|
#
|
|
sub Reset {
|
|
$level1 = 0;
|
|
$level2 = 0;
|
|
$level3 = 0;
|
|
$orderedlist = 0;
|
|
$listitem = 0;
|
|
$itemizedlist = 0;
|
|
@lists = ();
|
|
$para = 0;
|
|
$qandaset = 0;
|
|
$qandaentry = 0;
|
|
$answer = 0;
|
|
|
|
# These are passed in by the caller
|
|
#
|
|
$txtfile = '';
|
|
$dbfile = '';
|
|
$verbose = 0;
|
|
$doctype = 0;
|
|
$articleclass = '';
|
|
$nonet = 0;
|
|
|
|
# These maintain state
|
|
#
|
|
$line = '';
|
|
$linenumber = 0;
|
|
$id = '';
|
|
$title = '';
|
|
$buf = '';
|
|
|
|
$noparatag = 0;
|
|
$noparadepth = 0;
|
|
$noparaline = 0;
|
|
}
|
|
|
|
|
|
# -----------------------------------------------------------
|
|
|
|
sub new {
|
|
my $that = shift;
|
|
my $class = ref($that) || $that;
|
|
my $self = {};
|
|
bless $self, $class;
|
|
return $self;
|
|
}
|
|
|
|
sub ProcessFile {
|
|
($self, $txtfile, $dbfile, $verbose, $doctype, $articleclass, $nonet, $encoding) = @_;
|
|
|
|
# Read from STDIN if no input file given
|
|
#
|
|
if ($txtfile) {
|
|
if( !(-r $txtfile) ) {
|
|
print "wt2db: ERROR cannot read $f ($!)\n\n";
|
|
exit(1);
|
|
} else {
|
|
$fh = new FileHandle;
|
|
open $fh, "<$txtfile" or die "Cannot open $txtfile ($!)\n";
|
|
}
|
|
} else {
|
|
$fh = STDIN;
|
|
}
|
|
|
|
if ($dbfile) {
|
|
$outfh = new FileHandle;
|
|
open $outfh, ">$dbfile" or die "Cannot write to $dbfile\n\n";
|
|
} else {
|
|
$outfh = STDOUT;
|
|
}
|
|
|
|
# wrap article if requested
|
|
#
|
|
$encoding = 'ISO-8859-1' unless ($encoding);
|
|
if ($doctype eq 'XML') {
|
|
print "Adding XML DOCTYPE and article tags\n" if ($verbose);
|
|
$buf = '<?xml version="1.0" encoding="' . $encoding . '" standalone="no"?>' . "\n";
|
|
$buf .= '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"' . "\n";
|
|
$buf .= ' "http://docbook.org/xml/4.3/docbookx.dtd"';
|
|
$buf .= "\[\n";
|
|
$buf .= '<!ENTITY % ISOnum PUBLIC' . "\n";
|
|
$buf .= ' "ISO 8879:1986//ENTITIES Numeric and Special Graphic//EN//XML"' . "\n";
|
|
$buf .= ' "http://docbook.org/xml/4.3/ent/iso-num.ent">' . "\n";
|
|
$buf .= ' %ISOnum;' . "\n";
|
|
$buf .= "\]\>\n";
|
|
$buf .= "\n";
|
|
if ($articleclass) {
|
|
print "Setting article class to $articleclass\n" if ($verbose);
|
|
$buf .= "<article class='$articleclass'>\n";
|
|
} else {
|
|
$buf .= '<article>' . "\n";
|
|
}
|
|
} elsif ($doctype eq 'SGML') {
|
|
print "Adding SGML DOCTYPE and article tags\n" if ($verbose);
|
|
$buf = '<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V4.1//EN">' . "\n";
|
|
if ($articleclass) {
|
|
$buf .= "<article class='$articleclass'>\n";
|
|
} else {
|
|
$buf .= '<article>' . "\n";
|
|
}
|
|
}
|
|
|
|
# read in the text file
|
|
#
|
|
while ($originalline = <$fh>) {
|
|
chomp($originalline);
|
|
$originalline =~ s/\x0a//g;;
|
|
$originalline =~ s/\x0d//g;;
|
|
print "Read line $originalline\n" if ($verbose);
|
|
ProcessLine($foo, $originalline);
|
|
print $outfh "$buf";
|
|
$buf = '';
|
|
}
|
|
|
|
ProcessEnd();
|
|
|
|
# wrap article if requested
|
|
#
|
|
if ($doctype) {
|
|
$buf .= '</article>' . "\n";
|
|
}
|
|
|
|
print $outfh "$buf";
|
|
$buf = '';
|
|
close $fh;
|
|
close $outfh;
|
|
}
|
|
|
|
sub ProcessLine {
|
|
($foo, $originalline) = @_;
|
|
|
|
$line = $originalline;
|
|
$linenumber++;
|
|
|
|
&trimline;
|
|
|
|
# blank lines
|
|
unless ($line) {
|
|
unless ($noparadepth) {
|
|
&closenonsect;
|
|
return;
|
|
}
|
|
}
|
|
|
|
# capitalize hints that can be entered in lowercase
|
|
#
|
|
$line =~ s/^q:/Q:/;
|
|
$line =~ s/^a:/A:/;
|
|
|
|
# encode entities
|
|
#
|
|
# while ($line =~ //) {
|
|
# }
|
|
# decode_entities($line);
|
|
# encode_entities($line);
|
|
|
|
# inline docbook
|
|
#
|
|
# parse all links, internal and external
|
|
#
|
|
while ($line =~ /\[\[/) {
|
|
unless ($line =~ /\]\]/) {
|
|
$buf .= "ERROR unterminated '[[' tag on line $linenumber.\n";
|
|
}
|
|
|
|
# separate link url from link name
|
|
#
|
|
$link = $line;
|
|
$link=~ s/\n//g;
|
|
$link =~ s/.*?\[\[//;
|
|
$link =~ s/\]\].*?$//;
|
|
if ($link =~ /\|/) {
|
|
$linkname = $link;
|
|
$link =~ s/\|.+$//;
|
|
$linkname =~ s/^\S+\|//;
|
|
} else {
|
|
$linkname = $link;
|
|
}
|
|
|
|
# kill quotes inside links, they mess us up because
|
|
# we have to wrap this string with quotes.
|
|
# perhaps it should be encoding the entire URL?
|
|
#
|
|
$link =~ s/'/%27/g;
|
|
|
|
# namespaces are handled differently
|
|
#
|
|
print "Link to $link\n" if ($verbose);
|
|
|
|
if ($link =~ /^http:\/\//) {
|
|
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
} elsif ($link =~ /^link:/) {
|
|
$link =~ s/^link://;
|
|
$linkname =~ s/^link://;
|
|
$line =~ s/\[\[.*?\]\]/<xref linkend='$link' endterm='$link-title'\/\>/;
|
|
} elsif ($link =~ /^ftp:/) {
|
|
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
} elsif ($link =~ /^news:/) {
|
|
$linkname =~ s/^news:\/\///;
|
|
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
} elsif ($link =~ /^mailto:/) {
|
|
$linkname =~ s/^mailto://;
|
|
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
} elsif ($link =~ /^wiki:/) {
|
|
$linkname =~ s/^wiki://;
|
|
$link =~ s/^wiki:/http:\/\/www\.wikipedia\.com\/wiki\.phtml\?title=/;
|
|
$link =~ s/\ /_/g;
|
|
$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
} elsif ($link =~ /^ldp:/) {
|
|
$linkname =~ s/^ldp://;
|
|
$link =~ s/^ldp://;
|
|
if ($nonet) {
|
|
$line =~ s/\[\[.*?\]\]/<citetitle>$link<\/citetitle>/;
|
|
} else {
|
|
#$tempfile = "/tmp/wt2db-" . $rand;
|
|
#$cmd = "wget -q http://db.linuxdoc.org/cgi-pub/ldp-xml.pl?name=$link -O $tempfile";
|
|
#print "Command $cmd\n" if ($verbose > 1);
|
|
#$return = system("$cmd");
|
|
#unless ($return) {
|
|
# open(URL, "$tempfile") || die "wt2db: cannot open temporary file ($!)\n\n";
|
|
# $link = '';
|
|
# while ($url_line = <URL>) {
|
|
# $url_line =~ s/\n//;
|
|
# if ($url_line =~ /identifier/) {
|
|
# $link .= $url_line;
|
|
# }
|
|
# }
|
|
# close(URL);
|
|
# unlink $tempfile;
|
|
#}
|
|
#$link =~ s/^.*?<identifier>//;
|
|
#$link =~ s/<\/identifier>.*?$//;
|
|
#if ($link eq '') {
|
|
# $linkname = "ERROR: LDP namespace resolution failure on $linkname";
|
|
#}
|
|
#$line =~ s/\[\[.*?\]\]/<ulink url='$link'><citetitle>$linkname<\/citetitle><\/ulink>/;
|
|
$line =~ s/\[\[.*?\]\]/<citetitle>$linkname<\/citetitle>/;
|
|
}
|
|
} elsif ($link =~ /^file:/) {
|
|
$linkname =~ s/^file://;
|
|
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
|
} elsif ($link =~ /^dir:/) {
|
|
$linkname =~ s/^dir://;
|
|
$line =~ s/\[\[.*?\]\]/<filename class='directory'>$linkname<\/filename>/;
|
|
} elsif ($link =~ /^dev:/) {
|
|
$linkname =~ s/^dev://;
|
|
$line =~ s/\[\[.*?\]\]/<filename class='devicefile'>$linkname<\/filename>/;
|
|
} else {
|
|
$line =~ s/\[\[.*?\]\]/<filename>$linkname<\/filename>/;
|
|
}
|
|
}
|
|
|
|
# emphasis
|
|
#
|
|
while ($line =~ /'''.*'''/) {
|
|
$line =~ s/'''/<emphasis role='bold'>/;
|
|
$line =~ s/'''/<\/emphasis>/;
|
|
}
|
|
|
|
# this block defines DocBook structures that won't be broken up with
|
|
# paragraphs when we hit empty lines:
|
|
#
|
|
# <para>
|
|
# <sect1>
|
|
# <sect2>
|
|
# <sect3>
|
|
# <programlisting>
|
|
# <literallayout>
|
|
|
|
# forget about being in nopara state if we're no longer in one
|
|
#
|
|
if ($noparadepth == 0) {
|
|
$noparatag = "";
|
|
}
|
|
|
|
# start a new nopara section
|
|
#
|
|
if ((($line =~ /^<para>/) or
|
|
($line =~ /^<sect/) or
|
|
($line =~ /^<screen>/) or
|
|
($line =~ /^<blockquote>/) or
|
|
($line =~ /^<literallayout>/) or
|
|
($line =~ /^<articleinfo>/) or
|
|
($line =~ /^<programlisting>/)) and
|
|
($noparadepth == 0)) {
|
|
&closepara;
|
|
$noparatag = $line;
|
|
$noparatag =~ s/^.*?<//;
|
|
$noparatag =~ s/>.*$//;
|
|
$noparaline = $linenumber;
|
|
|
|
# screen sections don't embed para tags, but are wrapped in them
|
|
#
|
|
if ($line =~ /^<screen>/) {
|
|
unless ($para) {
|
|
$buf .= "<para>";
|
|
$para++;
|
|
}
|
|
}
|
|
}
|
|
|
|
# count noparadepth
|
|
#
|
|
if ($noparatag ne '') {
|
|
$temp = $line;
|
|
while ($temp =~ /<$noparatag>/) {
|
|
$temp =~ s/<$noparatag>//;
|
|
$noparadepth ++;
|
|
}
|
|
while ($temp =~ /<\/$noparatag>/) {
|
|
$temp =~ s/<\/$noparatag>//;
|
|
$noparadepth --;
|
|
if ($noparadepth == 0) {
|
|
$noparaline = 0;
|
|
}
|
|
}
|
|
|
|
# runon protection
|
|
#
|
|
if (($noparaline) and ($linenumber >= ($noparaline + 100))) {
|
|
$buf .= "ERROR: runon block starting on line $noparaline\n";
|
|
return;
|
|
}
|
|
|
|
# recover original line -- no whitespace modifiers
|
|
# allow nonencoded text in unparsed lines, when in a literal block
|
|
#
|
|
$line = $originalline;
|
|
chomp($line);
|
|
if ($line =~ /^<$noparatag>/ ) {
|
|
$starttag = "<$noparatag>";
|
|
} else {
|
|
$starttag = '';
|
|
}
|
|
if ($line =~ /<\/$noparatag>/ ) {
|
|
$endtag = "<\/$noparatag>";
|
|
} else {
|
|
$endtag = '';
|
|
}
|
|
|
|
$line =~ s/<$noparatag>//;
|
|
$line =~ s/<\/$noparatag>//;
|
|
if (($noparatag eq 'screen') or
|
|
($noparatag eq 'literallayout') or
|
|
($noparatag eq 'programlisting')) {
|
|
encode_entities($line);
|
|
}
|
|
$line = "$starttag$line$endtag";
|
|
chomp($line);
|
|
|
|
# sect3
|
|
#
|
|
} elsif ($line =~ /^===/) {
|
|
&close3;
|
|
&splittitle;
|
|
if ($id eq '') {
|
|
$line = "<sect3><title>$title</title>";
|
|
} else {
|
|
$line = "<sect3 id='$id'><title id='$id-title'>$title</title>";
|
|
}
|
|
$level3 = 1;
|
|
|
|
# sect2
|
|
#
|
|
} elsif ($line =~ /^==/) {
|
|
&close2;
|
|
&splittitle;
|
|
if ($id eq '') {
|
|
$line = "<sect2><title>$title</title>";
|
|
} else {
|
|
$line = "<sect2 id='$id'><title id='$id-title'>$title</title>";
|
|
}
|
|
$level2 = 1;
|
|
|
|
# sect1
|
|
#
|
|
} elsif ($line =~ /^=/) {
|
|
&close1;
|
|
&splittitle;
|
|
if ($id eq '') {
|
|
$line = "<sect1><title>$title</title>";
|
|
} else {
|
|
$line = "<sect1 id='$id'><title id='$id-title'>$title</title>";
|
|
}
|
|
$level1 = 1;
|
|
|
|
# appendix
|
|
#
|
|
} elsif (($line =~ /^<appendix/) or
|
|
($line =~ /<\/appendix>/)) {
|
|
&close1;
|
|
|
|
# $appendix = $line;
|
|
# $appendix =~ s/^\s*?(<appendix[^>]*?>)/$1/;
|
|
|
|
# orderedlist
|
|
#
|
|
} elsif ($line =~ /^\s*#/) {
|
|
&trimline;
|
|
# &closeitemizedlist;
|
|
$listdepth = $line;
|
|
$listdepth =~ s/(^#+).*/$1/;
|
|
$listdepth = length($listdepth);
|
|
if ($listdepth > $orderedlist) {
|
|
$buf .= "<orderedlist>\n";
|
|
$orderedlist++;
|
|
push @lists, 'ordered';
|
|
} else {
|
|
&closelistitem;
|
|
}
|
|
$line =~ s/^#+\s*//;
|
|
$line =~ s/^/<listitem>\n<para>/;
|
|
$listitem++;
|
|
$para++;
|
|
} elsif ($line =~ /^\/#/) {
|
|
$line =~ s/^\/#+//;
|
|
&trimline;
|
|
$listdepth = $line;
|
|
$listdepth =~ s/(^#+).*/$1/;
|
|
$listdepth = length($listdepth);
|
|
while (($orderedlist) and ($orderedlist >= $listdepth)) {
|
|
&closeorderedlist;
|
|
}
|
|
|
|
# itemizedlist
|
|
#
|
|
} elsif ($line =~ /^\s*\*/) {
|
|
&trimline;
|
|
# &closeitemizedlist;
|
|
$listdepth = $line;
|
|
$listdepth =~ s/(^\*+).*/$1/;
|
|
$listdepth = length($listdepth);
|
|
if ($listdepth > $itemizedlist) {
|
|
$buf .= "<itemizedlist>\n";
|
|
$itemizedlist++;
|
|
push @lists, 'itemized';
|
|
} else {
|
|
&closelistitem;
|
|
}
|
|
$line =~ s/^\*+\s*//;
|
|
$line =~ s/^/<listitem>\n<para>/;
|
|
$listitem++;
|
|
$para++;
|
|
} elsif ($line =~ /^\/\*/) {
|
|
$line =~ s/^\/\*+//;
|
|
&trimline;
|
|
$listdepth = $line;
|
|
$listdepth =~ s/(^\*+).*/$1/;
|
|
$listdepth = length($listdepth);
|
|
while (($itemizedlist) and ($itemizedlist >= $listdepth)) {
|
|
&closeitemizedlist;
|
|
}
|
|
|
|
# question
|
|
#
|
|
} elsif ($line =~ /^Q:/) {
|
|
&closelists;
|
|
&closeqandaentry;
|
|
$line =~ s/^Q://;
|
|
&trimline;
|
|
&splittitle;
|
|
if ($id eq '') {
|
|
$line = "<question><para>$title</para></question>";
|
|
} else {
|
|
$line = "<question id='$id'><para id='$id-title'>$title</para></question>";
|
|
}
|
|
unless ($qandaentry) {
|
|
$line = "<qandaentry>\n" . $line;
|
|
$qandaentry = 1;
|
|
}
|
|
if ($qandaset == 0) {
|
|
$line = "<qandaset defaultlabel='qanda'>\n" .
|
|
"<?dbhtml toc='1' ?>\n" .
|
|
# "<?dbhtml cell-spacing='1em' cell-padding='1em' ?>" .
|
|
$line;
|
|
$qandaset = 1;
|
|
}
|
|
|
|
# answer
|
|
#
|
|
} elsif ($line =~ /^A:/) {
|
|
$line =~ s/^A://;
|
|
&trimline;
|
|
&closeanswer;
|
|
$line = "<answer><para>" . $line;
|
|
$answer = 1;
|
|
$para++;
|
|
|
|
} elsif ($line =~ /^\s*----\s*$/) {
|
|
$line = '';
|
|
|
|
# para
|
|
#
|
|
} else {
|
|
if (($para == 0) and ($noparatag eq '')) {
|
|
$line = "<para>" . $line;
|
|
$para++;
|
|
} else {
|
|
$line .= " ";
|
|
}
|
|
}
|
|
|
|
$buf .= "$line\n";
|
|
}
|
|
|
|
sub ProcessEnd {
|
|
# close nesting
|
|
#
|
|
&close1;
|
|
|
|
if ($noparadepth > 0) {
|
|
$buf .= "ERROR tag $noparatag on line $noparaline unterminated.\n";
|
|
}
|
|
}
|
|
|
|
sub Buffer {
|
|
return $buf;
|
|
}
|
|
|
|
sub close1 {
|
|
&close2;
|
|
if ($level1) {
|
|
$buf .= "</sect1>\n";
|
|
$level1 = 0;
|
|
}
|
|
}
|
|
|
|
sub close2 {
|
|
&close3;
|
|
if ($level2) {
|
|
$buf .= "</sect2>\n";
|
|
$level2 = 0;
|
|
}
|
|
}
|
|
|
|
sub close3 {
|
|
&closelists;
|
|
&closepara;
|
|
&closeqandaset;
|
|
if ($level3) {
|
|
$buf .= "</sect3>\n";
|
|
$level3 = 0;
|
|
}
|
|
}
|
|
|
|
sub closenonsect {
|
|
&closepara;
|
|
# &closeorderedlist;
|
|
# &closeitemizedlist;
|
|
}
|
|
|
|
sub closelistitem {
|
|
&closepara;
|
|
if ($listitem) {
|
|
$buf .= "</listitem>\n";
|
|
$listitem--;
|
|
}
|
|
}
|
|
|
|
sub closeorderedlist {
|
|
&closelistitem;
|
|
if ($orderedlist) {
|
|
$buf .= "</orderedlist>\n";
|
|
$orderedlist--;
|
|
pop @lists;
|
|
}
|
|
}
|
|
|
|
sub closeitemizedlist {
|
|
&closelistitem;
|
|
if ($itemizedlist) {
|
|
$buf .= "</itemizedlist>\n";
|
|
$itemizedlist--;
|
|
pop @lists;
|
|
}
|
|
}
|
|
|
|
sub closelists {
|
|
while (($orderedlist) or ($itemizedlist)) {
|
|
if (@lists[-1] eq 'itemized') {
|
|
&closeitemizedlist;
|
|
} elsif (@lists[-1] eq 'ordered') {
|
|
&closeorderedlist;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub closeanswer {
|
|
&closepara;
|
|
if ($answer) {
|
|
$buf .= "</answer>\n";
|
|
$answer = 0;
|
|
}
|
|
}
|
|
|
|
sub closeqandaentry {
|
|
&closeanswer;
|
|
if ($qandaentry) {
|
|
$buf .= "</qandaentry>\n";
|
|
$qandaentry = 0;
|
|
}
|
|
}
|
|
|
|
sub closeqandaset {
|
|
&closeqandaentry;
|
|
if ($qandaset) {
|
|
$buf .= "</qandaset>\n";
|
|
$qandaset = 0;
|
|
}
|
|
}
|
|
|
|
sub closepara {
|
|
if ($para) {
|
|
$buf .= "</para>\n";
|
|
$para--;
|
|
}
|
|
}
|
|
|
|
sub trimline {
|
|
$line =~ s/\s+$//;
|
|
$line =~ s/^\s+//;
|
|
}
|
|
|
|
sub splittitle {
|
|
$line =~ s/^=+//;
|
|
$line =~ s/=+$//;
|
|
$title = $line;
|
|
if ($line =~ /\|/) {
|
|
$title =~ s/\|.+//;
|
|
$id = $line;
|
|
$id =~ s/^.+\|//;
|
|
} else {
|
|
$id = &anchorfix($title);
|
|
}
|
|
$title =~ s/\s+$//;
|
|
$title =~ s/^\s+//;
|
|
$id =~ s/\s+$//;
|
|
$id =~ s/^\s+//;
|
|
}
|
|
|
|
sub anchorfix {
|
|
my $anchor = $_[0];
|
|
$anchor = lc(&trim($anchor));
|
|
$anchor = decode_entities($anchor);
|
|
$anchor =~ s/-/-dash-/g;
|
|
$anchor =~ s/&/-and-/g;
|
|
$anchor =~ s/;//g;
|
|
$anchor = encode_entities($anchor);
|
|
$anchor =~ s/&(\w)grave/\1/g;
|
|
$anchor =~ s/&(\w)acute/\1/g;
|
|
$anchor =~ s/&(\w)circ/\1/g;
|
|
$anchor =~ s/&(\w)uml/\1/g;
|
|
$anchor =~ s/&(\w)tilde/\1/g;
|
|
$anchor =~ s/&(\w)cedil/\1/g;
|
|
$anchor =~ s/&/-and-/g;
|
|
$anchor =~ s/;//g;
|
|
$anchor =~ s/\//-slash-/g;
|
|
$anchor =~ s/\\/-bslash-/g;
|
|
$anchor =~ s/\s+/-/g;
|
|
$anchor =~ s/'//g;
|
|
$anchor =~ s/`//g;
|
|
$anchor =~ s/,/-comma-/g;
|
|
$anchor =~ s/\./-dot-/g;
|
|
$anchor =~ s/!/-bang-/g;
|
|
$anchor =~ s/\?/-question-/g;
|
|
$anchor =~ s/\+/-plus-/g;
|
|
$anchor =~ s/\*/-x-/g;
|
|
$anchor =~ s/\(/-op-/g;
|
|
$anchor =~ s/\)/-cp-/g;
|
|
$anchor =~ s/\@/-at-/g;
|
|
$anchor =~ s/dcm_at/-at-/gi;
|
|
$anchor =~ s/\^/-hat-/g;
|
|
$anchor =~ s/=/-eq-/g;
|
|
$anchor =~ s/\$/S/;
|
|
$anchor =~ s/~/-tilde-/g;
|
|
$anchor =~ s/0/-zero-/g;
|
|
$anchor =~ s/1/-one-/g;
|
|
$anchor =~ s/2/-two-/g;
|
|
$anchor =~ s/3/-three-/g;
|
|
$anchor =~ s/4/-four-/g;
|
|
$anchor =~ s/5/-five-/g;
|
|
$anchor =~ s/6/-six-/g;
|
|
$anchor =~ s/7/-seven-/g;
|
|
$anchor =~ s/8/-eight-/g;
|
|
$anchor =~ s/9/-nine-/g;
|
|
$anchor =~ s/\|/-pipe-/g;
|
|
$anchor =~ s/\[/-lsqb-/g;
|
|
$anchor =~ s/\]/-rsqb-/g;
|
|
$anchor =~ s/^-+//;
|
|
$anchor =~ s/-+$//;
|
|
$anchor =~ s/--/-/g; # get rid of double, initial and trailing hyphens
|
|
return &trim($anchor);
|
|
}
|
|
|
|
sub trim {
|
|
my $temp = $_[0];
|
|
|
|
$temp =~ s/^\s+//g;
|
|
$temp =~ s/\s+$//g;
|
|
return $temp;
|
|
}
|
|
1;
|