mirror of https://github.com/mkerrisk/man-pages
583 lines
22 KiB
Plaintext
583 lines
22 KiB
Plaintext
.\" Copyright (c) 2001-2003 The Open Group, All Rights Reserved
|
|
.TH "REGCOMP" P 2003 "IEEE/The Open Group" "POSIX Programmer's Manual"
|
|
.\" regcomp
|
|
.SH NAME
|
|
regcomp, regerror, regexec, regfree \- regular expression matching
|
|
.SH SYNOPSIS
|
|
.LP
|
|
\fB#include <regex.h>
|
|
.br
|
|
.sp
|
|
int regcomp(regex_t *restrict\fP \fIpreg\fP\fB, const char *restrict\fP
|
|
\fIpattern\fP\fB,
|
|
.br
|
|
\ \ \ \ \ \ int\fP \fIcflags\fP\fB);
|
|
.br
|
|
size_t regerror(int\fP \fIerrcode\fP\fB, const regex_t *restrict\fP
|
|
\fIpreg\fP\fB,
|
|
.br
|
|
\ \ \ \ \ \ char *restrict\fP \fIerrbuf\fP\fB, size_t\fP \fIerrbuf_size\fP\fB);
|
|
.br
|
|
int regexec(const regex_t *restrict\fP \fIpreg\fP\fB, const char *restrict\fP
|
|
\fIstring\fP\fB,
|
|
.br
|
|
\ \ \ \ \ \ size_t\fP \fInmatch\fP\fB, regmatch_t\fP \fIpmatch\fP\fB[restrict],
|
|
int\fP
|
|
\fIeflags\fP\fB);
|
|
.br
|
|
void regfree(regex_t *\fP\fIpreg\fP\fB);
|
|
.br
|
|
\fP
|
|
.SH DESCRIPTION
|
|
.LP
|
|
These functions interpret \fIbasic\fP and \fIextended\fP regular expressions
|
|
as described in the Base Definitions volume of
|
|
IEEE\ Std\ 1003.1-2001, Chapter 9, Regular Expressions.
|
|
.LP
|
|
The \fBregex_t\fP structure is defined in \fI<regex.h>\fP and contains
|
|
at least
|
|
the following member:
|
|
.TS C
|
|
center; l2 l2 l.
|
|
\fBMember Type\fP \fBMember Name\fP \fBDescription\fP
|
|
size_t re_nsub Number of parenthesized subexpressions.
|
|
.TE
|
|
.LP
|
|
The \fBregmatch_t\fP structure is defined in \fI<regex.h>\fP and contains
|
|
at
|
|
least the following members:
|
|
.TS C
|
|
center; l1 l1 lw(40).
|
|
\fBMember Type\fP \fBMember Name\fP T{
|
|
.na
|
|
\fBDescription\fP
|
|
.ad
|
|
T}
|
|
\fBregoff_t\fP \fIrm_so\fP T{
|
|
.na
|
|
Byte offset from start of \fIstring\fP to start of substring.
|
|
.ad
|
|
T}
|
|
\fBregoff_t\fP \fIrm_eo\fP T{
|
|
.na
|
|
Byte offset from start of \fIstring\fP of the first character after the end of substring.
|
|
.ad
|
|
T}
|
|
.TE
|
|
.LP
|
|
The \fIregcomp\fP() function shall compile the regular expression
|
|
contained in the string pointed to by the \fIpattern\fP
|
|
argument and place the results in the structure pointed to by \fIpreg\fP.
|
|
The \fIcflags\fP argument is the bitwise-inclusive OR
|
|
of zero or more of the following flags, which are defined in the \fI<regex.h>\fP
|
|
header:
|
|
.TP 7
|
|
REG_EXTENDED
|
|
Use Extended Regular Expressions.
|
|
.TP 7
|
|
REG_ICASE
|
|
Ignore case in match. (See the Base Definitions volume of IEEE\ Std\ 1003.1-2001,
|
|
Chapter 9, Regular Expressions.)
|
|
.TP 7
|
|
REG_NOSUB
|
|
Report only success/fail in \fIregexec\fP().
|
|
.TP 7
|
|
REG_NEWLINE
|
|
Change the handling of <newline>s, as described in the text.
|
|
.sp
|
|
.LP
|
|
The default regular expression type for \fIpattern\fP is a Basic Regular
|
|
Expression. The application can specify Extended
|
|
Regular Expressions using the REG_EXTENDED \fIcflags\fP flag.
|
|
.LP
|
|
If the REG_NOSUB flag was not set in \fIcflags\fP, then \fIregcomp\fP()
|
|
shall set \fIre_nsub\fP to the number of
|
|
parenthesized subexpressions (delimited by \fB"\\(\\)"\fP in basic
|
|
regular expressions or \fB"()"\fP in extended regular
|
|
expressions) found in \fIpattern\fP.
|
|
.LP
|
|
The \fIregexec\fP() function compares the null-terminated string specified
|
|
by \fIstring\fP with the compiled regular
|
|
expression \fIpreg\fP initialized by a previous call to \fIregcomp\fP().
|
|
If it finds a match, \fIregexec\fP() shall return 0;
|
|
otherwise, it shall return non-zero indicating either no match or
|
|
an error. The \fIeflags\fP argument is the bitwise-inclusive OR
|
|
of zero or more of the following flags, which are defined in the \fI<regex.h>\fP
|
|
header:
|
|
.TP 7
|
|
REG_NOTBOL
|
|
The first character of the string pointed to by \fIstring\fP is not
|
|
the beginning of the line. Therefore, the circumflex
|
|
character (\fB'^'\fP), when taken as a special character, shall
|
|
not match the beginning of \fIstring\fP.
|
|
.TP 7
|
|
REG_NOTEOL
|
|
The last character of the string pointed to by \fIstring\fP is not
|
|
the end of the line. Therefore, the dollar sign (
|
|
\fB'$'\fP ), when taken as a special character, shall not match the
|
|
end of \fIstring\fP.
|
|
.sp
|
|
.LP
|
|
If \fInmatch\fP is 0 or REG_NOSUB was set in the \fIcflags\fP argument
|
|
to \fIregcomp\fP(), then \fIregexec\fP() shall ignore
|
|
the \fIpmatch\fP argument. Otherwise, the application shall ensure
|
|
that the \fIpmatch\fP argument points to an array with at
|
|
least \fInmatch\fP elements, and \fIregexec\fP() shall fill in the
|
|
elements of that array with offsets of the substrings of
|
|
\fIstring\fP that correspond to the parenthesized subexpressions of
|
|
\fIpattern\fP: \fIpmatch\fP[\fIi\fP].\fIrm_so\fP shall
|
|
be the byte offset of the beginning and \fIpmatch\fP[\fIi\fP].\fIrm_eo\fP
|
|
shall be one greater than the byte offset of the end
|
|
of substring \fIi\fP. (Subexpression \fIi\fP begins at the \fIi\fPth
|
|
matched open parenthesis, counting from 1.) Offsets in
|
|
\fIpmatch\fP[0] identify the substring that corresponds to the entire
|
|
regular expression. Unused elements of \fIpmatch\fP up to
|
|
\fIpmatch\fP[ \fInmatch\fP-1] shall be filled with -1. If there are
|
|
more than \fInmatch\fP subexpressions in \fIpattern\fP (
|
|
\fIpattern\fP itself counts as a subexpression), then \fIregexec\fP()
|
|
shall still do the match, but shall record only the first
|
|
\fInmatch\fP substrings.
|
|
.LP
|
|
When matching a basic or extended regular expression, any given parenthesized
|
|
subexpression of \fIpattern\fP might participate
|
|
in the match of several different substrings of \fIstring\fP, or it
|
|
might not match any substring even though the pattern as a
|
|
whole did match. The following rules shall be used to determine which
|
|
substrings to report in \fIpmatch\fP when matching regular
|
|
expressions:
|
|
.IP " 1." 4
|
|
If subexpression \fIi\fP in a regular expression is not contained
|
|
within another subexpression, and it participated in the
|
|
match several times, then the byte offsets in \fIpmatch\fP[ \fIi\fP]
|
|
shall delimit the last such match.
|
|
.LP
|
|
.IP " 2." 4
|
|
If subexpression \fIi\fP is not contained within another subexpression,
|
|
and it did not participate in an otherwise successful
|
|
match, the byte offsets in \fIpmatch\fP[ \fIi\fP] shall be -1. A subexpression
|
|
does not participate in the match when:
|
|
\fB'*'\fP or \fB"\\{\\}"\fP appears immediately after the subexpression
|
|
in a basic regular expression, or
|
|
\fB'*'\fP, \fB'?'\fP, or \fB"{}"\fP appears immediately after the
|
|
subexpression in an extended regular expression, and
|
|
the subexpression did not match (matched 0 times)
|
|
.LP
|
|
or:
|
|
\fB'|'\fP is used in an extended regular expression to select this
|
|
subexpression or another, and the other
|
|
subexpression matched.
|
|
.LP
|
|
.IP " 3." 4
|
|
If subexpression \fIi\fP is contained within another subexpression
|
|
\fIj\fP, and \fIi\fP is not contained within any other
|
|
subexpression that is contained within \fIj\fP, and a match of subexpression
|
|
\fIj\fP is reported in \fIpmatch\fP[ \fIj\fP],
|
|
then the match or non-match of subexpression \fIi\fP reported in \fIpmatch\fP[
|
|
\fIi\fP] shall be as described in 1. and 2.
|
|
above, but within the substring reported in \fIpmatch\fP[ \fIj\fP]
|
|
rather than the whole string. The offsets in \fIpmatch\fP[
|
|
\fIi\fP] are still relative to the start of \fIstring\fP.
|
|
.LP
|
|
.IP " 4." 4
|
|
If subexpression \fIi\fP is contained in subexpression \fIj\fP, and
|
|
the byte offsets in \fIpmatch\fP[ \fIj\fP] are -1, then
|
|
the byte offsets in \fIpmatch\fP[ \fIi\fP] shall also be -1.
|
|
.LP
|
|
.IP " 5." 4
|
|
If subexpression \fIi\fP matched a zero-length string, then both byte
|
|
offsets in \fIpmatch\fP[ \fIi\fP] shall be the byte
|
|
offset of the character or null terminator immediately following the
|
|
zero-length string.
|
|
.LP
|
|
.LP
|
|
If, when \fIregexec\fP() is called, the locale is different from when
|
|
the regular expression was compiled, the result is
|
|
undefined.
|
|
.LP
|
|
If REG_NEWLINE is not set in \fIcflags\fP, then a <newline> in \fIpattern\fP
|
|
or \fIstring\fP shall be treated as an
|
|
ordinary character. If REG_NEWLINE is set, then <newline> shall be
|
|
treated as an ordinary character except as follows:
|
|
.IP " 1." 4
|
|
A <newline> in \fIstring\fP shall not be matched by a period outside
|
|
a bracket expression or by any form of a
|
|
non-matching list (see the Base Definitions volume of IEEE\ Std\ 1003.1-2001,
|
|
Chapter
|
|
9, Regular Expressions).
|
|
.LP
|
|
.IP " 2." 4
|
|
A circumflex (\fB'^'\fP) in \fIpattern\fP, when used to specify
|
|
expression anchoring (see the Base Definitions volume of
|
|
IEEE\ Std\ 1003.1-2001, Section 9.3.8, BRE Expression Anchoring),
|
|
shall match the zero-length string immediately after a <newline> in
|
|
\fIstring\fP, regardless of the setting of
|
|
REG_NOTBOL.
|
|
.LP
|
|
.IP " 3." 4
|
|
A dollar sign (\fB'$'\fP) in \fIpattern\fP, when used to specify
|
|
expression anchoring, shall match the zero-length string
|
|
immediately before a <newline> in \fIstring\fP, regardless of the
|
|
setting of REG_NOTEOL.
|
|
.LP
|
|
.LP
|
|
The \fIregfree\fP() function frees any memory allocated by \fIregcomp\fP()
|
|
associated with \fIpreg\fP.
|
|
.LP
|
|
The following constants are defined as error return values:
|
|
.TP 7
|
|
REG_NOMATCH
|
|
\fIregexec\fP() failed to match.
|
|
.TP 7
|
|
REG_BADPAT
|
|
Invalid regular expression.
|
|
.TP 7
|
|
REG_ECOLLATE
|
|
Invalid collating element referenced.
|
|
.TP 7
|
|
REG_ECTYPE
|
|
Invalid character class type referenced.
|
|
.TP 7
|
|
REG_EESCAPE
|
|
Trailing \fB'\\'\fP in pattern.
|
|
.TP 7
|
|
REG_ESUBREG
|
|
Number in \fB"\\digit"\fP invalid or in error.
|
|
.TP 7
|
|
REG_EBRACK
|
|
\fB"[]"\fP imbalance.
|
|
.TP 7
|
|
REG_EPAREN
|
|
\fB"\\(\\)"\fP or \fB"()"\fP imbalance.
|
|
.TP 7
|
|
REG_EBRACE
|
|
\fB"\\{\\}"\fP imbalance.
|
|
.TP 7
|
|
REG_BADBR
|
|
Content of \fB"\\{\\}"\fP invalid: not a number, number too large,
|
|
more than two numbers, first larger than second.
|
|
.TP 7
|
|
REG_ERANGE
|
|
Invalid endpoint in range expression.
|
|
.TP 7
|
|
REG_ESPACE
|
|
Out of memory.
|
|
.TP 7
|
|
REG_BADRPT
|
|
\fB'?'\fP, \fB'*'\fP, or \fB'+'\fP not preceded by valid regular
|
|
expression.
|
|
.sp
|
|
.LP
|
|
The \fIregerror\fP() function provides a mapping from error codes
|
|
returned by \fIregcomp\fP() and \fIregexec\fP() to
|
|
unspecified printable strings. It generates a string corresponding
|
|
to the value of the \fIerrcode\fP argument, which the
|
|
application shall ensure is the last non-zero value returned by \fIregcomp\fP()
|
|
or \fIregexec\fP() with the given value of
|
|
\fIpreg\fP. If \fIerrcode\fP is not such a value, the content of the
|
|
generated string is unspecified.
|
|
.LP
|
|
If \fIpreg\fP is a null pointer, but \fIerrcode\fP is a value returned
|
|
by a previous call to \fIregexec\fP() or
|
|
\fIregcomp\fP(), the \fIregerror\fP() function still generates an error string
|
|
corresponding to the value of \fIerrcode\fP, but it might
|
|
not be as detailed under some implementations.
|
|
.LP
|
|
If the \fIerrbuf_size\fP argument is not 0, \fIregerror\fP() shall
|
|
place the generated string into the buffer of size
|
|
\fIerrbuf_size\fP bytes pointed to by \fIerrbuf\fP. If the string
|
|
(including the terminating null) cannot fit in the buffer,
|
|
\fIregerror\fP() shall truncate the string and null-terminate the
|
|
result.
|
|
.LP
|
|
If \fIerrbuf_size\fP is 0, \fIregerror\fP() shall ignore the \fIerrbuf\fP
|
|
argument, and return the size of the buffer needed
|
|
to hold the generated string.
|
|
.LP
|
|
If the \fIpreg\fP argument to \fIregexec\fP() or \fIregfree\fP() is
|
|
not a compiled regular expression returned by
|
|
\fIregcomp\fP(), the result is undefined. A \fIpreg\fP is no longer
|
|
treated as a compiled regular expression after it is given to
|
|
\fIregfree\fP().
|
|
.SH RETURN VALUE
|
|
.LP
|
|
Upon successful completion, the \fIregcomp\fP() function shall return
|
|
0. Otherwise, it shall return an integer value indicating
|
|
an error as described in \fI<regex.h>\fP, and the content of \fIpreg\fP
|
|
is
|
|
undefined. If a code is returned, the interpretation shall be as given
|
|
in \fI<regex.h>\fP.
|
|
.LP
|
|
If \fIregcomp\fP() detects an invalid RE, it may return REG_BADPAT,
|
|
or it may return one of the error codes that more precisely
|
|
describes the error.
|
|
.LP
|
|
Upon successful completion, the \fIregexec\fP() function shall return
|
|
0. Otherwise, it shall return REG_NOMATCH to indicate no
|
|
match.
|
|
.LP
|
|
Upon successful completion, the \fIregerror\fP() function shall return
|
|
the number of bytes needed to hold the entire generated
|
|
string, including the null termination. If the return value is greater
|
|
than \fIerrbuf_size\fP, the string returned in the buffer
|
|
pointed to by \fIerrbuf\fP has been truncated.
|
|
.LP
|
|
The \fIregfree\fP() function shall not return a value.
|
|
.SH ERRORS
|
|
.LP
|
|
No errors are defined.
|
|
.LP
|
|
\fIThe following sections are informative.\fP
|
|
.SH EXAMPLES
|
|
.sp
|
|
.RS
|
|
.nf
|
|
|
|
\fB#include <regex.h>
|
|
.sp
|
|
|
|
/*
|
|
* Match string against the extended regular expression in
|
|
* pattern, treating errors as no match.
|
|
*
|
|
* Return 1 for match, 0 for no match.
|
|
*/
|
|
.sp
|
|
|
|
int
|
|
match(const char *string, char *pattern)
|
|
{
|
|
int status;
|
|
regex_t re;
|
|
.sp
|
|
|
|
if (regcomp(&re, pattern, REG_EXTENDED|REG_NOSUB) != 0) {
|
|
return(0); /* Report error. */
|
|
}
|
|
status = regexec(&re, string, (size_t) 0, NULL, 0);
|
|
regfree(&re);
|
|
if (status != 0) {
|
|
return(0); /* Report error. */
|
|
}
|
|
return(1);
|
|
}
|
|
\fP
|
|
.fi
|
|
.RE
|
|
.LP
|
|
The following demonstrates how the REG_NOTBOL flag could be used with
|
|
\fIregexec\fP() to find all substrings in a line that
|
|
match a pattern supplied by a user. (For simplicity of the example,
|
|
very little error checking is done.)
|
|
.sp
|
|
.RS
|
|
.nf
|
|
|
|
\fB(void) regcomp (&re, pattern, 0);
|
|
/* This call to regexec() finds the first match on the line. */
|
|
error = regexec (&re, &buffer[0], 1, &pm, 0);
|
|
while (error == 0) { /* While matches found. */
|
|
/* Substring found between pm.rm_so and pm.rm_eo. */
|
|
/* This call to regexec() finds the next match. */
|
|
error = regexec (&re, buffer + pm.rm_eo, 1, &pm, REG_NOTBOL);
|
|
}
|
|
\fP
|
|
.fi
|
|
.RE
|
|
.SH APPLICATION USAGE
|
|
.LP
|
|
An application could use:
|
|
.sp
|
|
.RS
|
|
.nf
|
|
|
|
\fBregerror(code,preg,(char *)NULL,(size_t)0)
|
|
\fP
|
|
.fi
|
|
.RE
|
|
.LP
|
|
to find out how big a buffer is needed for the generated string, \fImalloc\fP()
|
|
a buffer
|
|
to hold the string, and then call \fIregerror\fP() again to get the
|
|
string. Alternatively, it could allocate a fixed, static
|
|
buffer that is big enough to hold most strings, and then use \fImalloc\fP()
|
|
to allocate a
|
|
larger buffer if it finds that this is too small.
|
|
.LP
|
|
To match a pattern as described in the Shell and Utilities volume
|
|
of IEEE\ Std\ 1003.1-2001, Section 2.13, Pattern Matching Notation,
|
|
use the \fIfnmatch\fP() function.
|
|
.SH RATIONALE
|
|
.LP
|
|
The \fIregexec\fP() function must fill in all \fInmatch\fP elements
|
|
of \fIpmatch\fP, where \fInmatch\fP and \fIpmatch\fP
|
|
are supplied by the application, even if some elements of \fIpmatch\fP
|
|
do not correspond to subexpressions in \fIpattern\fP. The
|
|
application writer should note that there is probably no reason for
|
|
using a value of \fInmatch\fP that is larger than
|
|
\fIpreg\fP->\fIre_nsub\fP+1.
|
|
.LP
|
|
The REG_NEWLINE flag supports a use of RE matching that is needed
|
|
in some applications like text editors. In such applications,
|
|
the user supplies an RE asking the application to find a line that
|
|
matches the given expression. An anchor in such an RE anchors at
|
|
the beginning or end of any line. Such an application can pass a sequence
|
|
of <newline>-separated lines to \fIregexec\fP() as
|
|
a single long string and specify REG_NEWLINE to \fIregcomp\fP() to
|
|
get the desired behavior. The application must ensure that
|
|
there are no explicit <newline>s in \fIpattern\fP if it wants to ensure
|
|
that any match occurs entirely within a single
|
|
line.
|
|
.LP
|
|
The REG_NEWLINE flag affects the behavior of \fIregexec\fP(), but
|
|
it is in the \fIcflags\fP parameter to \fIregcomp\fP() to
|
|
allow flexibility of implementation. Some implementations will want
|
|
to generate the same compiled RE in \fIregcomp\fP() regardless
|
|
of the setting of REG_NEWLINE and have \fIregexec\fP() handle anchors
|
|
differently based on the setting of the flag. Other
|
|
implementations will generate different compiled REs based on the
|
|
REG_NEWLINE flag.
|
|
.LP
|
|
The REG_ICASE flag supports the operations taken by the \fIgrep\fP
|
|
\fB-i\fP option and
|
|
the historical implementations of \fIex\fP and \fIvi\fP.
|
|
Including this flag will make it easier for application code to be
|
|
written that does the same thing as these utilities.
|
|
.LP
|
|
The substrings reported in \fIpmatch\fP[] are defined using offsets
|
|
from the start of the string rather than pointers. Since
|
|
this is a new interface, there should be no impact on historical implementations
|
|
or applications, and offsets should be just as
|
|
easy to use as pointers. The change to offsets was made to facilitate
|
|
future extensions in which the string to be searched is
|
|
presented to \fIregexec\fP() in blocks, allowing a string to be searched
|
|
that is not all in memory at once.
|
|
.LP
|
|
The type \fBregoff_t\fP is used for the elements of \fIpmatch\fP[]
|
|
to ensure that the application can represent either the
|
|
largest possible array in memory (important for an application conforming
|
|
to the Shell and Utilities volume of
|
|
IEEE\ Std\ 1003.1-2001) or the largest possible file (important for
|
|
an application using the extension where a file is
|
|
searched in chunks).
|
|
.LP
|
|
The standard developers rejected the inclusion of a \fIregsub\fP()
|
|
function that would be used to do substitutions for a
|
|
matched RE. While such a routine would be useful to some applications,
|
|
its utility would be much more limited than the matching
|
|
function described here. Both RE parsing and substitution are possible
|
|
to implement without support other than that required by the
|
|
ISO\ C standard, but matching is much more complex than substituting.
|
|
The only difficult part of substitution, given the
|
|
information supplied by \fIregexec\fP(), is finding the next character
|
|
in a string when there can be multi-byte characters. That
|
|
is a much larger issue, and one that needs a more general solution.
|
|
.LP
|
|
The \fIerrno\fP variable has not been used for error returns to avoid
|
|
filling the \fIerrno\fP name space for this feature.
|
|
.LP
|
|
The interface is defined so that the matched substrings \fIrm_so\fP
|
|
and \fIrm_eo\fP are in a separate \fBregmatch_t\fP
|
|
structure instead of in \fBregex_t\fP. This allows a single compiled
|
|
RE to be used simultaneously in several contexts; in
|
|
\fImain\fP() and a signal handler, perhaps, or in multiple threads
|
|
of lightweight processes. (The \fIpreg\fP argument to
|
|
\fIregexec\fP() is declared with type \fBconst\fP, so the implementation
|
|
is not permitted to use the structure to store
|
|
intermediate results.) It also allows an application to request an
|
|
arbitrary number of substrings from an RE. The number of
|
|
subexpressions in the RE is reported in \fIre_nsub\fP in \fIpreg\fP.
|
|
With this change to \fIregexec\fP(), consideration was
|
|
given to dropping the REG_NOSUB flag since the user can now specify
|
|
this with a zero \fInmatch\fP argument to \fIregexec\fP().
|
|
However, keeping REG_NOSUB allows an implementation to use a different
|
|
(perhaps more efficient) algorithm if it knows in
|
|
\fIregcomp\fP() that no subexpressions need be reported. The implementation
|
|
is only required to fill in \fIpmatch\fP if
|
|
\fInmatch\fP is not zero and if REG_NOSUB is not specified. Note that
|
|
the \fBsize_t\fP type, as defined in the ISO\ C
|
|
standard, is unsigned, so the description of \fIregexec\fP() does
|
|
not need to address negative values of \fInmatch\fP.
|
|
.LP
|
|
REG_NOTBOL was added to allow an application to do repeated searches
|
|
for the same pattern in a line. If the pattern contains a
|
|
circumflex character that should match the beginning of a line, then
|
|
the pattern should only match when matched against the
|
|
beginning of the line. Without the REG_NOTBOL flag, the application
|
|
could rewrite the expression for subsequent matches, but in the
|
|
general case this would require parsing the expression. The need for
|
|
REG_NOTEOL is not as clear; it was added for symmetry.
|
|
.LP
|
|
The addition of the \fIregerror\fP() function addresses the historical
|
|
need for conforming application programs to have access
|
|
to error information more than "Function failed to compile/match your
|
|
RE for unknown reasons".
|
|
.LP
|
|
This interface provides for two different methods of dealing with
|
|
error conditions. The specific error codes (REG_EBRACE, for
|
|
example), defined in \fI<regex.h>\fP, allow an application to recover
|
|
from an error
|
|
if it is so able. Many applications, especially those that use patterns
|
|
supplied by a user, will not try to deal with specific
|
|
error cases, but will just use \fIregerror\fP() to obtain a human-readable
|
|
error message to present to the user.
|
|
.LP
|
|
The \fIregerror\fP() function uses a scheme similar to \fIconfstr\fP()
|
|
to deal with
|
|
the problem of allocating memory to hold the generated string. The
|
|
scheme used by \fIstrerror\fP() in the ISO\ C standard was considered
|
|
unacceptable since it creates
|
|
difficulties for multi-threaded applications.
|
|
.LP
|
|
The \fIpreg\fP argument is provided to \fIregerror\fP() to allow an
|
|
implementation to generate a more descriptive message than
|
|
would be possible with \fIerrcode\fP alone. An implementation might,
|
|
for example, save the character offset of the offending
|
|
character of the pattern in a field of \fIpreg\fP, and then include
|
|
that in the generated message string. The implementation may
|
|
also ignore \fIpreg\fP.
|
|
.LP
|
|
A REG_FILENAME flag was considered, but omitted. This flag caused
|
|
\fIregexec\fP() to match patterns as described in the Shell
|
|
and Utilities volume of IEEE\ Std\ 1003.1-2001, Section 2.13, Pattern
|
|
Matching Notation instead of REs. This service is now provided by
|
|
the \fIfnmatch\fP()
|
|
function.
|
|
.LP
|
|
Notice that there is a difference in philosophy between the ISO\ POSIX-2:1993
|
|
standard and IEEE\ Std\ 1003.1-2001 in
|
|
how to handle a "bad" regular expression. The ISO\ POSIX-2:1993 standard
|
|
says that many bad constructs "produce undefined
|
|
results", or that "the interpretation is undefined". IEEE\ Std\ 1003.1-2001,
|
|
however, says that the interpretation of
|
|
such REs is unspecified. The term "undefined" means that the action
|
|
by the application is an error, of similar severity to
|
|
passing a bad pointer to a function.
|
|
.LP
|
|
The \fIregcomp\fP() and \fIregexec\fP() functions are required to
|
|
accept any null-terminated string as the \fIpattern\fP
|
|
argument. If the meaning of the string is "undefined", the behavior
|
|
of the function is "unspecified".
|
|
IEEE\ Std\ 1003.1-2001 does not specify how the functions will interpret
|
|
the pattern; they might return error codes, or
|
|
they might do pattern matching in some completely unexpected way,
|
|
but they should not do something like abort the process.
|
|
.SH FUTURE DIRECTIONS
|
|
.LP
|
|
None.
|
|
.SH SEE ALSO
|
|
.LP
|
|
\fIfnmatch\fP(), \fIglob\fP(), Shell and Utilities volume of
|
|
IEEE\ Std\ 1003.1-2001, Section 2.13, Pattern Matching Notation, Base
|
|
Definitions volume of IEEE\ Std\ 1003.1-2001, Chapter 9, Regular Expressions,
|
|
\fI<regex.h>\fP, \fI<sys/types.h>\fP
|
|
.SH COPYRIGHT
|
|
Portions of this text are reprinted and reproduced in electronic form
|
|
from IEEE Std 1003.1, 2003 Edition, Standard for Information Technology
|
|
-- Portable Operating System Interface (POSIX), The Open Group Base
|
|
Specifications Issue 6, Copyright (C) 2001-2003 by the Institute of
|
|
Electrical and Electronics Engineers, Inc and The Open Group. In the
|
|
event of any discrepancy between this version and the original IEEE and
|
|
The Open Group Standard, the original IEEE and The Open Group Standard
|
|
is the referee document. The original Standard can be obtained online at
|
|
http://www.opengroup.org/unix/online.html .
|