mirror of https://github.com/mkerrisk/man-pages
394 lines
9.2 KiB
Groff
394 lines
9.2 KiB
Groff
.\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.)
|
|
.\"
|
|
.\" %%%LICENSE_START(VERBATIM)
|
|
.\" Permission is granted to make and distribute verbatim copies of this
|
|
.\" manual provided the copyright notice and this permission notice are
|
|
.\" preserved on all copies.
|
|
.\"
|
|
.\" Permission is granted to copy and distribute modified versions of this
|
|
.\" manual under the conditions for verbatim copying, provided that the
|
|
.\" entire resulting derived work is distributed under the terms of a
|
|
.\" permission notice identical to this one.
|
|
.\"
|
|
.\" Since the Linux kernel and libraries are constantly changing, this
|
|
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
|
.\" responsibility for errors or omissions, or for damages resulting from
|
|
.\" the use of the information contained herein. The author(s) may not
|
|
.\" have taken the same level of care in the production of this manual,
|
|
.\" which is licensed free of charge, as they might when working
|
|
.\" professionally.
|
|
.\"
|
|
.\" Formatted or processed versions of this manual, if unaccompanied by
|
|
.\" the source, must acknowledge the copyright and authors of this work.
|
|
.\" %%%LICENSE_END
|
|
.\"
|
|
.\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk)
|
|
.\" Tiny change in formatting - aeb, 950812
|
|
.\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk)
|
|
.\"
|
|
.\" show the synopsis section nicely
|
|
.TH REGEX 3 2021-03-22 "GNU" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
regcomp, regexec, regerror, regfree \- POSIX regex functions
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.B #include <regex.h>
|
|
.PP
|
|
.BI "int regcomp(regex_t *restrict " preg ", const char *restrict " regex ,
|
|
.BI " int " cflags );
|
|
.BI "int regexec(const regex_t *restrict " preg \
|
|
", const char *restrict " string ,
|
|
.BI " size_t " nmatch ", regmatch_t " pmatch "[restrict]\
|
|
, int " eflags );
|
|
.PP
|
|
.BI "size_t regerror(int " errcode ", const regex_t *restrict " preg ,
|
|
.BI " char *restrict " errbuf ", size_t " errbuf_size );
|
|
.BI "void regfree(regex_t *" preg );
|
|
.fi
|
|
.SH DESCRIPTION
|
|
.SS POSIX regex compiling
|
|
.BR regcomp ()
|
|
is used to compile a regular expression into a form that is suitable
|
|
for subsequent
|
|
.BR regexec ()
|
|
searches.
|
|
.PP
|
|
.BR regcomp ()
|
|
is supplied with
|
|
.IR preg ,
|
|
a pointer to a pattern buffer storage area;
|
|
.IR regex ,
|
|
a pointer to the null-terminated string; and
|
|
.IR cflags ,
|
|
flags used to determine the type of compilation.
|
|
.PP
|
|
All regular expression searching must be done via a compiled pattern
|
|
buffer, thus
|
|
.BR regexec ()
|
|
must always be supplied with the address of a
|
|
.BR regcomp ()
|
|
initialized pattern buffer.
|
|
.PP
|
|
.I cflags
|
|
is the
|
|
.RB bitwise- or
|
|
of zero or more of the following:
|
|
.TP
|
|
.B REG_EXTENDED
|
|
Use
|
|
.B POSIX
|
|
Extended Regular Expression syntax when interpreting
|
|
.IR regex .
|
|
If not set,
|
|
.B POSIX
|
|
Basic Regular Expression syntax is used.
|
|
.TP
|
|
.B REG_ICASE
|
|
Do not differentiate case.
|
|
Subsequent
|
|
.BR regexec ()
|
|
searches using this pattern buffer will be case insensitive.
|
|
.TP
|
|
.B REG_NOSUB
|
|
Do not report position of matches.
|
|
The
|
|
.I nmatch
|
|
and
|
|
.I pmatch
|
|
arguments to
|
|
.BR regexec ()
|
|
are ignored if the pattern buffer supplied was compiled with this flag set.
|
|
.TP
|
|
.B REG_NEWLINE
|
|
Match-any-character operators don't match a newline.
|
|
.IP
|
|
A nonmatching list
|
|
.RB ( [\(ha...] )
|
|
not containing a newline does not match a newline.
|
|
.IP
|
|
Match-beginning-of-line operator
|
|
.RB ( \(ha )
|
|
matches the empty string immediately after a newline, regardless of
|
|
whether
|
|
.IR eflags ,
|
|
the execution flags of
|
|
.BR regexec (),
|
|
contains
|
|
.BR REG_NOTBOL .
|
|
.IP
|
|
Match-end-of-line operator
|
|
.RB ( $ )
|
|
matches the empty string immediately before a newline, regardless of
|
|
whether
|
|
.I eflags
|
|
contains
|
|
.BR REG_NOTEOL .
|
|
.SS POSIX regex matching
|
|
.BR regexec ()
|
|
is used to match a null-terminated string
|
|
against the precompiled pattern buffer,
|
|
.IR preg .
|
|
.I nmatch
|
|
and
|
|
.I pmatch
|
|
are used to provide information regarding the location of any matches.
|
|
.I eflags
|
|
is the
|
|
.RB bitwise- or
|
|
of zero or more of the following flags:
|
|
.TP
|
|
.B REG_NOTBOL
|
|
The match-beginning-of-line operator always fails to match (but see the
|
|
compilation flag
|
|
.B REG_NEWLINE
|
|
above).
|
|
This flag may be used when different portions of a string are passed to
|
|
.BR regexec ()
|
|
and the beginning of the string should not be interpreted as the
|
|
beginning of the line.
|
|
.TP
|
|
.B REG_NOTEOL
|
|
The match-end-of-line operator always fails to match (but see the
|
|
compilation flag
|
|
.B REG_NEWLINE
|
|
above).
|
|
.TP
|
|
.B REG_STARTEND
|
|
Use
|
|
.I pmatch[0]
|
|
on the input string, starting at byte
|
|
.I pmatch[0].rm_so
|
|
and ending before byte
|
|
.IR pmatch[0].rm_eo .
|
|
This allows matching embedded NUL bytes
|
|
and avoids a
|
|
.BR strlen (3)
|
|
on large strings.
|
|
It does not use
|
|
.I nmatch
|
|
on input, and does not change
|
|
.B REG_NOTBOL
|
|
or
|
|
.B REG_NEWLINE
|
|
processing.
|
|
This flag is a BSD extension, not present in POSIX.
|
|
.SS Byte offsets
|
|
Unless
|
|
.B REG_NOSUB
|
|
was set for the compilation of the pattern buffer, it is possible to
|
|
obtain match addressing information.
|
|
.I pmatch
|
|
must be dimensioned to have at least
|
|
.I nmatch
|
|
elements.
|
|
These are filled in by
|
|
.BR regexec ()
|
|
with substring match addresses.
|
|
The offsets of the subexpression starting at the
|
|
.IR i th
|
|
open parenthesis are stored in
|
|
.IR pmatch[i] .
|
|
The entire regular expression's match addresses are stored in
|
|
.IR pmatch[0] .
|
|
(Note that to return the offsets of
|
|
.I N
|
|
subexpression matches,
|
|
.I nmatch
|
|
must be at least
|
|
.IR N+1 .)
|
|
Any unused structure elements will contain the value \-1.
|
|
.PP
|
|
The
|
|
.I regmatch_t
|
|
structure which is the type of
|
|
.I pmatch
|
|
is defined in
|
|
.IR <regex.h> .
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
typedef struct {
|
|
regoff_t rm_so;
|
|
regoff_t rm_eo;
|
|
} regmatch_t;
|
|
.EE
|
|
.in
|
|
.PP
|
|
Each
|
|
.I rm_so
|
|
element that is not \-1 indicates the start offset of the next largest
|
|
substring match within the string.
|
|
The corresponding
|
|
.I rm_eo
|
|
element indicates the end offset of the match,
|
|
which is the offset of the first character after the matching text.
|
|
.SS POSIX error reporting
|
|
.BR regerror ()
|
|
is used to turn the error codes that can be returned by both
|
|
.BR regcomp ()
|
|
and
|
|
.BR regexec ()
|
|
into error message strings.
|
|
.PP
|
|
.BR regerror ()
|
|
is passed the error code,
|
|
.IR errcode ,
|
|
the pattern buffer,
|
|
.IR preg ,
|
|
a pointer to a character string buffer,
|
|
.IR errbuf ,
|
|
and the size of the string buffer,
|
|
.IR errbuf_size .
|
|
It returns the size of the
|
|
.I errbuf
|
|
required to contain the null-terminated error message string.
|
|
If both
|
|
.I errbuf
|
|
and
|
|
.I errbuf_size
|
|
are nonzero,
|
|
.I errbuf
|
|
is filled in with the first
|
|
.I "errbuf_size \- 1"
|
|
characters of the error message and a terminating null byte (\(aq\e0\(aq).
|
|
.SS POSIX pattern buffer freeing
|
|
Supplying
|
|
.BR regfree ()
|
|
with a precompiled pattern buffer,
|
|
.IR preg ,
|
|
will free the memory allocated to the pattern buffer by the compiling
|
|
process,
|
|
.BR regcomp ().
|
|
.SH RETURN VALUE
|
|
.BR regcomp ()
|
|
returns zero for a successful compilation or an error code for failure.
|
|
.PP
|
|
.BR regexec ()
|
|
returns zero for a successful match or
|
|
.B REG_NOMATCH
|
|
for failure.
|
|
.SH ERRORS
|
|
The following errors can be returned by
|
|
.BR regcomp ():
|
|
.TP
|
|
.B REG_BADBR
|
|
Invalid use of back reference operator.
|
|
.TP
|
|
.B REG_BADPAT
|
|
Invalid use of pattern operators such as group or list.
|
|
.TP
|
|
.B REG_BADRPT
|
|
Invalid use of repetition operators such as using \(aq*\(aq
|
|
as the first character.
|
|
.TP
|
|
.B REG_EBRACE
|
|
Un-matched brace interval operators.
|
|
.TP
|
|
.B REG_EBRACK
|
|
Un-matched bracket list operators.
|
|
.TP
|
|
.B REG_ECOLLATE
|
|
Invalid collating element.
|
|
.TP
|
|
.B REG_ECTYPE
|
|
Unknown character class name.
|
|
.TP
|
|
.B REG_EEND
|
|
Nonspecific error.
|
|
This is not defined by POSIX.2.
|
|
.TP
|
|
.B REG_EESCAPE
|
|
Trailing backslash.
|
|
.TP
|
|
.B REG_EPAREN
|
|
Un-matched parenthesis group operators.
|
|
.TP
|
|
.B REG_ERANGE
|
|
Invalid use of the range operator; for example, the ending point of the range
|
|
occurs prior to the starting point.
|
|
.TP
|
|
.B REG_ESIZE
|
|
Compiled regular expression requires a pattern buffer larger than 64\ kB.
|
|
This is not defined by POSIX.2.
|
|
.TP
|
|
.B REG_ESPACE
|
|
The regex routines ran out of memory.
|
|
.TP
|
|
.B REG_ESUBREG
|
|
Invalid back reference to a subexpression.
|
|
.SH ATTRIBUTES
|
|
For an explanation of the terms used in this section, see
|
|
.BR attributes (7).
|
|
.ad l
|
|
.nh
|
|
.TS
|
|
allbox;
|
|
lbx lb lb
|
|
l l l.
|
|
Interface Attribute Value
|
|
T{
|
|
.BR regcomp (),
|
|
.BR regexec ()
|
|
T} Thread safety MT-Safe locale
|
|
T{
|
|
.BR regerror ()
|
|
T} Thread safety MT-Safe env
|
|
T{
|
|
.BR regfree ()
|
|
T} Thread safety MT-Safe
|
|
.TE
|
|
.hy
|
|
.ad
|
|
.sp 1
|
|
.SH CONFORMING TO
|
|
POSIX.1-2001, POSIX.1-2008.
|
|
.SH EXAMPLES
|
|
.EX
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <regex.h>
|
|
|
|
#define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0]))
|
|
|
|
static const char *const str =
|
|
"1) John Driverhacker;\en2) John Doe;\en3) John Foo;\en";
|
|
static const char *const re = "John.*o";
|
|
|
|
int main(void)
|
|
{
|
|
static const char *s = str;
|
|
regex_t regex;
|
|
regmatch_t pmatch[1];
|
|
regoff_t off, len;
|
|
|
|
if (regcomp(&regex, re, REG_NEWLINE))
|
|
exit(EXIT_FAILURE);
|
|
|
|
printf("String = \e"%s\e"\en", str);
|
|
printf("Matches:\en");
|
|
|
|
for (int i = 0; ; i++) {
|
|
if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0))
|
|
break;
|
|
|
|
off = pmatch[0].rm_so + (s \- str);
|
|
len = pmatch[0].rm_eo \- pmatch[0].rm_so;
|
|
printf("#%d:\en", i);
|
|
printf("offset = %jd; length = %jd\en", (intmax_t) off,
|
|
(intmax_t) len);
|
|
printf("substring = \e"%.*s\e"\en", len, s + pmatch[0].rm_so);
|
|
|
|
s += pmatch[0].rm_eo;
|
|
}
|
|
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
.EE
|
|
.SH SEE ALSO
|
|
.BR grep (1),
|
|
.BR regex (7)
|
|
.PP
|
|
The glibc manual section,
|
|
.I "Regular Expressions"
|