2004-11-03 13:51:07 +00:00
|
|
|
.\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
|
|
|
|
.\"
|
|
|
|
.\" This is free documentation; you can redistribute it and/or
|
|
|
|
.\" modify it under the terms of the GNU General Public License as
|
|
|
|
.\" published by the Free Software Foundation; either version 2 of
|
|
|
|
.\" the License, or (at your option) any later version.
|
|
|
|
.\"
|
|
|
|
.\" References consulted:
|
|
|
|
.\" GNU glibc-2 source code and manual
|
|
|
|
.\" Dinkumware C library reference http://www.dinkumware.com/
|
|
|
|
.\" OpenGroup's Single Unix specification
|
|
|
|
.\" http://www.UNIX-systems.org/online.html
|
|
|
|
.\" ISO/IEC 9899:1999
|
|
|
|
.\"
|
|
|
|
.TH MBRTOWC 3 2001-11-22 "GNU" "Linux Programmer's Manual"
|
|
|
|
.SH NAME
|
|
|
|
mbrtowc \- convert a multibyte sequence to a wide character
|
|
|
|
.SH SYNOPSIS
|
|
|
|
.nf
|
|
|
|
.B #include <wchar.h>
|
|
|
|
.sp
|
2008-07-06 15:10:32 +00:00
|
|
|
.BI "size_t mbrtowc(wchar_t *" pwc ", const char *" s ", size_t " n \
|
|
|
|
", mbstate_t *" ps );
|
2004-11-03 13:51:07 +00:00
|
|
|
.fi
|
|
|
|
.SH DESCRIPTION
|
|
|
|
The main case for this function is when \fIs\fP is not NULL and \fIpwc\fP is
|
2007-04-12 22:42:49 +00:00
|
|
|
not NULL.
|
2007-05-12 09:06:04 +00:00
|
|
|
In this case, the
|
|
|
|
.BR mbrtowc ()
|
|
|
|
function inspects at most \fIn\fP
|
2004-11-03 13:51:07 +00:00
|
|
|
bytes of the multibyte string starting at \fIs\fP, extracts the next complete
|
|
|
|
multibyte character, converts it to a wide character and stores it at
|
2007-04-12 22:42:49 +00:00
|
|
|
\fI*pwc\fP.
|
|
|
|
It updates the shift state \fI*ps\fP.
|
|
|
|
If the converted wide
|
2008-06-09 15:49:35 +00:00
|
|
|
character is not L\(aq\\0\(aq, it returns the number of bytes that were consumed
|
2007-04-12 22:42:49 +00:00
|
|
|
from \fIs\fP.
|
2008-06-09 15:49:35 +00:00
|
|
|
If the converted wide character is L\(aq\\0\(aq, it resets the shift
|
2004-11-03 13:51:07 +00:00
|
|
|
state \fI*ps\fP to the initial state and returns 0.
|
|
|
|
.PP
|
|
|
|
If the \fIn\fP bytes starting at \fIs\fP do not contain a complete multibyte
|
2007-05-12 09:06:04 +00:00
|
|
|
character,
|
|
|
|
.BR mbrtowc ()
|
2007-12-22 16:26:51 +00:00
|
|
|
returns \fI(size_t)\ \-2\fP.
|
2007-04-12 22:42:49 +00:00
|
|
|
This can happen even if
|
2004-11-03 13:51:07 +00:00
|
|
|
\fIn\fP >= \fIMB_CUR_MAX\fP, if the multibyte string contains redundant shift
|
|
|
|
sequences.
|
|
|
|
.PP
|
|
|
|
If the multibyte string starting at \fIs\fP contains an invalid multibyte
|
2007-05-12 09:06:04 +00:00
|
|
|
sequence before the next complete character,
|
|
|
|
.BR mbrtowc ()
|
|
|
|
returns
|
2007-11-29 18:15:54 +00:00
|
|
|
\fI(size_t)\ \-1\fP and sets \fIerrno\fP to \fBEILSEQ\fP.
|
2007-04-12 22:42:49 +00:00
|
|
|
In this case,
|
2004-11-03 13:51:07 +00:00
|
|
|
the effects on \fI*ps\fP are undefined.
|
|
|
|
.PP
|
2007-04-12 22:42:49 +00:00
|
|
|
A different case is when \fIs\fP is not NULL but \fIpwc\fP is NULL.
|
|
|
|
In this
|
2007-05-12 09:06:04 +00:00
|
|
|
case, the
|
|
|
|
.BR mbrtowc ()
|
2007-11-17 04:27:41 +00:00
|
|
|
function behaves as above, except that it does not
|
2004-11-03 13:51:07 +00:00
|
|
|
store the converted wide character in memory.
|
|
|
|
.PP
|
2007-04-12 22:42:49 +00:00
|
|
|
A third case is when \fIs\fP is NULL.
|
|
|
|
In this case, \fIpwc\fP and \fIn\fP are
|
|
|
|
ignored.
|
|
|
|
If the conversion state represented by \fI*ps\fP denotes an
|
2007-05-12 09:06:04 +00:00
|
|
|
incomplete multibyte character conversion, the
|
|
|
|
.BR mbrtowc ()
|
|
|
|
function
|
2007-11-29 18:15:54 +00:00
|
|
|
returns \fI(size_t)\ \-1\fP, sets \fIerrno\fP to \fBEILSEQ\fP, and
|
2007-04-12 22:42:49 +00:00
|
|
|
leaves \fI*ps\fP in an undefined state.
|
2007-05-12 09:06:04 +00:00
|
|
|
Otherwise, the
|
|
|
|
.BR mbrtowc ()
|
|
|
|
function
|
2004-11-03 13:51:07 +00:00
|
|
|
puts \fI*ps\fP in the initial state and returns 0.
|
|
|
|
.PP
|
|
|
|
In all of the above cases, if \fIps\fP is a NULL pointer, a static anonymous
|
|
|
|
state known only to the \fBmbrtowc\fP() function is used instead.
|
2007-12-13 20:46:52 +00:00
|
|
|
Otherwise, \fI*ps\fP must be a valid \fImbstate_t\fP object.
|
|
|
|
An \fImbstate_t\fP object \fIa\fP can be initialized to the initial state
|
2004-11-03 13:51:07 +00:00
|
|
|
by zeroing it, for example using
|
|
|
|
.sp
|
2007-12-19 07:19:23 +00:00
|
|
|
.in +4n
|
2004-11-03 13:51:07 +00:00
|
|
|
memset(&a, 0, sizeof(a));
|
2007-12-19 07:19:23 +00:00
|
|
|
.in
|
2004-11-03 13:51:07 +00:00
|
|
|
.SH "RETURN VALUE"
|
2007-05-12 09:06:04 +00:00
|
|
|
The
|
|
|
|
.BR mbrtowc ()
|
|
|
|
function returns the number of bytes parsed from the
|
2008-06-09 15:49:35 +00:00
|
|
|
multibyte sequence starting at \fIs\fP, if a non-L\(aq\\0\(aq wide character
|
2004-11-03 13:51:07 +00:00
|
|
|
was recognized.
|
2008-06-09 15:49:35 +00:00
|
|
|
It returns 0 if an L\(aq\\0\(aq wide character was recognized.
|
2007-07-09 21:33:55 +00:00
|
|
|
It returns
|
2007-12-22 16:26:51 +00:00
|
|
|
.I (size_t)\ \-1
|
2006-02-09 20:29:51 +00:00
|
|
|
and sets \fIerrno\fP to \fBEILSEQ\fP, if an invalid multibyte sequence was
|
2007-04-12 22:42:49 +00:00
|
|
|
encountered.
|
2007-12-22 16:26:51 +00:00
|
|
|
It returns \fI(size_t)\ \-2\fP if it couldn't parse a complete multibyte
|
2004-11-03 13:51:07 +00:00
|
|
|
character, meaning that \fIn\fP should be increased.
|
|
|
|
.SH "CONFORMING TO"
|
2008-07-15 13:39:17 +00:00
|
|
|
C99.
|
2004-11-03 13:51:07 +00:00
|
|
|
.SH NOTES
|
2007-06-08 09:56:56 +00:00
|
|
|
The behavior of
|
2007-05-12 09:06:04 +00:00
|
|
|
.BR mbrtowc ()
|
2007-06-22 18:25:23 +00:00
|
|
|
depends on the
|
|
|
|
.B LC_CTYPE
|
|
|
|
category of the
|
2004-11-03 13:51:07 +00:00
|
|
|
current locale.
|
2007-05-16 18:25:50 +00:00
|
|
|
.SH "SEE ALSO"
|
|
|
|
.BR mbsrtowcs (3)
|