mirror of https://github.com/mkerrisk/man-pages
222 lines
5.3 KiB
Groff
222 lines
5.3 KiB
Groff
'\" t -*- coding: UTF-8 -*-
|
|
.\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
|
|
.\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
|
|
.\"
|
|
.\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA)
|
|
.\" This is free documentation; you can redistribute it and/or
|
|
.\" modify it under the terms of the GNU General Public License as
|
|
.\" published by the Free Software Foundation; either version 2 of
|
|
.\" the License, or (at your option) any later version.
|
|
.\" %%%LICENSE_END
|
|
.\"
|
|
.\" References consulted:
|
|
.\" GNU glibc-2 source code and manual
|
|
.\" Dinkumware C library reference http://www.dinkumware.com/
|
|
.\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
|
|
.\" ISO/IEC 9899:1999
|
|
.\"
|
|
.TH MBSTOWCS 3 2014-03-18 "GNU" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
mbstowcs \- convert a multibyte string to a wide-character string
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.B #include <stdlib.h>
|
|
.sp
|
|
.BI "size_t mbstowcs(wchar_t *" dest ", const char *" src ", size_t " n );
|
|
.fi
|
|
.SH DESCRIPTION
|
|
If
|
|
.I dest
|
|
is not NULL,
|
|
the
|
|
.BR mbstowcs ()
|
|
function converts the
|
|
multibyte string
|
|
.I src
|
|
to a wide-character string starting at
|
|
.IR dest .
|
|
At most
|
|
.I n
|
|
wide characters are written to
|
|
.IR dest .
|
|
The conversion starts
|
|
in the initial state.
|
|
The conversion can stop for three reasons:
|
|
.IP 1. 3
|
|
An invalid multibyte sequence has been encountered.
|
|
In this case,
|
|
.I (size_t)\ \-1
|
|
is returned.
|
|
.IP 2.
|
|
.I n
|
|
non-L\(aq\\0\(aq wide characters have been stored at
|
|
.IR dest .
|
|
In this case, the number of wide characters written to
|
|
.I dest
|
|
is returned, but the
|
|
shift state at this point is lost.
|
|
.IP 3.
|
|
The multibyte string has been completely converted, including the
|
|
terminating null wide character (L\(aq\\0\(aq).
|
|
In this case, the number of wide characters written to
|
|
.IR dest ,
|
|
excluding the terminating null wide character, is returned.
|
|
.PP
|
|
The programmer must ensure that there is room for at least
|
|
.I n
|
|
wide
|
|
characters at
|
|
.IR dest .
|
|
.PP
|
|
If
|
|
.I dest
|
|
is NULL,
|
|
.I n
|
|
is ignored, and the conversion proceeds as
|
|
above, except that the converted wide characters are not written out to memory,
|
|
and that no length limit exists.
|
|
.PP
|
|
To avoid case 2 above, the programmer should make sure that
|
|
.I n
|
|
is
|
|
greater than or equal to
|
|
.IR "mbstowcs(NULL,src,0)+1" .
|
|
.SH RETURN VALUE
|
|
The
|
|
.BR mbstowcs ()
|
|
function returns the number of wide characters that make
|
|
up the converted part of the wide-character string, not including the
|
|
terminating null wide character.
|
|
If an invalid multibyte sequence was
|
|
encountered,
|
|
.I (size_t)\ \-1
|
|
is returned.
|
|
.SH CONFORMING TO
|
|
C99.
|
|
.SH NOTES
|
|
The behavior of
|
|
.BR mbstowcs ()
|
|
depends on the
|
|
.B LC_CTYPE
|
|
category of the
|
|
current locale.
|
|
.PP
|
|
The function
|
|
.BR mbsrtowcs (3)
|
|
provides a better interface to the same
|
|
functionality.
|
|
.SH EXAMPLE
|
|
The program below illustrates the use of
|
|
.BR mbstowcs (),
|
|
as well as some of the wide-character classification functions.
|
|
An example run is the following:
|
|
.in +4n
|
|
.nf
|
|
|
|
$ ./t_mbstowcs de_DE.UTF\-8 Grüße!
|
|
Length of source string (excluding terminator):
|
|
8 bytes
|
|
6 multibyte characters
|
|
|
|
Wide character string is: Grüße! (6 characters)
|
|
G alpha upper
|
|
r alpha lower
|
|
ü alpha lower
|
|
ß alpha lower
|
|
e alpha lower
|
|
! !alpha
|
|
.fi
|
|
.in
|
|
.SS Program source
|
|
.nf
|
|
#include <locale.h>
|
|
#include <wchar.h>
#include <wctype.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
size_t mbslen; /* Number of multibyte characters in source */
|
|
wchar_t *wcs; /* Pointer to converted wide character string */
|
|
wchar_t *wp;
|
|
|
|
if (argc < 3) {
|
|
fprintf(stderr, "Usage: %s <locale> <string>\\n", argv[0]);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* Apply the specified locale */
|
|
|
|
if (setlocale(LC_ALL, argv[1]) == NULL) {
|
|
perror("setlocale");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* Calculate the length required to hold argv[2] converted to
|
|
a wide character string */
|
|
|
|
mbslen = mbstowcs(NULL, argv[2], 0);
|
|
if (mbslen == (size_t) \-1) {
|
|
perror("mbstowcs");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* Describe the source string to the user */
|
|
|
|
printf("Length of source string (excluding terminator):\\n");
|
|
printf(" %zu bytes\\n", strlen(argv[2]));
|
|
printf(" %zu multibyte characters\\n\\n", mbslen);
|
|
|
|
/* Allocate wide character string of the desired size. Add 1
|
|
to allow for terminating null wide character (L\(aq\\0\(aq). */
|
|
|
|
wcs = calloc(mbslen + 1, sizeof(wchar_t));
|
|
if (wcs == NULL) {
|
|
perror("calloc");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* Convert the multibyte character string in argv[2] to a
|
|
wide character string */
|
|
|
|
if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
|
|
perror("mbstowcs");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
printf("Wide character string is: %ls (%zu characters)\\n",
|
|
wcs, mbslen);
|
|
|
|
/* Now do some inspection of the classes of the characters in
|
|
the wide character string */
|
|
|
|
for (wp = wcs; *wp != 0; wp++) {
|
|
printf(" %lc ", (wint_t) *wp);
|
|
|
|
if (!iswalpha(*wp))
|
|
printf("!");
|
|
printf("alpha ");
|
|
|
|
if (iswalpha(*wp)) {
|
|
if (iswupper(*wp))
|
|
printf("upper ");
|
|
|
|
if (iswlower(*wp))
|
|
printf("lower ");
|
|
}
|
|
|
|
putchar(\(aq\\n\(aq);
|
|
}
|
|
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
.fi
|
|
.SH SEE ALSO
|
|
.BR mblen (3),
|
|
.BR mbsrtowcs (3),
|
|
.BR mbtowc (3),
|
|
.BR wctomb (3),
|
|
.BR wcstombs (3)
|