man-pages/man3/mbstowcs.3

'\" t -*- coding: UTF-8 -*-
.\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
.\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
.\"
.\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA)
.\" This is free documentation; you can redistribute it and/or
.\" modify it under the terms of the GNU General Public License as
.\" published by the Free Software Foundation; either version 2 of
.\" the License, or (at your option) any later version.
.\" %%%LICENSE_END
.\"
.\" References consulted:
.\"   GNU glibc-2 source code and manual
.\"   Dinkumware C library reference http://www.dinkumware.com/
.\"   OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
.\"   ISO/IEC 9899:1999
.\"
.TH MBSTOWCS 3  2014-03-18 "GNU" "Linux Programmer's Manual"
.SH NAME
mbstowcs \- convert a multibyte string to a wide-character string
.SH SYNOPSIS
.nf
.B #include <stdlib.h>
.sp
.BI "size_t mbstowcs(wchar_t *" dest ", const char *" src ", size_t " n );
.fi
.SH DESCRIPTION
If
.I dest
is not NULL,
the
.BR mbstowcs ()
function converts the
multibyte string
.I src
to a wide-character string starting at
.IR dest .
At most
.I n
wide characters are written to
.IR dest .
The conversion starts
in the initial state.
The conversion can stop for three reasons:
.IP 1. 3
An invalid multibyte sequence has been encountered.
In this case,
.I (size_t)\ \-1
is returned.
.IP 2.
.I n
non-L\(aq\\0\(aq wide characters have been stored at
.IR dest .
In this case, the number of wide characters written to
.I dest
is returned, but the
shift state at this point is lost.
.IP 3.
The multibyte string has been completely converted, including the
terminating null wide character (\(aq\\0\(aq).
In this case, the number of wide characters written to
.IR dest ,
excluding the terminating null wide character, is returned.
.PP
The programmer must ensure that there is room for at least
.I n
wide
characters at
.IR dest .
.PP
If
.IR dest
is NULL,
.I n
is ignored, and the conversion proceeds as
above, except that the converted wide characters are not written out to memory,
and that no length limit exists.
.PP
To avoid case 2 above, the programmer should make sure that
.I n
is
greater than or equal to
.IR "mbstowcs(NULL,src,0)+1" .
.SH RETURN VALUE
The
.BR mbstowcs ()
function returns the number of wide characters that make
up the converted part of the wide-character string, not including the
terminating null wide character.
If an invalid multibyte sequence was
encountered,
.I (size_t)\ \-1
is returned.
.SH CONFORMING TO
C99.
.SH NOTES
The behavior of
.BR mbstowcs ()
depends on the
.B LC_CTYPE
category of the
current locale.
.PP
The function
.BR mbsrtowcs (3)
provides a better interface to the same
functionality.
.SH EXAMPLE
The program below illustrates the use of
.BR mbstowcs (),
as well as some of the wide character classification functions.
An example run is the following:
.in +4n
.nf

$ ./t_mbstowcs de_DE.UTF\-8 Grüße!
Length of source string (excluding terminator):
    8 bytes
    6 multibyte characters

Wide character string is: Grüße! (6 characters)
    G alpha upper
    r alpha lower
    ü alpha lower
    ß alpha lower
    e alpha lower
    ! !alpha
.fi
.in
.SS Program source
.nf
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>

/* Convert the multibyte string given in argv[2] to a wide character
   string, using the locale named in argv[1], and then print its
   length and the alphabetic/case classification of each of its
   wide characters */

int
main(int argc, char *argv[])
{
    size_t mbslen;      /* Number of multibyte characters in source */
    wchar_t *wcs;       /* Pointer to converted wide character string */
    wchar_t *wp;        /* Cursor for walking the converted string */

    if (argc < 3) {
        fprintf(stderr, "Usage: %s <locale> <string>\\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* Apply the specified locale */

    if (setlocale(LC_ALL, argv[1]) == NULL) {
        perror("setlocale");
        exit(EXIT_FAILURE);
    }

    /* Calculate the length required to hold argv[2] converted to
       a wide character string */

    mbslen = mbstowcs(NULL, argv[2], 0);
    if (mbslen == (size_t) \-1) {
        perror("mbstowcs");
        exit(EXIT_FAILURE);
    }

    /* Describe the source string to the user */

    printf("Length of source string (excluding terminator):\\n");
    printf("    %zu bytes\\n", strlen(argv[2]));
    printf("    %zu multibyte characters\\n\\n", mbslen);

    /* Allocate wide character string of the desired size.  Add 1
       to allow for terminating null wide character (L\(aq\\0\(aq). */

    wcs = calloc(mbslen + 1, sizeof(wchar_t));
    if (wcs == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }

    /* Convert the multibyte character string in argv[2] to a
       wide character string */

    if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
        perror("mbstowcs");
        exit(EXIT_FAILURE);
    }

    printf("Wide character string is: %ls (%zu characters)\\n",
            wcs, mbslen);

    /* Now do some inspection of the classes of the characters in
       the wide character string */

    for (wp = wcs; *wp != 0; wp++) {
        printf("    %lc ", (wint_t) *wp);

        if (!iswalpha(*wp))
            printf("!");
        printf("alpha ");

        if (iswalpha(*wp)) {
            if (iswupper(*wp))
                printf("upper ");

            if (iswlower(*wp))
                printf("lower ");
        }

        putchar(\(aq\\n\(aq);
    }

    exit(EXIT_SUCCESS);
}
.fi
.SH SEE ALSO
.BR mblen (3),
.BR mbsrtowcs (3),
.BR mbtowc (3),
.BR wctomb (3),
.BR wcstombs (3)