123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095 |
- /*++
- Copyright (c) 2013 Minoca Corp.
- This file is licensed under the terms of the GNU General Public License
- version 3. Alternative licensing terms are available. Contact
- info@minocacorp.com for details. See the LICENSE file at the root of this
- project for complete licensing information.
- Module Name:
- wchar.c
- Abstract:
- This module implements support for wide and multibyte characters.
- Author:
- Evan Green 23-Aug-2013
- Environment:
- User Mode C Library
- --*/
- //
- // ------------------------------------------------------------------- Includes
- //
- #include "libcp.h"
- #include <assert.h>
- #include <errno.h>
- #include <limits.h>
- #include <stdio.h>
- #include <string.h>
- //
- // --------------------------------------------------------------------- Macros
- //
- //
- // This is really a compile-time macro that ensure the mbstate_t structure is
- // big enough to contain the MULTIBYTE_STATE structure the runtime library
- // defines.
- //
- #define ASSERT_MBSTATE_SIZE() \
- assert(sizeof(mbstate_t) >= sizeof(MULTIBYTE_STATE))
- //
- // ---------------------------------------------------------------- Definitions
- //
- //
- // ------------------------------------------------------ Data Type Definitions
- //
- typedef const struct _WC_INTERVAL {
- USHORT First;
- USHORT Last;
- } WC_INTERVAL, *PWC_INTERVAL;
- //
- // ----------------------------------------------- Internal Function Prototypes
- //
- BOOL
- ClpSearchCombiningIntervals (
- wchar_t Character,
- PWC_INTERVAL Table,
- LONG Max
- );
- //
- // -------------------------------------------------------------------- Globals
- //
- //
- // Define the maximum number of bytes in a multibyte character for the current
- // locale.
- //
- LIBC_API int MB_CUR_MAX = MB_LEN_MAX;
- //
- // Store the internal character conversion state.
- //
- mbstate_t ClMultibyteConversionState;
- //
- // Define the intervals of combining characters.
- //
- static WC_INTERVAL ClCombiningCharacters[] = {
- {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
- {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
- {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
- {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
- {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
- {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
- {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
- {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
- {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
- {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
- {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
- {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
- {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
- {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
- {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
- {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
- {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
- {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
- {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
- {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
- {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
- {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
- {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
- {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
- {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
- {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
- {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
- {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
- {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
- {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
- {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
- {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
- {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
- {0xFFF9, 0xFFFB}
- };
- //
- // ------------------------------------------------------------------ Functions
- //
- LIBC_API
- int
- mbsinit (
- const mbstate_t *State
- )
- /*++
- Routine Description:
- This routine determines if the given state structure is in its initial
- shift state.
- Arguments:
- State - Supplies a pointer to the state to query.
- Return Value:
- Returns non-zero if the given state was a NULL pointer or is in its initial
- conversion state.
- 0 if the given state is not in its initial conversion state.
- --*/
- {
- ASSERT_MBSTATE_SIZE();
- if (State == NULL) {
- return 1;
- }
- if (RtlIsMultibyteStateReset((PMULTIBYTE_STATE)State) != FALSE) {
- return 1;
- }
- return 0;
- }
- LIBC_API
- wint_t
- btowc (
- int Character
- )
- /*++
- Routine Description:
- This routine attempts to convert a single byte into a wide character at
- the initial shift state.
- Arguments:
- Character - Supplies the character.
- Return Value:
- Returns the wide character representation of the character.
- WEOF if the input character is EOF or if the character (cast to an unsigned
- char) does not constitute a valid one byte character in the initial shift
- state.
- --*/
- {
- size_t Count;
- mbstate_t State;
- wchar_t WideCharacter;
- if (Character == EOF) {
- return WEOF;
- }
- memset(&State, 0, sizeof(mbstate_t));
- Count = mbrtowc(&WideCharacter, (const char *)&Character, 1, &State);
- if ((Count != 0) && (Count != 1)) {
- return WEOF;
- }
- return (wint_t)WideCharacter;
- }
- LIBC_API
- int
- wctob (
- wint_t Character
- )
- /*++
- Routine Description:
- This routine converts the given wide character into its corresponding
- single-byte character if possible, starting at the initial shift state.
- Arguments:
- Character - Supplies the wide character to convert to a byte.
- Return Value:
- Returns the byte representation of the character.
- EOF if the wide character is invalid or cannot be represented in a single
- byte.
- --*/
- {
- CHAR MultibyteCharacter[MULTIBYTE_MAX];
- size_t Result;
- mbstate_t State;
- memset(&State, 0, sizeof(mbstate_t));
- Result = wcrtomb(MultibyteCharacter, Character, &State);
- if ((Result == -1) || (Result > 1)) {
- return EOF;
- }
- if (Result == 0) {
- return '\0';
- }
- return MultibyteCharacter[0];
- }
- LIBC_API
- size_t
- mbtowc (
- wchar_t *WideCharacter,
- const char *MultibyteCharacter,
- size_t ByteCount
- )
- /*++
- Routine Description:
- This routine attempts to convert a multibyte character into a wide
- character. This routine is equivalent to calling mbrtowc with a NULL
- state pointer.
- Arguments:
- WideCharacter - Supplies an optional pointer wehre the converted wide
- character will be returned on success.
- MultibyteCharacter - Supplies a pointer to the multibyte character to
- convert.
- ByteCount - Supplies the maximum number of bytes to inspect in the
- multibyte character buffer.
- Return Value:
- 0 if the next character is the null character.
- Returns a positive value on success indicating the number of bytes that
- were used to construct the wide character.
- -2 if the byte count was too small, as the multibyte character could only
- be partially assembled with the given maximum number of bytes.
- -1 if an encoding error occurred.
- --*/
- {
- CHARACTER_ENCODING Encoding;
- if (WideCharacter == NULL) {
- RtlResetMultibyteState((PMULTIBYTE_STATE)&ClMultibyteConversionState);
- //
- // This should really get the LC_CTYPE encoding.
- //
- Encoding = RtlGetDefaultCharacterEncoding();
- if (RtlIsCharacterEncodingStateDependent(Encoding, FALSE) != FALSE) {
- return 1;
- }
- return 0;
- }
- return mbrtowc(WideCharacter, MultibyteCharacter, ByteCount, NULL);
- }
- LIBC_API
- size_t
- mbrtowc (
- wchar_t *WideCharacter,
- const char *MultibyteCharacter,
- size_t ByteCount,
- mbstate_t *State
- )
- /*++
- Routine Description:
- This routine attempts to convert a multibyte character into a wide
- character.
- Arguments:
- WideCharacter - Supplies an optional pointer wehre the converted wide
- character will be returned on success.
- MultibyteCharacter - Supplies a pointer to the multibyte character to
- convert.
- ByteCount - Supplies the maximum number of bytes to inspect in the
- multibyte character buffer.
- State - Supplies an optional pointer to a multibyte shift state object to
- use. If this value is not supplied, an internal state will be used.
- The downside of using the internal state is that it makes this function
- not thread safe nor reentrant.
- Return Value:
- 0 if the next character is the null character.
- Returns a positive value on success indicating the number of bytes that
- were used to construct the wide character.
- -2 if the byte count was too small, as the multibyte character could only
- be partially assembled with the given maximum number of bytes.
- -1 if an encoding error occurred.
- --*/
- {
- WCHAR LocalWideCharacter;
- PMULTIBYTE_STATE MultibyteState;
- ULONG Size;
- KSTATUS Status;
- ASSERT_MBSTATE_SIZE();
- if (State == NULL) {
- State = &ClMultibyteConversionState;
- }
- if (MultibyteCharacter == NULL) {
- memset(State, 0, sizeof(mbstate_t));
- return 0;
- }
- Size = ByteCount;
- MultibyteState = (PMULTIBYTE_STATE)State;
- Status = RtlConvertMultibyteCharacterToWide((PCHAR *)&MultibyteCharacter,
- &Size,
- &LocalWideCharacter,
- MultibyteState);
- if (KSUCCESS(Status)) {
- if (WideCharacter != NULL) {
- *WideCharacter = LocalWideCharacter;
- }
- if (LocalWideCharacter == L'\0') {
- return 0;
- }
- return ByteCount - Size;
- }
- if (Status == STATUS_BUFFER_TOO_SMALL) {
- return -2;
- }
- errno = ClConvertKstatusToErrorNumber(Status);
- return -1;
- }
- LIBC_API
- int
- wctomb (
- char *MultibyteCharacter,
- wchar_t WideCharacter
- )
- /*++
- Routine Description:
- This routine attempts to convert a single wide character into a multibyte
- character.
- Arguments:
- MultibyteCharacter - Supplies an optional pointer to the buffer where the
- multibyte character will be returned. This buffer is assumed to be at
- least MB_CUR_MAX bytes large. If this is NULL, then this function will
- determine whether or not the given character has state-dependent
- encodings.
- WideCharacter - Supplies a pointer to the wide character to convert. If this
- is a null terminator, then the shift state will be reset to its initial
- shift state.
- Return Value:
- 0 if the multibyte character is NULL and the character does not have state
- dependent encodings.
- Returns the number of bytes stored in the multibyte array, or that would
- be stored in the array were it non-NULL.
- -1 if an encoding error occurred, and errno may be set to EILSEQ.
- --*/
- {
- CHARACTER_ENCODING Encoding;
- if (MultibyteCharacter == NULL) {
- RtlResetMultibyteState((PMULTIBYTE_STATE)&ClMultibyteConversionState);
- //
- // This should really get the LC_CTYPE encoding.
- //
- Encoding = RtlGetDefaultCharacterEncoding();
- if (RtlIsCharacterEncodingStateDependent(Encoding, TRUE) != FALSE) {
- return 1;
- }
- return 0;
- }
- return wcrtomb(MultibyteCharacter, WideCharacter, NULL);
- }
- LIBC_API
- size_t
- wcrtomb (
- char *MultibyteCharacter,
- wchar_t WideCharacter,
- mbstate_t *State
- )
- /*++
- Routine Description:
- This routine attempts to convert a single wide character into a multibyte
- character.
- Arguments:
- MultibyteCharacter - Supplies an optional pointer to the buffer where the
- multibyte character will be returned. This buffer is assumed to be at
- least MB_CUR_MAX bytes large. If this is NULL, then functionality will
- be equivalent to wcrtomb(Buffer, L'\0', State), where Buffer is an
- internal buffer.
- WideCharacter - Supplies a pointer to the wide character to convert. If this
- is a null terminator, then the shift state will be reset to its initial
- shift state.
- State - Supplies an optional pointer to a multibyte shift state object to
- use. If this value is not supplied, an internal state will be used.
- The downside of using the internal state is that it makes this function
- not thread safe nor reentrant.
- Return Value:
- Returns the number of bytes stored in the multibyte array.
- -1 if an encoding error occurred, and errno may be set to EILSEQ.
- --*/
- {
- PMULTIBYTE_STATE MultibyteState;
- ULONG Size;
- KSTATUS Status;
- ASSERT_MBSTATE_SIZE();
- if (State == NULL) {
- State = &ClMultibyteConversionState;
- }
- if (MultibyteCharacter == NULL) {
- WideCharacter = L'\0';
- }
- MultibyteState = (PMULTIBYTE_STATE)State;
- Size = MULTIBYTE_MAX;
- Status = RtlConvertWideCharacterToMultibyte(WideCharacter,
- MultibyteCharacter,
- &Size,
- MultibyteState);
- if (KSUCCESS(Status)) {
- return Size;
- }
- errno = ClConvertKstatusToErrorNumber(Status);
- return -1;
- }
- LIBC_API
- size_t
- mbstowcs (
- wchar_t *Destination,
- const char *Source,
- size_t DestinationSize
- )
- /*++
- Routine Description:
- This routine converts a null-terminated sequence of multi-byte characters
- beginning in the inital shift state to a string of wide characters, up to
- and including a null terminator.
- Arguments:
- Destination - Supplies an optional pointer where the wide character string
- will be returned.
- Source - Supplies a pointer to the null-terminated multibyte string. No
- characters are examined after a null terminator is found.
- DestinationSize - Supplies the maximum number of elements to place in the
- wide string.
- Return Value:
- Returns the number of wide character array elements modified (or required
- if the wide string is NULL), not including the terminating NULL.
- -1 if an invalid character is encountered. The errno variable may be set
- to provide more information.
- --*/
- {
- mbstate_t State;
- memset(&State, 0, sizeof(mbstate_t));
- return mbsrtowcs(Destination, &Source, DestinationSize, &State);
- }
- LIBC_API
- size_t
- mbsrtowcs (
- wchar_t *Destination,
- const char **Source,
- size_t DestinationSize,
- mbstate_t *State
- )
- /*++
- Routine Description:
- This routine converts a null-terminated sequence of multi-byte characters
- beginning in the inital shift state to a string of wide characters, up to
- and including a null terminator.
- Arguments:
- Destination - Supplies an optional pointer where the wide character string
- will be returned.
- Source - Supplies a pointer that upon input contains a pointer to the null
- terminated multibyte string to convert. On output, this will contain
- one of two values. If the null terminator was encountered in the
- multibyte string, then the value returned here will be NULL. If the
- conversion stopped because it would exceed the wide string size, then
- the value returned here will be a pointer to the character one after
- the last character successfully converted. If the wide string is NULL,
- the pointer will remained unchanged on output.
- DestinationSize - Supplies the maximum number of elements to place in the
- wide string.
- State - Supplies an optional pointer to a multibyte shift state object to
- use. If this value is not supplied, an internal state will be used.
- The downside of using the internal state is that it makes this function
- not thread safe nor reentrant.
- Return Value:
- Returns the number of wide character array elements modified (or required
- if the wide string is NULL), not including the terminating NULL.
- -1 if an invalid character is encountered. The errno variable may be set
- to provide more information.
- --*/
- {
- size_t ElementsConverted;
- const char *MultibyteString;
- size_t Result;
- wchar_t WideCharacter;
- ElementsConverted = 0;
- MultibyteString = *Source;
- while ((Destination == NULL) || (DestinationSize > 0)) {
- Result = mbrtowc(&WideCharacter, MultibyteString, MB_LEN_MAX, State);
- if (Result == -1) {
- return -1;
- }
- if (Destination != NULL) {
- *Destination = WideCharacter;
- Destination += 1;
- DestinationSize -= 1;
- }
- if (Result == 0) {
- break;
- }
- if (WideCharacter == L'\0') {
- MultibyteString = NULL;
- break;
- }
- MultibyteString += Result;
- ElementsConverted += 1;
- }
- if (Destination != NULL) {
- *Source = MultibyteString;
- }
- return ElementsConverted;
- }
- LIBC_API
- size_t
- wcstombs (
- char *Destination,
- const wchar_t *Source,
- size_t DestinationSize
- )
- /*++
- Routine Description:
- This routine converts a string of wide characters into a multibyte string,
- up to and including a wide null terminator.
- Arguments:
- Destination - Supplies an optional pointer to a destination where the
- multibyte characters will be returned.
- Source - Supplies a pointer to the null terminated wide character string to
- convert.
- DestinationSize - Supplies the number of bytes in the destination buffer
- (or the theoretical destination buffer if one was not supplied).
- Return Value:
- Returns the number of bytes in the resulting character sequence, not
- including the null terminator (if any).
- -1 if an invalid wide character is encountered. The errno variable may be
- set to provide more information.
- --*/
- {
- mbstate_t State;
- memset(&State, 0, sizeof(State));
- return wcsrtombs(Destination, &Source, DestinationSize, &State);
- }
- LIBC_API
- size_t
- wcsrtombs (
- char *Destination,
- const wchar_t **Source,
- size_t DestinationSize,
- mbstate_t *State
- )
- /*++
- Routine Description:
- This routine converts a string of wide characters into a multibyte string,
- up to and including a wide null terminator.
- Arguments:
- Destination - Supplies an optional pointer to a destination where the
- multibyte characters will be returned.
- Source - Supplies a pointer that upon input contains a pointer to the
- null terminated wide character string to convert. On output, this will
- contain one of two values. If the null terminator was encountered in
- the source string, then the value returned here will be NULL. If the
- conversion stopped because it would exceed the destination size,
- then the value returned here will be a pointer to the character one
- after the last character successfully converted. If the destination
- is NULL, the pointer will remained unchanged on ouput.
- DestinationSize - Supplies the number of bytes in the destination buffer
- (or the theoretical destination buffer if one was not supplied).
- State - Supplies an optional pointer to a multibyte shift state object to
- use. If this value is not supplied, an internal state will be used.
- The downside of using the internal state is that it makes this function
- not thread safe nor reentrant.
- Return Value:
- Returns the number of bytes in the resulting character sequence, not
- including the null terminator (if any).
- -1 if an invalid wide character is encountered. The errno variable may be
- set to provide more information.
- --*/
- {
- char HoldingBuffer[MB_LEN_MAX];
- mbstate_t PreviousState;
- size_t Result;
- size_t TotalWritten;
- const wchar_t *WideString;
- if (State == NULL) {
- State = &ClMultibyteConversionState;
- }
- Result = 0;
- TotalWritten = 0;
- WideString = *Source;
- while ((Destination == NULL) || (DestinationSize > 0)) {
- PreviousState = *State;
- Result = wcrtomb(HoldingBuffer, *WideString, State);
- if (Result == -1) {
- errno = EILSEQ;
- break;
- } else if (Destination != NULL) {
- //
- // Copy the holding buffer to the destination if there's enough
- // room.
- //
- if (Result <= DestinationSize) {
- memcpy(Destination, HoldingBuffer, Result);
- Destination += Result;
- DestinationSize -= Result;
- //
- // The remaining size is not big enough to hold the character. Back
- // out the state advancement.
- //
- } else {
- *State = PreviousState;
- break;
- }
- }
- //
- // If this was a null terminator, stop.
- //
- if (*WideString == L'\0') {
- WideString = NULL;
- break;
- }
- //
- // Update the total bytes written. This never includes the null
- // terminator.
- //
- TotalWritten += Result;
- //
- // Advance the source string and continue.
- //
- WideString += 1;
- }
- //
- // Return the source string.
- //
- if (Destination != NULL) {
- *Source = WideString;
- }
- if (Result == -1) {
- return -1;
- }
- return TotalWritten;
- }
- LIBC_API
- int
- mblen (
- const char *MultibyteCharacter,
- size_t Size
- )
- /*++
- Routine Description:
- This routine returns the number of bytes constituting the given multibyte
- character. It shall be equivalent to:
- mbtowc(NULL, MultibyteCharacter, Size);
- except that the builtin state of mbtowc is not affected.
- Arguments:
- MultibyteCharacter - Supplies an optional pointer to the multibyte
- character to get the length of.
- Size - Supplies the size of the multibyte character buffer.
- Return Value:
- 0 if the next character corresponds to the null wide character.
- Returns the positive number of bytes constituting the next character on
- success.
- -2 if the size of the buffer is too small, such that only a partial wide
- character could be constructed using the given bytes.
- -1 on error, and errno will be set to contain more information.
- --*/
- {
- mbstate_t State;
- memset(&State, 0, sizeof(mbstate_t));
- return (int)mbrtowc(NULL, MultibyteCharacter, Size, &State);
- }
- LIBC_API
- size_t
- mbrlen (
- const char *MultibyteCharacter,
- size_t Size,
- mbstate_t *State
- )
- /*++
- Routine Description:
- This routine returns the number of bytes constituting the given multibyte
- character. It shall be equivalent to:
- mbrtowc(NULL, MultibyteCharacter, Size, State);.
- Arguments:
- MultibyteCharacter - Supplies an optional pointer to the multibyte
- character to get the length of.
- Size - Supplies the size of the multibyte character buffer.
- State - Supplies an optional pointer to an initialized multibyte conversion
- state buffer. If this is not supplied, an internal state buffer will
- be used, however using the internal one makes this function neither
- safe nor reentrant.
- Return Value:
- 0 if the next character corresponds to the null wide character.
- Returns the positive number of bytes constituting the next character on
- success.
- -2 if the size of the buffer is too small, such that only a partial wide
- character could be constructed using the given bytes.
- -1 on error, and errno will be set to contain more information.
- --*/
- {
- return mbrtowc(NULL, MultibyteCharacter, Size, State);
- }
- LIBC_API
- int
- wcwidth (
- wchar_t Character
- )
- /*++
- Routine Description:
- This routine returns the number of display column positions the given wide
- character occupies.
- Arguments:
- Character - Supplies the character to examine.
- Return Value:
- 0 for the null character.
- -1 if the character is not printable.
- Otherwise, returns the number of columns the given character takes up.
- --*/
- {
- LONG Max;
- //
- // This function is based on Markus Kuhn's function at
- // https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c, which was placed in the
- // public domain.
- //
- if (Character == 0) {
- return 0;
- }
- if ((Character < 0x20) || ((Character >= 0x7F) && (Character < 0xA0))) {
- return -1;
- }
- //
- // Search the non-spacing characters.
- //
- Max = sizeof(ClCombiningCharacters) / sizeof(ClCombiningCharacters[0]) - 1;
- if (ClpSearchCombiningIntervals(Character, ClCombiningCharacters, Max) !=
- FALSE) {
- return 0;
- }
- if (Character >= 0x1100) {
- if ((Character <= 0x115F) ||
- (Character == 0x2329) ||
- (Character == 0x232A) ||
- ((Character >= 0x2E80) && (Character <= 0xA4CF) &&
- (Character != 0x303F)) ||
- ((Character >= 0xAC00) && (Character <= 0xD7A3)) ||
- ((Character >= 0xF900) && (Character <= 0xFAFF)) ||
- ((Character >= 0xFE10) && (Character <= 0xFE19)) ||
- ((Character >= 0xFE30) && (Character <= 0xFE6F)) ||
- ((Character >= 0xFF00) && (Character <= 0xFF60)) ||
- ((Character >= 0xFFE0) && (Character <= 0xFFE6)) ||
- ((Character >= 0x20000) && (Character <= 0x2FFFD)) ||
- ((Character >= 0x30000) && (Character <= 0x3FFFD))) {
- return 2;
- }
- }
- return 1;
- }
- //
- // --------------------------------------------------------- Internal Functions
- //
- BOOL
- ClpSearchCombiningIntervals (
- wchar_t Character,
- PWC_INTERVAL Table,
- LONG Max
- )
- /*++
- Routine Description:
- This routine performs a binary search to determine if the given character
- is listed in the given table.
- Arguments:
- Character - Supplies the character to examine.
- Table - Supplies a pointer to the table to search in.
- Max - Supplies the maximum index of the table, inclusive.
- Return Value:
- TRUE if the character is in the table.
- FALSE if the character is not in the table.
- --*/
- {
- LONG Mid;
- LONG Min;
- if ((Character < Table[0].First) || (Character > Table[Max].Last)) {
- return FALSE;
- }
- Min = 0;
- while (Max >= Min) {
- Mid = (Min + Max) / 2;
- if (Character > Table[Mid].Last) {
- Min = Mid + 1;
- } else if (Character < Table[Mid].First) {
- Max = Mid - 1;
- } else {
- return TRUE;
- }
- }
- return FALSE;
- }
|