1
0

wchar.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU General Public License
  4. version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details. See the LICENSE file at the root of this
  6. project for complete licensing information.
  7. Module Name:
  8. wchar.c
  9. Abstract:
  10. This module implements support for wide and multibyte characters.
  11. Author:
  12. Evan Green 23-Aug-2013
  13. Environment:
  14. User Mode C Library
  15. --*/
  16. //
  17. // ------------------------------------------------------------------- Includes
  18. //
  19. #include "libcp.h"
  20. #include <assert.h>
  21. #include <errno.h>
  22. #include <limits.h>
  23. #include <stdio.h>
  24. #include <string.h>
  25. //
  26. // --------------------------------------------------------------------- Macros
  27. //
  28. //
  29. // This macro checks that the mbstate_t structure is big enough to contain
  30. // the MULTIBYTE_STATE structure the runtime library defines. The sizes are
  31. // compile-time constants, but the check itself is performed through assert().
  32. //
  33. #define ASSERT_MBSTATE_SIZE() \
  34. assert(sizeof(mbstate_t) >= sizeof(MULTIBYTE_STATE))
  35. //
  36. // ---------------------------------------------------------------- Definitions
  37. //
  38. //
  39. // ------------------------------------------------------ Data Type Definitions
  40. //
  /*++
  Structure Description:
      This structure describes one contiguous range of wide character values.
      Both end points belong to the range.
  Members:
      First - Stores the lowest character value in the range.
      Last - Stores the highest character value in the range.
  --*/
  41. typedef const struct _WC_INTERVAL {
  42. USHORT First;
  43. USHORT Last;
  44. } WC_INTERVAL, *PWC_INTERVAL;
  45. //
  46. // ----------------------------------------------- Internal Function Prototypes
  47. //
  //
  // Determines whether a character falls inside any interval of a table. Max is
  // the highest valid table index, not the element count.
  //
  48. BOOL
  49. ClpSearchCombiningIntervals (
  50. wchar_t Character,
  51. PWC_INTERVAL Table,
  52. LONG Max
  53. );
  54. //
  55. // -------------------------------------------------------------------- Globals
  56. //
  57. //
  58. // Define the maximum number of bytes in a multibyte character for the current
  59. // locale. It is initialized to MB_LEN_MAX.
  60. //
  61. LIBC_API int MB_CUR_MAX = MB_LEN_MAX;
  62. //
  63. // Store the internal character conversion state. The conversion routines in
  // this file fall back to this single shared object when the caller passes a
  // NULL state pointer, which is why those calls are not thread safe.
  64. //
  65. mbstate_t ClMultibyteConversionState;
  66. //
  67. // Define the intervals of combining characters. wcwidth reports a width of
  // zero for any character inside one of these intervals. The table is looked up
  // with a binary search, so the entries must stay sorted in ascending order and
  // must not overlap.
  68. //
  69. static WC_INTERVAL ClCombiningCharacters[] = {
  70. {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
  71. {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
  72. {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
  73. {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
  74. {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
  75. {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
  76. {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
  77. {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
  78. {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
  79. {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
  80. {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
  81. {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
  82. {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
  83. {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
  84. {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
  85. {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
  86. {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
  87. {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
  88. {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
  89. {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
  90. {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
  91. {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
  92. {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
  93. {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
  94. {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
  95. {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
  96. {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
  97. {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
  98. {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
  99. {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
  100. {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
  101. {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
  102. {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
  103. {0xFFF9, 0xFFFB}
  104. };
  105. //
  106. // ------------------------------------------------------------------ Functions
  107. //
  108. LIBC_API
  109. int
  110. mbsinit (
  111. const mbstate_t *State
  112. )
  113. /*++
  114. Routine Description:
  115. This routine determines if the given state structure is in its initial
  116. shift state.
  117. Arguments:
  118. State - Supplies a pointer to the state to query.
  119. Return Value:
  120. Returns non-zero if the given state was a NULL pointer or is in its initial
  121. conversion state.
  122. 0 if the given state is not in its initial conversion state.
  123. --*/
  124. {
  125. ASSERT_MBSTATE_SIZE();
  126. if (State == NULL) {
  127. return 1;
  128. }
  129. if (RtlIsMultibyteStateReset((PMULTIBYTE_STATE)State) != FALSE) {
  130. return 1;
  131. }
  132. return 0;
  133. }
  134. LIBC_API
  135. wint_t
  136. btowc (
  137. int Character
  138. )
  139. /*++
  140. Routine Description:
  141. This routine attempts to convert a single byte into a wide character at
  142. the initial shift state.
  143. Arguments:
  144. Character - Supplies the character.
  145. Return Value:
  146. Returns the wide character representation of the character.
  147. WEOF if the input character is EOF or if the character (cast to an unsigned
  148. char) does not constitute a valid one byte character in the initial shift
  149. state.
  150. --*/
  151. {
  152. size_t Count;
  153. mbstate_t State;
  154. wchar_t WideCharacter;
  155. if (Character == EOF) {
  156. return WEOF;
  157. }
  158. memset(&State, 0, sizeof(mbstate_t));
  159. Count = mbrtowc(&WideCharacter, (const char *)&Character, 1, &State);
  160. if ((Count != 0) && (Count != 1)) {
  161. return WEOF;
  162. }
  163. return (wint_t)WideCharacter;
  164. }
  165. LIBC_API
  166. int
  167. wctob (
  168. wint_t Character
  169. )
  170. /*++
  171. Routine Description:
  172. This routine converts the given wide character into its corresponding
  173. single-byte character if possible, starting at the initial shift state.
  174. Arguments:
  175. Character - Supplies the wide character to convert to a byte.
  176. Return Value:
  177. Returns the byte representation of the character.
  178. EOF if the wide character is invalid or cannot be represented in a single
  179. byte.
  180. --*/
  181. {
  182. CHAR MultibyteCharacter[MULTIBYTE_MAX];
  183. size_t Result;
  184. mbstate_t State;
  185. memset(&State, 0, sizeof(mbstate_t));
  186. Result = wcrtomb(MultibyteCharacter, Character, &State);
  187. if ((Result == -1) || (Result > 1)) {
  188. return EOF;
  189. }
  190. if (Result == 0) {
  191. return '\0';
  192. }
  193. return MultibyteCharacter[0];
  194. }
  195. LIBC_API
  196. size_t
  197. mbtowc (
  198. wchar_t *WideCharacter,
  199. const char *MultibyteCharacter,
  200. size_t ByteCount
  201. )
  202. /*++
  203. Routine Description:
  204. This routine attempts to convert a multibyte character into a wide
  205. character. This routine is equivalent to calling mbrtowc with a NULL
  206. state pointer.
  207. Arguments:
  208. WideCharacter - Supplies an optional pointer wehre the converted wide
  209. character will be returned on success.
  210. MultibyteCharacter - Supplies a pointer to the multibyte character to
  211. convert.
  212. ByteCount - Supplies the maximum number of bytes to inspect in the
  213. multibyte character buffer.
  214. Return Value:
  215. 0 if the next character is the null character.
  216. Returns a positive value on success indicating the number of bytes that
  217. were used to construct the wide character.
  218. -2 if the byte count was too small, as the multibyte character could only
  219. be partially assembled with the given maximum number of bytes.
  220. -1 if an encoding error occurred.
  221. --*/
  222. {
  223. CHARACTER_ENCODING Encoding;
  224. if (WideCharacter == NULL) {
  225. RtlResetMultibyteState((PMULTIBYTE_STATE)&ClMultibyteConversionState);
  226. //
  227. // This should really get the LC_CTYPE encoding.
  228. //
  229. Encoding = RtlGetDefaultCharacterEncoding();
  230. if (RtlIsCharacterEncodingStateDependent(Encoding, FALSE) != FALSE) {
  231. return 1;
  232. }
  233. return 0;
  234. }
  235. return mbrtowc(WideCharacter, MultibyteCharacter, ByteCount, NULL);
  236. }
  237. LIBC_API
  238. size_t
  239. mbrtowc (
  240. wchar_t *WideCharacter,
  241. const char *MultibyteCharacter,
  242. size_t ByteCount,
  243. mbstate_t *State
  244. )
  245. /*++
  246. Routine Description:
  247. This routine attempts to convert a multibyte character into a wide
  248. character.
  249. Arguments:
  250. WideCharacter - Supplies an optional pointer wehre the converted wide
  251. character will be returned on success.
  252. MultibyteCharacter - Supplies a pointer to the multibyte character to
  253. convert.
  254. ByteCount - Supplies the maximum number of bytes to inspect in the
  255. multibyte character buffer.
  256. State - Supplies an optional pointer to a multibyte shift state object to
  257. use. If this value is not supplied, an internal state will be used.
  258. The downside of using the internal state is that it makes this function
  259. not thread safe nor reentrant.
  260. Return Value:
  261. 0 if the next character is the null character.
  262. Returns a positive value on success indicating the number of bytes that
  263. were used to construct the wide character.
  264. -2 if the byte count was too small, as the multibyte character could only
  265. be partially assembled with the given maximum number of bytes.
  266. -1 if an encoding error occurred.
  267. --*/
  268. {
  269. WCHAR LocalWideCharacter;
  270. PMULTIBYTE_STATE MultibyteState;
  271. ULONG Size;
  272. KSTATUS Status;
  273. ASSERT_MBSTATE_SIZE();
  274. if (State == NULL) {
  275. State = &ClMultibyteConversionState;
  276. }
  277. if (MultibyteCharacter == NULL) {
  278. memset(State, 0, sizeof(mbstate_t));
  279. return 0;
  280. }
  281. Size = ByteCount;
  282. MultibyteState = (PMULTIBYTE_STATE)State;
  283. Status = RtlConvertMultibyteCharacterToWide((PCHAR *)&MultibyteCharacter,
  284. &Size,
  285. &LocalWideCharacter,
  286. MultibyteState);
  287. if (KSUCCESS(Status)) {
  288. if (WideCharacter != NULL) {
  289. *WideCharacter = LocalWideCharacter;
  290. }
  291. if (LocalWideCharacter == L'\0') {
  292. return 0;
  293. }
  294. return ByteCount - Size;
  295. }
  296. if (Status == STATUS_BUFFER_TOO_SMALL) {
  297. return -2;
  298. }
  299. errno = ClConvertKstatusToErrorNumber(Status);
  300. return -1;
  301. }
  302. LIBC_API
  303. int
  304. wctomb (
  305. char *MultibyteCharacter,
  306. wchar_t WideCharacter
  307. )
  308. /*++
  309. Routine Description:
  310. This routine attempts to convert a single wide character into a multibyte
  311. character.
  312. Arguments:
  313. MultibyteCharacter - Supplies an optional pointer to the buffer where the
  314. multibyte character will be returned. This buffer is assumed to be at
  315. least MB_CUR_MAX bytes large. If this is NULL, then this function will
  316. determine whether or not the given character has state-dependent
  317. encodings.
  318. WideCharacter - Supplies a pointer to the wide character to convert. If this
  319. is a null terminator, then the shift state will be reset to its initial
  320. shift state.
  321. Return Value:
  322. 0 if the multibyte character is NULL and the character does not have state
  323. dependent encodings.
  324. Returns the number of bytes stored in the multibyte array, or that would
  325. be stored in the array were it non-NULL.
  326. -1 if an encoding error occurred, and errno may be set to EILSEQ.
  327. --*/
  328. {
  329. CHARACTER_ENCODING Encoding;
  330. if (MultibyteCharacter == NULL) {
  331. RtlResetMultibyteState((PMULTIBYTE_STATE)&ClMultibyteConversionState);
  332. //
  333. // This should really get the LC_CTYPE encoding.
  334. //
  335. Encoding = RtlGetDefaultCharacterEncoding();
  336. if (RtlIsCharacterEncodingStateDependent(Encoding, TRUE) != FALSE) {
  337. return 1;
  338. }
  339. return 0;
  340. }
  341. return wcrtomb(MultibyteCharacter, WideCharacter, NULL);
  342. }
  343. LIBC_API
  344. size_t
  345. wcrtomb (
  346. char *MultibyteCharacter,
  347. wchar_t WideCharacter,
  348. mbstate_t *State
  349. )
  350. /*++
  351. Routine Description:
  352. This routine attempts to convert a single wide character into a multibyte
  353. character.
  354. Arguments:
  355. MultibyteCharacter - Supplies an optional pointer to the buffer where the
  356. multibyte character will be returned. This buffer is assumed to be at
  357. least MB_CUR_MAX bytes large. If this is NULL, then functionality will
  358. be equivalent to wcrtomb(Buffer, L'\0', State), where Buffer is an
  359. internal buffer.
  360. WideCharacter - Supplies a pointer to the wide character to convert. If this
  361. is a null terminator, then the shift state will be reset to its initial
  362. shift state.
  363. State - Supplies an optional pointer to a multibyte shift state object to
  364. use. If this value is not supplied, an internal state will be used.
  365. The downside of using the internal state is that it makes this function
  366. not thread safe nor reentrant.
  367. Return Value:
  368. Returns the number of bytes stored in the multibyte array.
  369. -1 if an encoding error occurred, and errno may be set to EILSEQ.
  370. --*/
  371. {
  372. PMULTIBYTE_STATE MultibyteState;
  373. ULONG Size;
  374. KSTATUS Status;
  375. ASSERT_MBSTATE_SIZE();
  376. if (State == NULL) {
  377. State = &ClMultibyteConversionState;
  378. }
  379. if (MultibyteCharacter == NULL) {
  380. WideCharacter = L'\0';
  381. }
  382. MultibyteState = (PMULTIBYTE_STATE)State;
  383. Size = MULTIBYTE_MAX;
  384. Status = RtlConvertWideCharacterToMultibyte(WideCharacter,
  385. MultibyteCharacter,
  386. &Size,
  387. MultibyteState);
  388. if (KSUCCESS(Status)) {
  389. return Size;
  390. }
  391. errno = ClConvertKstatusToErrorNumber(Status);
  392. return -1;
  393. }
  394. LIBC_API
  395. size_t
  396. mbstowcs (
  397. wchar_t *Destination,
  398. const char *Source,
  399. size_t DestinationSize
  400. )
  401. /*++
  402. Routine Description:
  403. This routine converts a null-terminated sequence of multi-byte characters
  404. beginning in the inital shift state to a string of wide characters, up to
  405. and including a null terminator.
  406. Arguments:
  407. Destination - Supplies an optional pointer where the wide character string
  408. will be returned.
  409. Source - Supplies a pointer to the null-terminated multibyte string. No
  410. characters are examined after a null terminator is found.
  411. DestinationSize - Supplies the maximum number of elements to place in the
  412. wide string.
  413. Return Value:
  414. Returns the number of wide character array elements modified (or required
  415. if the wide string is NULL), not including the terminating NULL.
  416. -1 if an invalid character is encountered. The errno variable may be set
  417. to provide more information.
  418. --*/
  419. {
  420. mbstate_t State;
  421. memset(&State, 0, sizeof(mbstate_t));
  422. return mbsrtowcs(Destination, &Source, DestinationSize, &State);
  423. }
  424. LIBC_API
  425. size_t
  426. mbsrtowcs (
  427. wchar_t *Destination,
  428. const char **Source,
  429. size_t DestinationSize,
  430. mbstate_t *State
  431. )
  432. /*++
  433. Routine Description:
  434. This routine converts a null-terminated sequence of multi-byte characters
  435. beginning in the inital shift state to a string of wide characters, up to
  436. and including a null terminator.
  437. Arguments:
  438. Destination - Supplies an optional pointer where the wide character string
  439. will be returned.
  440. Source - Supplies a pointer that upon input contains a pointer to the null
  441. terminated multibyte string to convert. On output, this will contain
  442. one of two values. If the null terminator was encountered in the
  443. multibyte string, then the value returned here will be NULL. If the
  444. conversion stopped because it would exceed the wide string size, then
  445. the value returned here will be a pointer to the character one after
  446. the last character successfully converted. If the wide string is NULL,
  447. the pointer will remained unchanged on output.
  448. DestinationSize - Supplies the maximum number of elements to place in the
  449. wide string.
  450. State - Supplies an optional pointer to a multibyte shift state object to
  451. use. If this value is not supplied, an internal state will be used.
  452. The downside of using the internal state is that it makes this function
  453. not thread safe nor reentrant.
  454. Return Value:
  455. Returns the number of wide character array elements modified (or required
  456. if the wide string is NULL), not including the terminating NULL.
  457. -1 if an invalid character is encountered. The errno variable may be set
  458. to provide more information.
  459. --*/
  460. {
  461. size_t ElementsConverted;
  462. const char *MultibyteString;
  463. size_t Result;
  464. wchar_t WideCharacter;
  465. ElementsConverted = 0;
  466. MultibyteString = *Source;
  467. while ((Destination == NULL) || (DestinationSize > 0)) {
  468. Result = mbrtowc(&WideCharacter, MultibyteString, MB_LEN_MAX, State);
  469. if (Result == -1) {
  470. return -1;
  471. }
  472. if (Destination != NULL) {
  473. *Destination = WideCharacter;
  474. Destination += 1;
  475. DestinationSize -= 1;
  476. }
  477. if (Result == 0) {
  478. break;
  479. }
  480. if (WideCharacter == L'\0') {
  481. MultibyteString = NULL;
  482. break;
  483. }
  484. MultibyteString += Result;
  485. ElementsConverted += 1;
  486. }
  487. if (Destination != NULL) {
  488. *Source = MultibyteString;
  489. }
  490. return ElementsConverted;
  491. }
  492. LIBC_API
  493. size_t
  494. wcstombs (
  495. char *Destination,
  496. const wchar_t *Source,
  497. size_t DestinationSize
  498. )
  499. /*++
  500. Routine Description:
  501. This routine converts a string of wide characters into a multibyte string,
  502. up to and including a wide null terminator.
  503. Arguments:
  504. Destination - Supplies an optional pointer to a destination where the
  505. multibyte characters will be returned.
  506. Source - Supplies a pointer to the null terminated wide character string to
  507. convert.
  508. DestinationSize - Supplies the number of bytes in the destination buffer
  509. (or the theoretical destination buffer if one was not supplied).
  510. Return Value:
  511. Returns the number of bytes in the resulting character sequence, not
  512. including the null terminator (if any).
  513. -1 if an invalid wide character is encountered. The errno variable may be
  514. set to provide more information.
  515. --*/
  516. {
  517. mbstate_t State;
  518. memset(&State, 0, sizeof(State));
  519. return wcsrtombs(Destination, &Source, DestinationSize, &State);
  520. }
  521. LIBC_API
  522. size_t
  523. wcsrtombs (
  524. char *Destination,
  525. const wchar_t **Source,
  526. size_t DestinationSize,
  527. mbstate_t *State
  528. )
  529. /*++
  530. Routine Description:
  531. This routine converts a string of wide characters into a multibyte string,
  532. up to and including a wide null terminator.
  533. Arguments:
  534. Destination - Supplies an optional pointer to a destination where the
  535. multibyte characters will be returned.
  536. Source - Supplies a pointer that upon input contains a pointer to the
  537. null terminated wide character string to convert. On output, this will
  538. contain one of two values. If the null terminator was encountered in
  539. the source string, then the value returned here will be NULL. If the
  540. conversion stopped because it would exceed the destination size,
  541. then the value returned here will be a pointer to the character one
  542. after the last character successfully converted. If the destination
  543. is NULL, the pointer will remained unchanged on ouput.
  544. DestinationSize - Supplies the number of bytes in the destination buffer
  545. (or the theoretical destination buffer if one was not supplied).
  546. State - Supplies an optional pointer to a multibyte shift state object to
  547. use. If this value is not supplied, an internal state will be used.
  548. The downside of using the internal state is that it makes this function
  549. not thread safe nor reentrant.
  550. Return Value:
  551. Returns the number of bytes in the resulting character sequence, not
  552. including the null terminator (if any).
  553. -1 if an invalid wide character is encountered. The errno variable may be
  554. set to provide more information.
  555. --*/
  556. {
  557. char HoldingBuffer[MB_LEN_MAX];
  558. mbstate_t PreviousState;
  559. size_t Result;
  560. size_t TotalWritten;
  561. const wchar_t *WideString;
  562. if (State == NULL) {
  563. State = &ClMultibyteConversionState;
  564. }
  565. Result = 0;
  566. TotalWritten = 0;
  567. WideString = *Source;
  568. while ((Destination == NULL) || (DestinationSize > 0)) {
  569. PreviousState = *State;
  570. Result = wcrtomb(HoldingBuffer, *WideString, State);
  571. if (Result == -1) {
  572. errno = EILSEQ;
  573. break;
  574. } else if (Destination != NULL) {
  575. //
  576. // Copy the holding buffer to the destination if there's enough
  577. // room.
  578. //
  579. if (Result <= DestinationSize) {
  580. memcpy(Destination, HoldingBuffer, Result);
  581. Destination += Result;
  582. DestinationSize -= Result;
  583. //
  584. // The remaining size is not big enough to hold the character. Back
  585. // out the state advancement.
  586. //
  587. } else {
  588. *State = PreviousState;
  589. break;
  590. }
  591. }
  592. //
  593. // If this was a null terminator, stop.
  594. //
  595. if (*WideString == L'\0') {
  596. WideString = NULL;
  597. break;
  598. }
  599. //
  600. // Update the total bytes written. This never includes the null
  601. // terminator.
  602. //
  603. TotalWritten += Result;
  604. //
  605. // Advance the source string and continue.
  606. //
  607. WideString += 1;
  608. }
  609. //
  610. // Return the source string.
  611. //
  612. if (Destination != NULL) {
  613. *Source = WideString;
  614. }
  615. if (Result == -1) {
  616. return -1;
  617. }
  618. return TotalWritten;
  619. }
  620. LIBC_API
  621. int
  622. mblen (
  623. const char *MultibyteCharacter,
  624. size_t Size
  625. )
  626. /*++
  627. Routine Description:
  628. This routine returns the number of bytes constituting the given multibyte
  629. character. It shall be equivalent to:
  630. mbtowc(NULL, MultibyteCharacter, Size);
  631. except that the builtin state of mbtowc is not affected.
  632. Arguments:
  633. MultibyteCharacter - Supplies an optional pointer to the multibyte
  634. character to get the length of.
  635. Size - Supplies the size of the multibyte character buffer.
  636. Return Value:
  637. 0 if the next character corresponds to the null wide character.
  638. Returns the positive number of bytes constituting the next character on
  639. success.
  640. -2 if the size of the buffer is too small, such that only a partial wide
  641. character could be constructed using the given bytes.
  642. -1 on error, and errno will be set to contain more information.
  643. --*/
  644. {
  645. mbstate_t State;
  646. memset(&State, 0, sizeof(mbstate_t));
  647. return (int)mbrtowc(NULL, MultibyteCharacter, Size, &State);
  648. }
  649. LIBC_API
  650. size_t
  651. mbrlen (
  652. const char *MultibyteCharacter,
  653. size_t Size,
  654. mbstate_t *State
  655. )
  656. /*++
  657. Routine Description:
  658. This routine returns the number of bytes constituting the given multibyte
  659. character. It shall be equivalent to:
  660. mbrtowc(NULL, MultibyteCharacter, Size, State);.
  661. Arguments:
  662. MultibyteCharacter - Supplies an optional pointer to the multibyte
  663. character to get the length of.
  664. Size - Supplies the size of the multibyte character buffer.
  665. State - Supplies an optional pointer to an initialized multibyte conversion
  666. state buffer. If this is not supplied, an internal state buffer will
  667. be used, however using the internal one makes this function neither
  668. safe nor reentrant.
  669. Return Value:
  670. 0 if the next character corresponds to the null wide character.
  671. Returns the positive number of bytes constituting the next character on
  672. success.
  673. -2 if the size of the buffer is too small, such that only a partial wide
  674. character could be constructed using the given bytes.
  675. -1 on error, and errno will be set to contain more information.
  676. --*/
  677. {
  678. return mbrtowc(NULL, MultibyteCharacter, Size, State);
  679. }
  680. LIBC_API
  681. int
  682. wcwidth (
  683. wchar_t Character
  684. )
  685. /*++
  686. Routine Description:
  687. This routine returns the number of display column positions the given wide
  688. character occupies.
  689. Arguments:
  690. Character - Supplies the character to examine.
  691. Return Value:
  692. 0 for the null character.
  693. -1 if the character is not printable.
  694. Otherwise, returns the number of columns the given character takes up.
  695. --*/
  696. {
  697. LONG Max;
  698. //
  699. // This function is based on Markus Kuhn's function at
  700. // https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c, which was placed in the
  701. // public domain.
  702. //
  703. if (Character == 0) {
  704. return 0;
  705. }
  706. if ((Character < 0x20) || ((Character >= 0x7F) && (Character < 0xA0))) {
  707. return -1;
  708. }
  709. //
  710. // Search the non-spacing characters.
  711. //
  712. Max = sizeof(ClCombiningCharacters) / sizeof(ClCombiningCharacters[0]) - 1;
  713. if (ClpSearchCombiningIntervals(Character, ClCombiningCharacters, Max) !=
  714. FALSE) {
  715. return 0;
  716. }
  717. if (Character >= 0x1100) {
  718. if ((Character <= 0x115F) ||
  719. (Character == 0x2329) ||
  720. (Character == 0x232A) ||
  721. ((Character >= 0x2E80) && (Character <= 0xA4CF) &&
  722. (Character != 0x303F)) ||
  723. ((Character >= 0xAC00) && (Character <= 0xD7A3)) ||
  724. ((Character >= 0xF900) && (Character <= 0xFAFF)) ||
  725. ((Character >= 0xFE10) && (Character <= 0xFE19)) ||
  726. ((Character >= 0xFE30) && (Character <= 0xFE6F)) ||
  727. ((Character >= 0xFF00) && (Character <= 0xFF60)) ||
  728. ((Character >= 0xFFE0) && (Character <= 0xFFE6)) ||
  729. ((Character >= 0x20000) && (Character <= 0x2FFFD)) ||
  730. ((Character >= 0x30000) && (Character <= 0x3FFFD))) {
  731. return 2;
  732. }
  733. }
  734. return 1;
  735. }
  736. //
  737. // --------------------------------------------------------- Internal Functions
  738. //
  739. BOOL
  740. ClpSearchCombiningIntervals (
  741. wchar_t Character,
  742. PWC_INTERVAL Table,
  743. LONG Max
  744. )
  745. /*++
  746. Routine Description:
  747. This routine performs a binary search to determine if the given character
  748. is listed in the given table.
  749. Arguments:
  750. Character - Supplies the character to examine.
  751. Table - Supplies a pointer to the table to search in.
  752. Max - Supplies the maximum index of the table, inclusive.
  753. Return Value:
  754. TRUE if the character is in the table.
  755. FALSE if the character is not in the table.
  756. --*/
  757. {
  758. LONG Mid;
  759. LONG Min;
  760. if ((Character < Table[0].First) || (Character > Table[Max].Last)) {
  761. return FALSE;
  762. }
  763. Min = 0;
  764. while (Max >= Min) {
  765. Mid = (Min + Max) / 2;
  766. if (Character > Table[Mid].Last) {
  767. Min = Mid + 1;
  768. } else if (Character < Table[Mid].First) {
  769. Max = Mid - 1;
  770. } else {
  771. return TRUE;
  772. }
  773. }
  774. return FALSE;
  775. }