mbwc.c 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. #include <stdlib.h>
  2. /*
  3. * Use the FSS-UTF transformation proposed by posix.
  4. * We define 4 byte types:
  5. * T0 0xxxxxxx 7 free bits
  6. * Tx 10xxxxxx 6 free bits
  7. * T1 110xxxxx 5 free bits
  8. * T2 1110xxxx 4 free bits
  9. *
  10. * Encoding is as follows.
  11. * From hex Thru hex Sequence Bits
  12. * 00000000 0000007F T0 7
  13. * 00000080 000007FF T1 Tx 11
  14. * 00000800 0000FFFF T2 Tx Tx 16
  15. */
  /*
   * Report the length in bytes of the multibyte character at s,
   * looking at no more than n bytes.  This is mbtowc with the decoded
   * value discarded, so the result is exactly what mbtowc returns.
   */
  int
  mblen(const char *s, size_t n)
  {
      int len;

      len = mbtowc((wchar_t*)0, s, n);
      return len;
  }
  /*
   * Decode the multibyte character at the start of s, examining at
   * most n bytes, and store its code in *pwc when pwc is non-null.
   *
   * Returns 0 if s is null or the character is NUL, -1 if the bytes do
   * not form a complete, shortest-form sequence of one to three bytes,
   * and otherwise the number of bytes consumed (1, 2 or 3).
   */
  int
  mbtowc(wchar_t *pwc, const char *s, size_t n)
  {
      const unsigned char *p = (const unsigned char*)s;
      long code;
      int lead, t1, t2;

      if(s == 0)
          return 0;
      if(n == 0)
          return -1;

      lead = p[0];
      if(lead < 0x80) {
          /* T0: plain 7-bit character; NUL reports length 0 */
          if(pwc)
              *pwc = lead;
          return lead != 0;
      }

      /* every longer sequence continues with a Tx byte (10xxxxxx) */
      if(n < 2)
          return -1;
      t1 = p[1] ^ 0x80;
      if(t1 & 0xC0)
          return -1;
      if((lead & 0xE0) == 0xC0) {
          /* T1 Tx: 5 + 6 = 11 bits */
          code = ((long)(lead & 0x1F) << 6) | t1;
          if(code < 0x80)
              return -1;  /* would have fit in a single byte */
          if(pwc)
              *pwc = code;
          return 2;
      }

      if(n < 3)
          return -1;
      t2 = p[2] ^ 0x80;
      if(t2 & 0xC0)
          return -1;
      if((lead & 0xF0) != 0xE0)
          return -1;      /* lead byte is neither T1 nor T2 */

      /* T2 Tx Tx: 4 + 6 + 6 = 16 bits */
      code = ((long)(lead & 0x0F) << 12) | ((long)t1 << 6) | t2;
      if(code < 0x800)
          return -1;      /* would have fit in two bytes */
      if(pwc)
          *pwc = code;
      return 3;
  }
  /*
   * Encode wchar as FSS-UTF into s and return the number of bytes
   * written (1, 2 or 3).  Only the low 16 bits of wchar are encoded.
   * A null s writes nothing and returns 0.
   */
  int
  wctomb(char *s, wchar_t wchar)
  {
      long code;
      int len;

      if(s == 0)
          return 0;

      code = wchar & 0xFFFF;
      if(code < 0x80) {
          /* T0 */
          s[0] = code;
          len = 1;
      } else if(code < 0x800) {
          /* T1 Tx */
          s[1] = 0x80 | (code & 0x3F);
          s[0] = 0xC0 | (code >> 6);
          len = 2;
      } else {
          /* T2 Tx Tx */
          s[2] = 0x80 | (code & 0x3F);
          s[1] = 0x80 | ((code >> 6) & 0x3F);
          s[0] = 0xE0 | (code >> 12);
          len = 3;
      }
      return len;
  }
  /*
   * Convert the NUL-terminated multibyte string s to wide characters,
   * storing at most n of them in pwcs.
   *
   * Conversion stops after the terminating NUL has been stored or once
   * n wide characters have been written.  Returns the number of wide
   * characters stored, not counting the terminating NUL, or (size_t)-1
   * if mbtowc rejects a sequence.
   */
  size_t
  mbstowcs(wchar_t *pwcs, const char *s, size_t n)
  {
      size_t count;   /* same type as n: no signed/unsigned mixing or overflow */
      int d, c;

      for(count = 0; count < n; count++) {
          c = *s & 0xff;
          if(c < 0x80) {
              /* single-byte character: no need to call mbtowc */
              *pwcs = c;
              if(c == 0)
                  break;
              s++;
          } else {
              /*
               * 3 is the longest sequence mbtowc accepts; the
               * terminating NUL is not a valid continuation byte,
               * so it ends the decode if the string is cut short.
               */
              d = mbtowc(pwcs, s, 3);
              if(d <= 0)
                  return (d < 0) ? (size_t)-1 : count;
              s += d;
          }
          pwcs++;
      }
      return count;
  }
  /*
   * Convert the NUL-terminated wide string pwcs to multibyte form,
   * storing at most n bytes in s.
   *
   * Conversion stops after the terminating NUL has been stored, or
   * before the first character whose encoding would not fit completely
   * in the space that is left; a character is never split and none is
   * skipped.  Returns the number of bytes stored, not counting the
   * terminating NUL, so a return value of n means s was not
   * terminated.  Returns (size_t)-1 if wctomb reports an error.
   */
  size_t
  wcstombs(char *s, const wchar_t *pwcs, size_t n)
  {
      char buf[3];    /* longest encoding produced by wctomb in this file */
      size_t used;
      wchar_t wc;
      int d, k;

      used = 0;
      while(used < n) {
          wc = *pwcs++;
          if(wc == 0) {
              /* terminator is stored but not counted */
              s[used] = 0;
              break;
          }
          /* encode into scratch space first so s is never overrun */
          d = wctomb(buf, wc);
          if(d < 0)
              return (size_t)-1;
          if((size_t)d > n - used)
              break;
          for(k = 0; k < d; k++)
              s[used++] = buf[k];
      }
      return used;
  }