123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- #include <stdlib.h>
- #include <limits.h>
/*
 * Use the FSS-UTF transformation proposed by POSIX.
 * We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 * Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx Tx Tx Tx	31
 */
/* Defined further down; declared here because mblen is built on top of it. */
int	mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * mblen: report how many bytes make up the multibyte character at s,
 * looking at no more than n bytes.
 *
 * This is exactly mbtowc() with the decoded value thrown away, so the
 * result follows mbtowc(): whatever it returns for (NULL, s, n) is
 * handed straight back to the caller.
 */
int
mblen(const char *s, size_t n)
{
	int len;

	len = mbtowc(NULL, s, n);
	return len;
}
/*
 * Bit-twiddling constants for the FSS-UTF byte types.
 */
enum {
	C0MSK = 0x7F,		/* 7 payload bits of a single T0 byte */
	C1MSK = 0x7FF,		/* 11 payload bits of a T1 Tx sequence */
	T1 = 0xC0,		/* 110xxxxx lead byte tag */
	T2 = 0xE0,		/* 1110xxxx lead byte tag */
	NT1BITS = 11,		/* payload width of a T1 Tx sequence */
	NSHFT = 5,		/* extra payload bits gained per added byte */
	NCSHFT = NSHFT + 1,	/* payload bits carried by one Tx byte */
	/*
	 * Largest value the encoding can carry (31 bits).  This used to be
	 * (1 << (8*MB_LEN_MAX - 1)) - 1, which overflows int: it is a shift
	 * by 31 when MB_LEN_MAX is 4 and by 127 when it is 16.
	 */
	WCHARMSK = 0x7FFFFFFF,
};

/*
 * mbtowc: decode the multibyte character starting at s, examining at
 * most n bytes.
 *
 * pwc	if non-null, receives the decoded value.
 * s	bytes to decode; a null pointer asks whether the encoding has
 *	shift state, and the answer is always 0 (it has none).
 * n	number of bytes that may be read from s.
 *
 * Returns 0 when s is null or points at a NUL byte, the number of bytes
 * consumed for any other well-formed character, and -1 when n is 0, the
 * sequence is cut short by n, a continuation byte is not of the form
 * 10xxxxxx, or the lead byte is not one of T0..T5.
 *
 * NOTE(review): non-shortest ("overlong") forms such as C0 80 are still
 * accepted, as they were before; rejecting them would change results
 * callers may rely on, so that is left for a separate decision.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	/* longest sequence accepted: T5 plus five Tx bytes, or less if
	 * MB_LEN_MAX says callers only provide smaller buffers */
	enum { MAXSEQ = MB_LEN_MAX < 6 ? MB_LEN_MAX : 6 };
	unsigned long long c0, c, l, m, b, wm;
	int i;

	if(!s)
		return 0;
	if(n < 1)
		return -1;

	c0 = s[0] & 0xff;		/* first one is special */
	if((c0 & 0x80) == 0x00) {
		if(pwc)
			*pwc = c0;
		return c0 != 0;
	}

	/*
	 * After reading continuation byte i the character is complete if
	 * the lead byte matches tag b under mask m.  Both start out
	 * describing T1 (110xxxxx) and are widened by one bit per round,
	 * together with the payload mask wm.
	 */
	m = T2;
	b = T1;
	wm = C1MSK;
	l = c0;
	for(i = 1; i < MAXSEQ; i++){
		if(n < (size_t)i + 1)
			return -1;
		/* flipping the top bit turns a valid 10xxxxxx into 00xxxxxx */
		c = (s[i] ^ 0x80) & 0xff;
		if((c & 0xC0) != 0x00)
			return -1;
		l = (l << NCSHFT) | c;
		if((c0 & m) == b) {
			if(pwc)
				*pwc = l & wm;
			return i + 1;
		}
		b = m;
		m = (m >> 1) | 0x80;
		wm = (wm << NSHFT) | wm;
	}

	/*
	 * Stray continuation byte or 0xFE/0xFF in lead position, or a
	 * sequence longer than MAXSEQ.
	 */
	return -1;
}
/*
 * wctomb: encode wchar as an FSS-UTF multibyte character in s.
 *
 * s	destination with room for MB_LEN_MAX bytes; a null pointer asks
 *	whether the encoding has shift state, and the answer is always 0.
 * wchar	value to encode; only its low 31 bits are used.
 *
 * Returns the number of bytes stored (no terminator is added), or -1 if
 * the value needs more bytes than MB_LEN_MAX allows.
 *
 * The shortest form is always chosen.  The limits per length are
 * 0x800, 0x10000, 0x200000, 0x4000000 and 0x80000000: 11 bits for two
 * bytes and 5 more for every byte after that.
 */
int
wctomb(char *s, wchar_t wchar)
{
	enum {
		WMSK = 0x7FFFFFFF,	/* 31 bits, the most T5 can carry */
		CBITS = 6,		/* payload bits per Tx byte */
		MAXSEQ = MB_LEN_MAX < 6 ? MB_LEN_MAX : 6,
	};
	unsigned long long c, maxc;
	int len, k;

	if(!s)
		return 0;

	c = wchar & WMSK;
	if(c < 0x80) {
		s[0] = c;
		return 1;
	}

	/* find the shortest length whose range covers c */
	maxc = 0x800;
	for(len = 2; len <= MAXSEQ; len++){
		if(c < maxc)
			break;
		maxc <<= 5;
	}
	if(len > MAXSEQ)
		return -1;

	/*
	 * 0xFF00 >> len leaves len one-bits followed by a zero in the low
	 * byte: 0xC0 for 2, 0xE0 for 3, 0xF0 for 4 and so on, which is
	 * the lead byte tag.  The payload bits above the Tx bytes go next
	 * to it.
	 */
	s[0] = ((0xFF00 >> len) & 0xFF) | (c >> ((len - 1) * CBITS));
	for(k = 1; k < len; k++)
		s[k] = 0x80 | ((c >> (CBITS * (len - 1 - k))) & 0x3F);
	return len;
}
/*
 * mbstowcs: convert the NUL-terminated multibyte string s into wide
 * characters.
 *
 * pwcs	destination array; if null nothing is stored, n is ignored and
 *	the whole of s is measured (the POSIX length query).
 * s	multibyte string to convert.
 * n	capacity of pwcs in wide characters.
 *
 * Returns the number of wide characters produced, not counting the
 * terminating zero, or (size_t)-1 if mbtowc() rejects a sequence.  The
 * terminator is stored only if it is reached before n elements have
 * been written.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;
	int c, d;

	for(i = 0; !pwcs || i < n; i++) {
		c = *s & 0xff;
		if(c < 0x80) {
			/* plain ASCII needs no decoding */
			if(pwcs)
				pwcs[i] = c;
			if(c == 0)
				break;
			s++;
			continue;
		}
		/*
		 * A NUL byte is never a valid continuation byte, so
		 * letting mbtowc look at MB_LEN_MAX bytes cannot take it
		 * past the end of a terminated string.
		 */
		d = mbtowc(pwcs ? &pwcs[i] : NULL, s, MB_LEN_MAX);
		if(d < 0)
			return (size_t)-1;
		if(d == 0)
			break;
		s += d;
	}
	return i;
}
/*
 * wcstombs: convert the zero-terminated wide string pwcs into a
 * multibyte string.
 *
 * s	destination buffer; if null nothing is stored, n is ignored and
 *	the length needed for the whole string is computed.
 * pwcs	wide string to convert.
 * n	capacity of s in bytes.
 *
 * Returns the number of bytes produced, not counting the terminating
 * NUL, or (size_t)-1 if wctomb() cannot encode a character.  Conversion
 * stops at the first character that does not fit completely in what is
 * left of s, so a multibyte character is never split.  The terminating
 * NUL is stored only when there is room for it.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[MB_LEN_MAX];
	size_t used;
	int d, k;

	for(used = 0; !s || used < n; pwcs++) {
		if(*pwcs == 0) {
			if(s)
				s[used] = '\0';
			break;
		}
		/* encode into scratch first so a character that does not
		 * fit never touches the caller's buffer */
		d = wctomb(buf, *pwcs);
		if(d < 0)
			return (size_t)-1;
		if(s) {
			if((size_t)d > n - used)
				break;
			for(k = 0; k < d; k++)
				s[used + k] = buf[k];
		}
		used += d;
	}
	return used;
}
|