123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165 |
- #include <stdlib.h>
/*
 * Use the FSS-UTF transformation proposed by POSIX.
 * We define 4 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *
 * Encoding is as follows.
 *	From hex	Thru hex	Sequence	Bits
 *	00000000	0000007F	T0		7
 *	00000080	000007FF	T1 Tx		11
 *	00000800	0000FFFF	T2 Tx Tx	16
 */
/*
 * mblen: report the byte length of the multibyte character at s,
 * looking at no more than n bytes.  The work is delegated to mbtowc
 * with a null destination, so the decoded value is discarded and the
 * result is exactly what mbtowc returns.
 */
int
mblen(const char *s, size_t n)
{
	int len;

	len = mbtowc((wchar_t*)0, s, n);
	return len;
}
/*
 * mbtowc: decode one multibyte character from s, reading at most n bytes.
 *
 * pwc, when non-null, receives the decoded value.
 * Returns 0 when s is null or when the character is NUL, the number of
 * bytes consumed (1, 2 or 3) for any other valid character, and -1 when
 * the bytes do not form a complete, shortest-form sequence of at most
 * three bytes within the first n bytes.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	const unsigned char *u;
	int lead, t1, t2;
	long rune;

	if(s == 0)
		return 0;
	if(n == 0)
		return -1;

	u = (const unsigned char*)s;
	lead = u[0] & 0xff;

	/* T0: a single 7-bit byte stands for itself */
	if(lead < 0x80) {
		if(pwc != 0)
			*pwc = lead;
		return lead != 0;
	}

	/* every longer form needs a Tx (10xxxxxx) second byte */
	if(n < 2)
		return -1;
	if((u[1] & 0xC0) != 0x80)
		return -1;
	t1 = u[1] & 0x3F;

	/* T1 Tx: 11 bits */
	if((lead & 0xE0) == 0xC0) {
		rune = ((long)(lead & 0x1F) << 6) | t1;
		if(rune < 0x80)		/* overlong: fits in one byte */
			return -1;
		if(pwc != 0)
			*pwc = rune;
		return 2;
	}

	/* the third byte is validated before the lead byte is classified */
	if(n < 3)
		return -1;
	if((u[2] & 0xC0) != 0x80)
		return -1;
	t2 = u[2] & 0x3F;

	/* anything that is not a T2 lead byte is undecodable here */
	if((lead & 0xF0) != 0xE0)
		return -1;

	/* T2 Tx Tx: 16 bits */
	rune = ((long)(lead & 0x0F) << 12) | ((long)t1 << 6) | t2;
	if(rune < 0x800)		/* overlong: fits in two bytes */
		return -1;
	if(pwc != 0)
		*pwc = rune;
	return 3;
}
/*
 * wctomb: encode one wide character into s.
 *
 * Only the low 16 bits of wchar are used, so the output is one, two or
 * three bytes long.  Returns the number of bytes written, or 0 when s
 * is null.
 */
int
wctomb(char *s, wchar_t wchar)
{
	unsigned long rune;
	char *out;

	if(s == 0)
		return 0;

	rune = (unsigned long)wchar & 0xFFFF;

	/* T0: 7 bits in a single byte */
	if(rune < 0x80) {
		s[0] = (char)rune;
		return 1;
	}

	out = s;
	if(rune < 0x800) {
		/* T1 lead byte: top 5 of 11 bits */
		*out++ = (char)(0xC0 | (rune >> 6));
	} else {
		/* T2 lead byte: top 4 of 16 bits, then the middle 6 */
		*out++ = (char)(0xE0 | (rune >> 12));
		*out++ = (char)(0x80 | ((rune >> 6) & 0x3F));
	}
	/* both multi-byte forms end with the low 6 bits in a Tx byte */
	*out++ = (char)(0x80 | (rune & 0x3F));
	return (int)(out - s);
}
/*
 * mbstowcs: decode the NUL-terminated multibyte string s into pwcs,
 * storing at most n wide characters.
 *
 * Returns the number of wide characters stored, not counting the
 * terminating zero (which is stored only when it is reached before n
 * characters have been written), or (size_t)-1 when mbtowc rejects a
 * byte sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;	/* same type as n: no signed/unsigned compare, no int overflow */
	int d, c;

	for(i = 0; i < n; i++) {
		c = *s & 0xff;
		if(c < 0x80) {
			/* single-byte fast path; the terminator is stored but not counted */
			*pwcs = c;
			if(c == 0)
				break;
			s++;
		} else {
			/*
			 * Multi-byte sequence.  Offering 3 bytes relies on mbtowc
			 * validating each continuation byte before it reads the
			 * next one, so it stops at the string's NUL.
			 */
			d = mbtowc(pwcs, s, 3);
			if(d < 0)
				return (size_t)-1;
			if(d == 0)
				return i;
			s += d;
		}
		pwcs++;
	}
	return i;
}
/*
 * wcstombs: encode the zero-terminated wide string pwcs into s, writing
 * at most n bytes.
 *
 * Conversion stops at the terminating zero, or at the first character
 * whose encoding would not fit in the space that remains; a character
 * is never stored partially and nothing after it is converted.
 *
 * Returns the number of bytes stored, not counting the terminating NUL
 * (which is stored only when there is room for it), or (size_t)-1 when
 * wctomb reports a character it cannot encode.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	enum { Maxbytes = 3 };	/* longest sequence this file's wctomb emits */
	char buf[Maxbytes];
	char *p, *end;
	long c;
	int d, k;

	p = s;
	end = s + n;
	while(p < end) {
		c = *pwcs++;
		if(c == 0) {
			/* terminator: stored, but excluded from the count */
			*p = 0;
			break;
		}
		if(c > 0 && c < 0x80) {
			/* single-byte fast path */
			*p++ = (char)c;
			continue;
		}
		/* encode into scratch space so a sequence that does not fit is never partly written */
		d = wctomb(buf, c);
		if(d < 0)
			return (size_t)-1;
		if(d > end - p)
			break;
		for(k = 0; k < d; k++)
			*p++ = buf[k];
	}
	return p - s;
}
|