conv_big5.c 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. #ifdef PLAN9
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #else
  6. #include <stdio.h>
  7. #include <unistd.h>
  8. #include "plan9.h"
  9. #endif
  10. #include "hdr.h"
  11. #include "conv.h"
  12. #include "big5.h"
  13. /*
  14. a state machine for interpreting big5 (hk format).
  15. */
  16. void
  17. big5proc(int c, Rune **r, long input_loc)
  18. {
  19. static enum { state0, state1 } state = state0;
  20. static int lastc;
  21. long n, ch, f, cold = c;
  22. switch(state)
  23. {
  24. case state0: /* idle state */
  25. if(c < 0)
  26. return;
  27. if(c >= 0xA1){
  28. lastc = c;
  29. state = state1;
  30. return;
  31. }
  32. if(c == 26)
  33. c = '\n';
  34. emit(c);
  35. return;
  36. case state1: /* seen a font spec */
  37. if(c >= 64 && c <= 126)
  38. c -= 64;
  39. else if(c >= 161 && c <= 254)
  40. c = c-161 + 63;
  41. else {
  42. nerrors++;
  43. if(squawk)
  44. EPR "%s: bad big5 glyph (from 0x%x,0x%lx) near byte %ld in %s\n",
  45. argv0, lastc, cold, input_loc, file);
  46. if(!clean)
  47. emit(BADMAP);
  48. state = state0;
  49. return;
  50. }
  51. if(lastc >= 161 && lastc <= 254)
  52. f = lastc - 161;
  53. else {
  54. nerrors++;
  55. if(squawk)
  56. EPR "%s: bad big5 font %d (from 0x%x,0x%lx) near byte %ld in %s\n",
  57. argv0, lastc-161, lastc, cold, input_loc, file);
  58. if(!clean)
  59. emit(BADMAP);
  60. state = state0;
  61. return;
  62. }
  63. n = f*BIG5FONT + c;
  64. if(n < BIG5MAX)
  65. ch = tabbig5[n];
  66. else
  67. ch = -1;
  68. if(ch < 0){
  69. nerrors++;
  70. if(squawk)
  71. EPR "%s: unknown big5 %ld (from 0x%x,0x%lx) near byte %ld in %s\n",
  72. argv0, n, lastc, cold, input_loc, file);
  73. if(!clean)
  74. emit(BADMAP);
  75. } else
  76. emit(ch);
  77. state = state0;
  78. }
  79. }
  80. void
  81. big5_in(int fd, long *notused, struct convert *out)
  82. {
  83. Rune ob[N];
  84. Rune *r, *re;
  85. uchar ibuf[N];
  86. int n, i;
  87. long nin;
  88. USED(notused);
  89. r = ob;
  90. re = ob+N-3;
  91. nin = 0;
  92. while((n = read(fd, ibuf, sizeof ibuf)) > 0){
  93. for(i = 0; i < n; i++){
  94. big5proc(ibuf[i], &r, nin++);
  95. if(r >= re){
  96. OUT(out, ob, r-ob);
  97. r = ob;
  98. }
  99. }
  100. if(r > ob){
  101. OUT(out, ob, r-ob);
  102. r = ob;
  103. }
  104. }
  105. big5proc(-1, &r, nin);
  106. if(r > ob)
  107. OUT(out, ob, r-ob);
  108. OUT(out, ob, 0);
  109. }
  110. void
  111. big5_out(Rune *base, int n, long *notused)
  112. {
  113. char *p;
  114. int i;
  115. Rune r;
  116. static int first = 1;
  117. USED(notused);
  118. if(first){
  119. first = 0;
  120. for(i = 0; i < NRUNE; i++)
  121. tab[i] = -1;
  122. for(i = 0; i < BIG5MAX; i++)
  123. if(tabbig5[i] != -1)
  124. tab[tabbig5[i]] = i;
  125. }
  126. nrunes += n;
  127. p = obuf;
  128. for(i = 0; i < n; i++){
  129. r = base[i];
  130. if(r < 128)
  131. *p++ = r;
  132. else {
  133. if(tab[r] != -1){
  134. r = tab[r];
  135. if(r >= BIG5MAX){
  136. *p++ = 0xA1;
  137. *p++ = r-BIG5MAX;
  138. continue;
  139. } else {
  140. *p++ = 0xA1 + (r/BIG5FONT);
  141. r = r%BIG5FONT;
  142. if(r <= 62) r += 64;
  143. else r += 0xA1-63;
  144. *p++ = r;
  145. continue;
  146. }
  147. }
  148. if(squawk)
  149. EPR "%s: rune 0x%x not in output cs\n", argv0, r);
  150. nerrors++;
  151. if(clean)
  152. continue;
  153. *p++ = BYTEBADMAP;
  154. }
  155. }
  156. noutput += p-obuf;
  157. if(p > obuf)
  158. write(1, obuf, p-obuf);
  159. }