conv_ksc.c 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #ifdef PLAN9
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #else
  6. #include <stdio.h>
  7. #include <unistd.h>
  8. #include "plan9.h"
  9. #endif
  10. #include "hdr.h"
  11. #include "conv.h"
  12. #include "ksc.h"
  13. /*
  14. contributed by kuro@vodka.Eng.Sun.COM (Teruhiko Kurosaka)
  15. */
  16. /*
  17. a state machine for interpreting shift-ksc.
  18. */
  19. #define SS2 0x8e
  20. #define SS3 0x8f
  21. /*
  22. * Convert EUC in Korean locale to Unicode.
  23. * Only codeset 0 and 1 are used.
  24. */
  25. void
  26. ukscproc(int c, Rune **r, long input_loc)
  27. {
  28. static enum { init, cs1last /*, cs2, cs3first, cs3last*/} state = init;
  29. static int korean646 = 1; /* fixed to 1 for now. */
  30. static int lastc;
  31. int n;
  32. long l;
  33. switch(state)
  34. {
  35. case init:
  36. if (c < 0){
  37. return;
  38. }else if (c < 128){
  39. if(korean646 && (c=='\\')){
  40. emit(0x20A9);
  41. } else {
  42. emit(c);
  43. }
  44. /* }else if (c==SS2){
  45. state = cs2;
  46. }else if (c==SS3){
  47. state = cs3first;
  48. */ }else{
  49. lastc = c;
  50. state = cs1last;
  51. }
  52. return;
  53. case cs1last: /* 2nd byte of codeset 1 (KSC 5601) */
  54. if(c < 0){
  55. if(squawk)
  56. EPR "%s: unexpected EOF in %s\n", argv0, file);
  57. c = 0x21 | (lastc&0x80);
  58. }
  59. n = ((lastc&0x7f)-33)*94 + (c&0x7f)-33;
  60. if((n >= ksc5601max) || ((l = tabksc5601[n]) < 0)){
  61. nerrors++;
  62. if(squawk)
  63. EPR "%s: unknown ksc5601 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file);
  64. if(!clean)
  65. emit(BADMAP);
  66. } else {
  67. emit(l);
  68. }
  69. state = init;
  70. return;
  71. default:
  72. if(squawk)
  73. EPR "%s: ukscproc: unknown state %d\n",
  74. argv0, init);
  75. }
  76. }
  77. void
  78. uksc_in(int fd, long *notused, struct convert *out)
  79. {
  80. Rune ob[N];
  81. Rune *r, *re;
  82. uchar ibuf[N];
  83. int n, i;
  84. long nin;
  85. USED(notused);
  86. r = ob;
  87. re = ob+N-3;
  88. nin = 0;
  89. while((n = read(fd, ibuf, sizeof ibuf)) > 0){
  90. for(i = 0; i < n; i++){
  91. ukscproc(ibuf[i], &r, nin++);
  92. if(r >= re){
  93. OUT(out, ob, r-ob);
  94. r = ob;
  95. }
  96. }
  97. if(r > ob){
  98. OUT(out, ob, r-ob);
  99. r = ob;
  100. }
  101. }
  102. ukscproc(-1, &r, nin);
  103. if(r > ob)
  104. OUT(out, ob, r-ob);
  105. }
  106. void
  107. uksc_out(Rune *base, int n, long *notused)
  108. {
  109. char *p;
  110. int i;
  111. Rune r;
  112. long l;
  113. static int first = 1;
  114. USED(notused);
  115. if(first){
  116. first = 0;
  117. for(i = 0; i < NRUNE; i++)
  118. tab[i] = -1;
  119. for(i = 0; i < ksc5601max; i++)
  120. if((l = tabksc5601[i]) != -1){
  121. if(l < 0)
  122. tab[-l] = i;
  123. else
  124. tab[l] = i;
  125. }
  126. }
  127. nrunes += n;
  128. p = obuf;
  129. for(i = 0; i < n; i++){
  130. r = base[i];
  131. if(r < 128)
  132. *p++ = r;
  133. else {
  134. if(tab[r] != -1){
  135. *p++ = 0x80 | (tab[r]/94 + 0x21);
  136. *p++ = 0x80 | (tab[r]%94 + 0x21);
  137. continue;
  138. }
  139. if(squawk)
  140. EPR "%s: rune 0x%x not in output cs\n", argv0, r);
  141. nerrors++;
  142. if(clean)
  143. continue;
  144. *p++ = BYTEBADMAP;
  145. }
  146. }
  147. noutput += p-obuf;
  148. if(p > obuf)
  149. write(1, obuf, p-obuf);
  150. }