conv_ksc.c 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. #ifdef PLAN9
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #else
  6. #include <stdio.h>
  7. #include <unistd.h>
  8. #include "plan9.h"
  9. #endif
  10. #include "hdr.h"
  11. #include "conv.h"
  12. #include "ksc.h"
  13. /*
  14. contributed by kuro@vodka.Eng.Sun.COM (Teruhiko Kurosaka)
  15. */
  16. /*
  17. a state machine for interpreting shift-ksc.
  18. */
  19. #define SS2 0x8e
  20. #define SS3 0x8f
  21. /*
  22. * Convert EUC in Korean locale to Unicode.
  23. * Only codeset 0 and 1 are used.
  24. */
  25. void
  26. ukscproc(int c, Rune **r, long input_loc)
  27. {
  28. static enum { init, cs1last /*, cs2, cs3first, cs3last*/} state = init;
  29. static int korean646 = 1; /* fixed to 1 for now. */
  30. static int lastc;
  31. int n;
  32. long l;
  33. switch(state)
  34. {
  35. case init:
  36. if (c < 0){
  37. return;
  38. }else if (c < 128){
  39. if(korean646 && (c=='\\')){
  40. emit(0x20A9);
  41. } else {
  42. emit(c);
  43. }
  44. /* }else if (c==SS2){
  45. state = cs2;
  46. }else if (c==SS3){
  47. state = cs3first;
  48. */ }else{
  49. lastc = c;
  50. state = cs1last;
  51. }
  52. return;
  53. case cs1last: /* 2nd byte of codeset 1 (KSC 5601) */
  54. if(c < 0){
  55. if(squawk)
  56. EPR "%s: unexpected EOF in %s\n", argv0, file);
  57. c = 0x21 | (lastc&0x80);
  58. }
  59. n = ((lastc&0x7f)-33)*94 + (c&0x7f)-33;
  60. if((n >= ksc5601max) || ((l = tabksc5601[n]) < 0)){
  61. nerrors++;
  62. if(squawk)
  63. EPR "%s: unknown ksc5601 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file);
  64. if(!clean)
  65. emit(BADMAP);
  66. } else {
  67. emit(l);
  68. }
  69. state = init;
  70. return;
  71. default:
  72. if(squawk)
  73. EPR "%s: ukscproc: unknown state %d\n",
  74. argv0, init);
  75. }
  76. }
  77. void
  78. uksc_in(int fd, long *notused, struct convert *out)
  79. {
  80. Rune ob[N];
  81. Rune *r, *re;
  82. uchar ibuf[N];
  83. int n, i;
  84. long nin;
  85. USED(notused);
  86. r = ob;
  87. re = ob+N-3;
  88. nin = 0;
  89. while((n = read(fd, ibuf, sizeof ibuf)) > 0){
  90. for(i = 0; i < n; i++){
  91. ukscproc(ibuf[i], &r, nin++);
  92. if(r >= re){
  93. OUT(out, ob, r-ob);
  94. r = ob;
  95. }
  96. }
  97. if(r > ob){
  98. OUT(out, ob, r-ob);
  99. r = ob;
  100. }
  101. }
  102. ukscproc(-1, &r, nin);
  103. if(r > ob)
  104. OUT(out, ob, r-ob);
  105. OUT(out, ob, 0);
  106. }
  107. void
  108. uksc_out(Rune *base, int n, long *notused)
  109. {
  110. char *p;
  111. int i;
  112. Rune r;
  113. long l;
  114. static int first = 1;
  115. USED(notused);
  116. if(first){
  117. first = 0;
  118. for(i = 0; i < NRUNE; i++)
  119. tab[i] = -1;
  120. for(i = 0; i < ksc5601max; i++)
  121. if((l = tabksc5601[i]) != -1){
  122. if(l < 0)
  123. tab[-l] = i;
  124. else
  125. tab[l] = i;
  126. }
  127. }
  128. nrunes += n;
  129. p = obuf;
  130. for(i = 0; i < n; i++){
  131. r = base[i];
  132. if(r < 128)
  133. *p++ = r;
  134. else {
  135. if(tab[r] != -1){
  136. *p++ = 0x80 | (tab[r]/94 + 0x21);
  137. *p++ = 0x80 | (tab[r]%94 + 0x21);
  138. continue;
  139. }
  140. if(squawk)
  141. EPR "%s: rune 0x%x not in output cs\n", argv0, r);
  142. nerrors++;
  143. if(clean)
  144. continue;
  145. *p++ = BYTEBADMAP;
  146. }
  147. }
  148. noutput += p-obuf;
  149. if(p > obuf)
  150. write(1, obuf, p-obuf);
  151. }