tr.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. #include <u.h>
  2. #include <libc.h>
/*
 * Control block controlling specification parse.
 * One of these tracks the position inside a string1/string2
 * argument while canon() hands out its runes one at a time.
 */
typedef struct PCB /* Control block controlling specification parse */
{
	char *base;	/* start of specification */
	char *current;	/* current parse point */
	long last;	/* last Rune returned; -1 until canon() has returned one */
	long final;	/* final Rune in a span; -1 when no range is being expanded */
} Pcb;
/*
 * Bitmap helpers: a is a uchar array holding one bit per rune,
 * c is the rune value.  bits[] supplies the mask for bit (c & 7)
 * of byte c/8.  Note that each macro evaluates c twice, so c must
 * not have side effects.
 */
uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
#define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])	/* mark c as a member */
#define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])	/* remove c */
#define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])		/* non-zero if c is a member */
uchar f[(Runemax+1)/8];	/* bitmap, one bit per rune: set built from string1 */
uchar t[(Runemax+1)/8];	/* bitmap, one bit per rune: set built from string2 (consulted when squeezing) */
char wbuf[4096];	/* output buffer filled by writerune(), drained by wflush() */
char *wptr;		/* next free byte in wbuf; 0 until the first write or flush */
Pcb pfrom, pto;		/* parse state for string1 and string2 */
int cflag;		/* -c given */
int dflag;		/* -d given */
int sflag;		/* -s given */
/* the three operating modes selected in main() */
void complement(void);
void delete(void);
void squeeze(void);	/* NOTE(review): declared, but no definition is visible in this file */
void translit(void);
/* specification parsing */
long canon(Pcb*);
char *getrune(char*, Rune*);
void Pinit(Pcb*, char*);
void Prewind(Pcb *p);
/* buffered rune I/O */
int readrune(int, long*);
void wflush(int);
void writerune(int, Rune);
  33. static void
  34. usage(void)
  35. {
  36. fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0);
  37. exits("usage");
  38. }
  39. void
  40. main(int argc, char **argv)
  41. {
  42. ARGBEGIN{
  43. case 's': sflag++; break;
  44. case 'd': dflag++; break;
  45. case 'c': cflag++; break;
  46. default: usage();
  47. }ARGEND
  48. if(argc>0)
  49. Pinit(&pfrom, argv[0]);
  50. if(argc>1)
  51. Pinit(&pto, argv[1]);
  52. if(argc>2)
  53. usage();
  54. if(dflag) {
  55. if ((sflag && argc != 2) || (!sflag && argc != 1))
  56. usage();
  57. delete();
  58. } else {
  59. if (argc != 2)
  60. usage();
  61. if (cflag)
  62. complement();
  63. else translit();
  64. }
  65. exits(0);
  66. }
  67. void
  68. delete(void)
  69. {
  70. long c, last;
  71. if (cflag) {
  72. memset((char *) f, 0xff, sizeof f);
  73. while ((c = canon(&pfrom)) >= 0)
  74. CLEARBIT(f, c);
  75. } else {
  76. while ((c = canon(&pfrom)) >= 0)
  77. SETBIT(f, c);
  78. }
  79. if (sflag) {
  80. while ((c = canon(&pto)) >= 0)
  81. SETBIT(t, c);
  82. }
  83. last = 0x10000;
  84. while (readrune(0, &c) > 0) {
  85. if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
  86. last = c;
  87. writerune(1, (Rune) c);
  88. }
  89. }
  90. wflush(1);
  91. }
  92. void
  93. complement(void)
  94. {
  95. Rune *p;
  96. int i;
  97. long from, to, lastc, high;
  98. lastc = 0;
  99. high = 0;
  100. while ((from = canon(&pfrom)) >= 0) {
  101. if (from > high) high = from;
  102. SETBIT(f, from);
  103. }
  104. while ((to = canon(&pto)) > 0) {
  105. if (to > high) high = to;
  106. SETBIT(t,to);
  107. }
  108. Prewind(&pto);
  109. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  110. sysfatal("no memory");
  111. for (i = 0; i <= high; i++){
  112. if (!BITSET(f,i)) {
  113. if ((to = canon(&pto)) < 0)
  114. to = lastc;
  115. else lastc = to;
  116. p[i] = to;
  117. }
  118. else p[i] = i;
  119. }
  120. if (sflag){
  121. lastc = 0x10000;
  122. while (readrune(0, &from) > 0) {
  123. if (from > high)
  124. from = to;
  125. else
  126. from = p[from];
  127. if (from != lastc || !BITSET(t,from)) {
  128. lastc = from;
  129. writerune(1, (Rune) from);
  130. }
  131. }
  132. } else {
  133. while (readrune(0, &from) > 0){
  134. if (from > high)
  135. from = to;
  136. else
  137. from = p[from];
  138. writerune(1, (Rune) from);
  139. }
  140. }
  141. wflush(1);
  142. }
  143. void
  144. translit(void)
  145. {
  146. Rune *p;
  147. int i;
  148. long from, to, lastc, high;
  149. lastc = 0;
  150. high = 0;
  151. while ((from = canon(&pfrom)) >= 0)
  152. if (from > high) high = from;
  153. Prewind(&pfrom);
  154. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  155. sysfatal("no memory");
  156. for (i = 0; i <= high; i++)
  157. p[i] = i;
  158. while ((from = canon(&pfrom)) >= 0) {
  159. if ((to = canon(&pto)) < 0)
  160. to = lastc;
  161. else lastc = to;
  162. if (BITSET(f,from) && p[from] != to)
  163. sysfatal("ambiguous translation");
  164. SETBIT(f,from);
  165. p[from] = to;
  166. SETBIT(t,to);
  167. }
  168. while ((to = canon(&pto)) >= 0) {
  169. SETBIT(t,to);
  170. }
  171. if (sflag){
  172. lastc = 0x10000;
  173. while (readrune(0, &from) > 0) {
  174. if (from <= high)
  175. from = p[from];
  176. if (from != lastc || !BITSET(t,from)) {
  177. lastc = from;
  178. writerune(1, (Rune) from);
  179. }
  180. }
  181. } else {
  182. while (readrune(0, &from) > 0) {
  183. if (from <= high)
  184. from = p[from];
  185. writerune(1, (Rune) from);
  186. }
  187. }
  188. wflush(1);
  189. }
  190. int
  191. readrune(int fd, long *rp)
  192. {
  193. Rune r;
  194. int j;
  195. static int i, n;
  196. static char buf[4096];
  197. j = i;
  198. for (;;) {
  199. if (i >= n) {
  200. wflush(1);
  201. if (j != i)
  202. memcpy(buf, buf+j, n-j);
  203. i = n-j;
  204. n = read(fd, &buf[i], sizeof(buf)-i);
  205. if (n < 0)
  206. sysfatal("read error: %r");
  207. if (n == 0)
  208. return 0;
  209. j = 0;
  210. n += i;
  211. }
  212. i++;
  213. if (fullrune(&buf[j], i-j))
  214. break;
  215. }
  216. chartorune(&r, &buf[j]);
  217. *rp = r;
  218. return 1;
  219. }
  220. void
  221. writerune(int fd, Rune r)
  222. {
  223. char buf[UTFmax];
  224. int n;
  225. if (!wptr)
  226. wptr = wbuf;
  227. n = runetochar(buf, (Rune*)&r);
  228. if (wptr+n >= wbuf+sizeof(wbuf))
  229. wflush(fd);
  230. memcpy(wptr, buf, n);
  231. wptr += n;
  232. }
  233. void
  234. wflush(int fd)
  235. {
  236. if (wptr && wptr > wbuf)
  237. if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
  238. sysfatal("write error: %r");
  239. wptr = wbuf;
  240. }
  241. char *
  242. getrune(char *s, Rune *rp)
  243. {
  244. Rune r;
  245. char *save;
  246. int i, n;
  247. s += chartorune(rp, s);
  248. if((r = *rp) == '\\' && *s){
  249. n = 0;
  250. if (*s == 'x') {
  251. s++;
  252. for (i = 0; i < 4; i++) {
  253. save = s;
  254. s += chartorune(&r, s);
  255. if ('0' <= r && r <= '9')
  256. n = 16*n + r - '0';
  257. else if ('a' <= r && r <= 'f')
  258. n = 16*n + r - 'a' + 10;
  259. else if ('A' <= r && r <= 'F')
  260. n = 16*n + r - 'A' + 10;
  261. else {
  262. if (i == 0)
  263. *rp = 'x';
  264. else *rp = n;
  265. return save;
  266. }
  267. }
  268. } else {
  269. for(i = 0; i < 3; i++) {
  270. save = s;
  271. s += chartorune(&r, s);
  272. if('0' <= r && r <= '7')
  273. n = 8*n + r - '0';
  274. else {
  275. if (i == 0)
  276. {
  277. *rp = r;
  278. return s;
  279. }
  280. *rp = n;
  281. return save;
  282. }
  283. }
  284. if(n > 0377)
  285. sysfatal("character > 0377");
  286. }
  287. *rp = n;
  288. }
  289. return s;
  290. }
  291. long
  292. canon(Pcb *p)
  293. {
  294. Rune r;
  295. if (p->final >= 0) {
  296. if (p->last < p->final)
  297. return ++p->last;
  298. p->final = -1;
  299. }
  300. if (*p->current == '\0')
  301. return -1;
  302. if(*p->current == '-' && p->last >= 0 && p->current[1]){
  303. p->current = getrune(p->current+1, &r);
  304. if (r < p->last)
  305. sysfatal("invalid range specification");
  306. if (r > p->last) {
  307. p->final = r;
  308. return ++p->last;
  309. }
  310. }
  311. p->current = getrune(p->current, &r);
  312. p->last = r;
  313. return p->last;
  314. }
  315. void
  316. Pinit(Pcb *p, char *cp)
  317. {
  318. p->current = p->base = cp;
  319. p->last = p->final = -1;
  320. }
  321. void
  322. Prewind(Pcb *p)
  323. {
  324. p->current = p->base;
  325. p->last = p->final = -1;
  326. }