tr.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. typedef struct PCB /* Control block controlling specification parse */
  12. {
  13. char *base; /* start of specification */
  14. char *current; /* current parse point */
  15. int32_t last; /* last Rune returned */
  16. int32_t final; /* final Rune in a span */
  17. } Pcb;
  18. uint8_t bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  19. #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
  20. #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
  21. #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
  22. uint8_t f[(Runemax+1)/8];
  23. uint8_t t[(Runemax+1)/8];
  24. char wbuf[4096];
  25. char *wptr;
  26. Pcb pfrom, pto;
  27. int cflag;
  28. int dflag;
  29. int sflag;
  30. void complement(void);
  31. void delete(void);
  32. void squeeze(void);
  33. void translit(void);
  34. int32_t canon(Pcb*);
  35. char *getrune(char*, Rune*);
  36. void Pinit(Pcb*, char*);
  37. void Prewind(Pcb *p);
  38. int readrune(int, int32_t*);
  39. void wflush(int);
  40. void writerune(int, Rune);
  41. static void
  42. usage(void)
  43. {
  44. fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0);
  45. exits("usage");
  46. }
  47. void
  48. main(int argc, char **argv)
  49. {
  50. ARGBEGIN{
  51. case 's': sflag++; break;
  52. case 'd': dflag++; break;
  53. case 'c': cflag++; break;
  54. default: usage();
  55. }ARGEND
  56. if(argc>0)
  57. Pinit(&pfrom, argv[0]);
  58. if(argc>1)
  59. Pinit(&pto, argv[1]);
  60. if(argc>2)
  61. usage();
  62. if(dflag) {
  63. if ((sflag && argc != 2) || (!sflag && argc != 1))
  64. usage();
  65. delete();
  66. } else {
  67. if (argc != 2)
  68. usage();
  69. if (cflag)
  70. complement();
  71. else translit();
  72. }
  73. exits(0);
  74. }
  75. void
  76. delete(void)
  77. {
  78. int32_t c, last;
  79. if (cflag) {
  80. memset((char *) f, 0xff, sizeof f);
  81. while ((c = canon(&pfrom)) >= 0)
  82. CLEARBIT(f, c);
  83. } else {
  84. while ((c = canon(&pfrom)) >= 0)
  85. SETBIT(f, c);
  86. }
  87. if (sflag) {
  88. while ((c = canon(&pto)) >= 0)
  89. SETBIT(t, c);
  90. }
  91. last = 0x10000;
  92. while (readrune(0, &c) > 0) {
  93. if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
  94. last = c;
  95. writerune(1, (Rune) c);
  96. }
  97. }
  98. wflush(1);
  99. }
  100. void
  101. complement(void)
  102. {
  103. Rune *p;
  104. int i;
  105. int32_t from, to, lastc, high;
  106. lastc = 0;
  107. high = 0;
  108. while ((from = canon(&pfrom)) >= 0) {
  109. if (from > high) high = from;
  110. SETBIT(f, from);
  111. }
  112. while ((to = canon(&pto)) > 0) {
  113. if (to > high) high = to;
  114. SETBIT(t,to);
  115. }
  116. Prewind(&pto);
  117. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  118. sysfatal("no memory");
  119. for (i = 0; i <= high; i++){
  120. if (!BITSET(f,i)) {
  121. if ((to = canon(&pto)) < 0)
  122. to = lastc;
  123. else lastc = to;
  124. p[i] = to;
  125. }
  126. else p[i] = i;
  127. }
  128. if (sflag){
  129. lastc = 0x10000;
  130. while (readrune(0, &from) > 0) {
  131. if (from > high)
  132. from = to;
  133. else
  134. from = p[from];
  135. if (from != lastc || !BITSET(t,from)) {
  136. lastc = from;
  137. writerune(1, (Rune) from);
  138. }
  139. }
  140. } else {
  141. while (readrune(0, &from) > 0){
  142. if (from > high)
  143. from = to;
  144. else
  145. from = p[from];
  146. writerune(1, (Rune) from);
  147. }
  148. }
  149. wflush(1);
  150. }
  151. void
  152. translit(void)
  153. {
  154. Rune *p;
  155. int i;
  156. int32_t from, to, lastc, high;
  157. lastc = 0;
  158. high = 0;
  159. while ((from = canon(&pfrom)) >= 0)
  160. if (from > high) high = from;
  161. Prewind(&pfrom);
  162. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  163. sysfatal("no memory");
  164. for (i = 0; i <= high; i++)
  165. p[i] = i;
  166. while ((from = canon(&pfrom)) >= 0) {
  167. if ((to = canon(&pto)) < 0)
  168. to = lastc;
  169. else lastc = to;
  170. if (BITSET(f,from) && p[from] != to)
  171. sysfatal("ambiguous translation");
  172. SETBIT(f,from);
  173. p[from] = to;
  174. SETBIT(t,to);
  175. }
  176. while ((to = canon(&pto)) >= 0) {
  177. SETBIT(t,to);
  178. }
  179. if (sflag){
  180. lastc = 0x10000;
  181. while (readrune(0, &from) > 0) {
  182. if (from <= high)
  183. from = p[from];
  184. if (from != lastc || !BITSET(t,from)) {
  185. lastc = from;
  186. writerune(1, (Rune) from);
  187. }
  188. }
  189. } else {
  190. while (readrune(0, &from) > 0) {
  191. if (from <= high)
  192. from = p[from];
  193. writerune(1, (Rune) from);
  194. }
  195. }
  196. wflush(1);
  197. }
  198. int
  199. readrune(int fd, int32_t *rp)
  200. {
  201. Rune r;
  202. int j;
  203. static int i, n;
  204. static char buf[4096];
  205. j = i;
  206. for (;;) {
  207. if (i >= n) {
  208. wflush(1);
  209. if (j != i)
  210. memcpy(buf, buf+j, n-j);
  211. i = n-j;
  212. n = read(fd, &buf[i], sizeof(buf)-i);
  213. if (n < 0)
  214. sysfatal("read error: %r");
  215. if (n == 0)
  216. return 0;
  217. j = 0;
  218. n += i;
  219. }
  220. i++;
  221. if (fullrune(&buf[j], i-j))
  222. break;
  223. }
  224. chartorune(&r, &buf[j]);
  225. *rp = r;
  226. return 1;
  227. }
  228. void
  229. writerune(int fd, Rune r)
  230. {
  231. char buf[UTFmax];
  232. int n;
  233. if (!wptr)
  234. wptr = wbuf;
  235. n = runetochar(buf, (Rune*)&r);
  236. if (wptr+n >= wbuf+sizeof(wbuf))
  237. wflush(fd);
  238. memcpy(wptr, buf, n);
  239. wptr += n;
  240. }
  241. void
  242. wflush(int fd)
  243. {
  244. if (wptr && wptr > wbuf)
  245. if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
  246. sysfatal("write error: %r");
  247. wptr = wbuf;
  248. }
  249. char *
  250. getrune(char *s, Rune *rp)
  251. {
  252. Rune r;
  253. char *save;
  254. int i, n;
  255. s += chartorune(rp, s);
  256. if((r = *rp) == '\\' && *s){
  257. n = 0;
  258. if (*s == 'x') {
  259. s++;
  260. for (i = 0; i < 4; i++) {
  261. save = s;
  262. s += chartorune(&r, s);
  263. if ('0' <= r && r <= '9')
  264. n = 16*n + r - '0';
  265. else if ('a' <= r && r <= 'f')
  266. n = 16*n + r - 'a' + 10;
  267. else if ('A' <= r && r <= 'F')
  268. n = 16*n + r - 'A' + 10;
  269. else {
  270. if (i == 0)
  271. *rp = 'x';
  272. else *rp = n;
  273. return save;
  274. }
  275. }
  276. } else {
  277. for(i = 0; i < 3; i++) {
  278. save = s;
  279. s += chartorune(&r, s);
  280. if('0' <= r && r <= '7')
  281. n = 8*n + r - '0';
  282. else {
  283. if (i == 0)
  284. {
  285. *rp = r;
  286. return s;
  287. }
  288. *rp = n;
  289. return save;
  290. }
  291. }
  292. if(n > 0377)
  293. sysfatal("character > 0377");
  294. }
  295. *rp = n;
  296. }
  297. return s;
  298. }
  299. int32_t
  300. canon(Pcb *p)
  301. {
  302. Rune r;
  303. if (p->final >= 0) {
  304. if (p->last < p->final)
  305. return ++p->last;
  306. p->final = -1;
  307. }
  308. if (*p->current == '\0')
  309. return -1;
  310. if(*p->current == '-' && p->last >= 0 && p->current[1]){
  311. p->current = getrune(p->current+1, &r);
  312. if (r < p->last)
  313. sysfatal("invalid range specification");
  314. if (r > p->last) {
  315. p->final = r;
  316. return ++p->last;
  317. }
  318. }
  319. p->current = getrune(p->current, &r);
  320. p->last = r;
  321. return p->last;
  322. }
  323. void
  324. Pinit(Pcb *p, char *cp)
  325. {
  326. p->current = p->base = cp;
  327. p->last = p->final = -1;
  328. }
  329. void
  330. Prewind(Pcb *p)
  331. {
  332. p->current = p->base;
  333. p->last = p->final = -1;
  334. }