/* tr.c - translate, delete, or squeeze runes read from standard input */

  1. #include <u.h>
  2. #include <libc.h>
  3. typedef struct PCB /* Control block controlling specification parse */
  4. {
  5. char *base; /* start of specification */
  6. char *current; /* current parse point */
  7. long last; /* last Rune returned */
  8. long final; /* final Rune in a span */
  9. } Pcb;
  10. uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  11. #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
  12. #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
  13. #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
  14. #define MAXRUNE 0xFFFF
  15. uchar f[(MAXRUNE+1)/8];
  16. uchar t[(MAXRUNE+1)/8];
  17. char wbuf[4096];
  18. char *wptr;
  19. Pcb pfrom, pto;
  20. int cflag;
  21. int dflag;
  22. int sflag;
  23. void complement(void);
  24. void delete(void);
  25. void squeeze(void);
  26. void translit(void);
  27. long canon(Pcb*);
  28. char *getrune(char*, Rune*);
  29. void Pinit(Pcb*, char*);
  30. void Prewind(Pcb *p);
  31. int readrune(int, long*);
  32. void wflush(int);
  33. void writerune(int, Rune);
  34. static void
  35. usage(void)
  36. {
  37. fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0);
  38. exits("usage");
  39. }
  40. void
  41. main(int argc, char **argv)
  42. {
  43. ARGBEGIN{
  44. case 's': sflag++; break;
  45. case 'd': dflag++; break;
  46. case 'c': cflag++; break;
  47. default: usage();
  48. }ARGEND
  49. if(argc>0)
  50. Pinit(&pfrom, argv[0]);
  51. if(argc>1)
  52. Pinit(&pto, argv[1]);
  53. if(argc>2)
  54. usage();
  55. if(dflag) {
  56. if ((sflag && argc != 2) || (!sflag && argc != 1))
  57. usage();
  58. delete();
  59. } else {
  60. if (argc != 2)
  61. usage();
  62. if (cflag)
  63. complement();
  64. else translit();
  65. }
  66. exits(0);
  67. }
  68. void
  69. delete(void)
  70. {
  71. long c, last;
  72. if (cflag) {
  73. memset((char *) f, 0xff, sizeof f);
  74. while ((c = canon(&pfrom)) >= 0)
  75. CLEARBIT(f, c);
  76. } else {
  77. while ((c = canon(&pfrom)) >= 0)
  78. SETBIT(f, c);
  79. }
  80. if (sflag) {
  81. while ((c = canon(&pto)) >= 0)
  82. SETBIT(t, c);
  83. }
  84. last = 0x10000;
  85. while (readrune(0, &c) > 0) {
  86. if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
  87. last = c;
  88. writerune(1, (Rune) c);
  89. }
  90. }
  91. wflush(1);
  92. }
  93. void
  94. complement(void)
  95. {
  96. Rune *p;
  97. int i;
  98. long from, to, lastc, high;
  99. lastc = 0;
  100. high = 0;
  101. while ((from = canon(&pfrom)) >= 0) {
  102. if (from > high) high = from;
  103. SETBIT(f, from);
  104. }
  105. while ((to = canon(&pto)) > 0) {
  106. if (to > high) high = to;
  107. SETBIT(t,to);
  108. }
  109. Prewind(&pto);
  110. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  111. sysfatal("no memory");
  112. for (i = 0; i <= high; i++){
  113. if (!BITSET(f,i)) {
  114. if ((to = canon(&pto)) < 0)
  115. to = lastc;
  116. else lastc = to;
  117. p[i] = to;
  118. }
  119. else p[i] = i;
  120. }
  121. if (sflag){
  122. lastc = 0x10000;
  123. while (readrune(0, &from) > 0) {
  124. if (from > high)
  125. from = to;
  126. else
  127. from = p[from];
  128. if (from != lastc || !BITSET(t,from)) {
  129. lastc = from;
  130. writerune(1, (Rune) from);
  131. }
  132. }
  133. } else {
  134. while (readrune(0, &from) > 0){
  135. if (from > high)
  136. from = to;
  137. else
  138. from = p[from];
  139. writerune(1, (Rune) from);
  140. }
  141. }
  142. wflush(1);
  143. }
  144. void
  145. translit(void)
  146. {
  147. Rune *p;
  148. int i;
  149. long from, to, lastc, high;
  150. lastc = 0;
  151. high = 0;
  152. while ((from = canon(&pfrom)) >= 0)
  153. if (from > high) high = from;
  154. Prewind(&pfrom);
  155. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  156. sysfatal("no memory");
  157. for (i = 0; i <= high; i++)
  158. p[i] = i;
  159. while ((from = canon(&pfrom)) >= 0) {
  160. if ((to = canon(&pto)) < 0)
  161. to = lastc;
  162. else lastc = to;
  163. if (BITSET(f,from) && p[from] != to)
  164. sysfatal("ambiguous translation");
  165. SETBIT(f,from);
  166. p[from] = to;
  167. SETBIT(t,to);
  168. }
  169. while ((to = canon(&pto)) >= 0) {
  170. SETBIT(t,to);
  171. }
  172. if (sflag){
  173. lastc = 0x10000;
  174. while (readrune(0, &from) > 0) {
  175. if (from <= high)
  176. from = p[from];
  177. if (from != lastc || !BITSET(t,from)) {
  178. lastc = from;
  179. writerune(1, (Rune) from);
  180. }
  181. }
  182. } else {
  183. while (readrune(0, &from) > 0) {
  184. if (from <= high)
  185. from = p[from];
  186. writerune(1, (Rune) from);
  187. }
  188. }
  189. wflush(1);
  190. }
  191. int
  192. readrune(int fd, long *rp)
  193. {
  194. Rune r;
  195. int j;
  196. static int i, n;
  197. static char buf[4096];
  198. j = i;
  199. for (;;) {
  200. if (i >= n) {
  201. wflush(1);
  202. if (j != i)
  203. memcpy(buf, buf+j, n-j);
  204. i = n-j;
  205. n = read(fd, &buf[i], sizeof(buf)-i);
  206. if (n < 0)
  207. sysfatal("read error: %r");
  208. if (n == 0)
  209. return 0;
  210. j = 0;
  211. n += i;
  212. }
  213. i++;
  214. if (fullrune(&buf[j], i-j))
  215. break;
  216. }
  217. chartorune(&r, &buf[j]);
  218. *rp = r;
  219. return 1;
  220. }
  221. void
  222. writerune(int fd, Rune r)
  223. {
  224. char buf[UTFmax];
  225. int n;
  226. if (!wptr)
  227. wptr = wbuf;
  228. n = runetochar(buf, (Rune*)&r);
  229. if (wptr+n >= wbuf+sizeof(wbuf))
  230. wflush(fd);
  231. memcpy(wptr, buf, n);
  232. wptr += n;
  233. }
  234. void
  235. wflush(int fd)
  236. {
  237. if (wptr && wptr > wbuf)
  238. if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
  239. sysfatal("write error: %r");
  240. wptr = wbuf;
  241. }
  242. char *
  243. getrune(char *s, Rune *rp)
  244. {
  245. Rune r;
  246. char *save;
  247. int i, n;
  248. s += chartorune(rp, s);
  249. if((r = *rp) == '\\' && *s){
  250. n = 0;
  251. if (*s == 'x') {
  252. s++;
  253. for (i = 0; i < 4; i++) {
  254. save = s;
  255. s += chartorune(&r, s);
  256. if ('0' <= r && r <= '9')
  257. n = 16*n + r - '0';
  258. else if ('a' <= r && r <= 'f')
  259. n = 16*n + r - 'a' + 10;
  260. else if ('A' <= r && r <= 'F')
  261. n = 16*n + r - 'A' + 10;
  262. else {
  263. if (i == 0)
  264. *rp = 'x';
  265. else *rp = n;
  266. return save;
  267. }
  268. }
  269. } else {
  270. for(i = 0; i < 3; i++) {
  271. save = s;
  272. s += chartorune(&r, s);
  273. if('0' <= r && r <= '7')
  274. n = 8*n + r - '0';
  275. else {
  276. if (i == 0)
  277. {
  278. *rp = r;
  279. return s;
  280. }
  281. *rp = n;
  282. return save;
  283. }
  284. }
  285. if(n > 0377)
  286. sysfatal("character > 0377");
  287. }
  288. *rp = n;
  289. }
  290. return s;
  291. }
  292. long
  293. canon(Pcb *p)
  294. {
  295. Rune r;
  296. if (p->final >= 0) {
  297. if (p->last < p->final)
  298. return ++p->last;
  299. p->final = -1;
  300. }
  301. if (*p->current == '\0')
  302. return -1;
  303. if(*p->current == '-' && p->last >= 0 && p->current[1]){
  304. p->current = getrune(p->current+1, &r);
  305. if (r < p->last)
  306. sysfatal("invalid range specification");
  307. if (r > p->last) {
  308. p->final = r;
  309. return ++p->last;
  310. }
  311. }
  312. p->current = getrune(p->current, &r);
  313. p->last = r;
  314. return p->last;
  315. }
  316. void
  317. Pinit(Pcb *p, char *cp)
  318. {
  319. p->current = p->base = cp;
  320. p->last = p->final = -1;
  321. }
  322. void
  323. Prewind(Pcb *p)
  324. {
  325. p->current = p->base;
  326. p->last = p->final = -1;
  327. }