/* tr.c */

  1. #include <u.h>
  2. #include <libc.h>
  3. typedef struct PCB /* Control block controlling specification parse */
  4. {
  5. char *base; /* start of specification */
  6. char *current; /* current parse point */
  7. long last; /* last Rune returned */
  8. long final; /* final Rune in a span */
  9. } Pcb;
  10. uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  11. #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
  12. #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
  13. #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
  14. #define MAXRUNE 0xFFFF
  15. uchar f[(MAXRUNE+1)/8];
  16. uchar t[(MAXRUNE+1)/8];
  17. char wbuf[4096];
  18. char *wptr;
  19. Pcb pfrom, pto;
  20. int cflag;
  21. int dflag;
  22. int sflag;
  23. void complement(void);
  24. void delete(void);
  25. void squeeze(void);
  26. void translit(void);
  27. void error(char*);
  28. long canon(Pcb*);
  29. char *getrune(char*, Rune*);
  30. void Pinit(Pcb*, char*);
  31. void Prewind(Pcb *p);
  32. int readrune(int, long*);
  33. void wflush(int);
  34. void writerune(int, Rune);
  35. void
  36. main(int argc, char **argv)
  37. {
  38. ARGBEGIN{
  39. case 's': sflag++; break;
  40. case 'd': dflag++; break;
  41. case 'c': cflag++; break;
  42. default: error("bad option");
  43. }ARGEND
  44. if(argc>0)
  45. Pinit(&pfrom, argv[0]);
  46. if(argc>1)
  47. Pinit(&pto, argv[1]);
  48. if(argc>2)
  49. error("arg count");
  50. if(dflag) {
  51. if ((sflag && argc != 2) || (!sflag && argc != 1))
  52. error("arg count");
  53. delete();
  54. } else {
  55. if (argc != 2)
  56. error("arg count");
  57. if (cflag)
  58. complement();
  59. else translit();
  60. }
  61. exits(0);
  62. }
  63. void
  64. delete(void)
  65. {
  66. long c, last;
  67. if (cflag) {
  68. memset((char *) f, 0xff, sizeof f);
  69. while ((c = canon(&pfrom)) >= 0)
  70. CLEARBIT(f, c);
  71. } else {
  72. while ((c = canon(&pfrom)) >= 0)
  73. SETBIT(f, c);
  74. }
  75. if (sflag) {
  76. while ((c = canon(&pto)) >= 0)
  77. SETBIT(t, c);
  78. }
  79. last = 0x10000;
  80. while (readrune(0, &c) > 0) {
  81. if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
  82. last = c;
  83. writerune(1, (Rune) c);
  84. }
  85. }
  86. wflush(1);
  87. }
  88. void
  89. complement(void)
  90. {
  91. Rune *p;
  92. int i;
  93. long from, to, lastc, high;
  94. lastc = 0;
  95. high = 0;
  96. while ((from = canon(&pfrom)) >= 0) {
  97. if (from > high) high = from;
  98. SETBIT(f, from);
  99. }
  100. while ((to = canon(&pto)) > 0) {
  101. if (to > high) high = to;
  102. SETBIT(t,to);
  103. }
  104. Prewind(&pto);
  105. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  106. error("can't allocate memory");
  107. for (i = 0; i <= high; i++){
  108. if (!BITSET(f,i)) {
  109. if ((to = canon(&pto)) < 0)
  110. to = lastc;
  111. else lastc = to;
  112. p[i] = to;
  113. }
  114. else p[i] = i;
  115. }
  116. if (sflag){
  117. lastc = 0x10000;
  118. while (readrune(0, &from) > 0) {
  119. if (from > high)
  120. from = to;
  121. else
  122. from = p[from];
  123. if (from != lastc || !BITSET(t,from)) {
  124. lastc = from;
  125. writerune(1, (Rune) from);
  126. }
  127. }
  128. } else {
  129. while (readrune(0, &from) > 0){
  130. if (from > high)
  131. from = to;
  132. else
  133. from = p[from];
  134. writerune(1, (Rune) from);
  135. }
  136. }
  137. wflush(1);
  138. }
  139. void
  140. translit(void)
  141. {
  142. Rune *p;
  143. int i;
  144. long from, to, lastc, high;
  145. lastc = 0;
  146. high = 0;
  147. while ((from = canon(&pfrom)) >= 0)
  148. if (from > high) high = from;
  149. Prewind(&pfrom);
  150. if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
  151. error("can't allocate memory");
  152. for (i = 0; i <= high; i++)
  153. p[i] = i;
  154. while ((from = canon(&pfrom)) >= 0) {
  155. if ((to = canon(&pto)) < 0)
  156. to = lastc;
  157. else lastc = to;
  158. if (BITSET(f,from) && p[from] != to)
  159. error("ambiguous translation");
  160. SETBIT(f,from);
  161. p[from] = to;
  162. SETBIT(t,to);
  163. }
  164. while ((to = canon(&pto)) >= 0) {
  165. SETBIT(t,to);
  166. }
  167. if (sflag){
  168. lastc = 0x10000;
  169. while (readrune(0, &from) > 0) {
  170. if (from <= high)
  171. from = p[from];
  172. if (from != lastc || !BITSET(t,from)) {
  173. lastc = from;
  174. writerune(1, (Rune) from);
  175. }
  176. }
  177. } else {
  178. while (readrune(0, &from) > 0) {
  179. if (from <= high)
  180. from = p[from];
  181. writerune(1, (Rune) from);
  182. }
  183. }
  184. wflush(1);
  185. }
  186. int
  187. readrune(int fd, long *rp)
  188. {
  189. Rune r;
  190. int j;
  191. static int i, n;
  192. static char buf[4096];
  193. j = i;
  194. for (;;) {
  195. if (i >= n) {
  196. wflush(1);
  197. if (j != i)
  198. memcpy(buf, buf+j, n-j);
  199. i = n-j;
  200. n = read(fd, &buf[i], sizeof(buf)-i);
  201. if (n < 0)
  202. error("read error");
  203. if (n == 0)
  204. return 0;
  205. j = 0;
  206. n += i;
  207. }
  208. i++;
  209. if (fullrune(&buf[j], i-j))
  210. break;
  211. }
  212. chartorune(&r, &buf[j]);
  213. *rp = r;
  214. return 1;
  215. }
  216. void
  217. writerune(int fd, Rune r)
  218. {
  219. char buf[UTFmax];
  220. int n;
  221. if (!wptr)
  222. wptr = wbuf;
  223. n = runetochar(buf, (Rune*)&r);
  224. if (wptr+n >= wbuf+sizeof(wbuf))
  225. wflush(fd);
  226. memcpy(wptr, buf, n);
  227. wptr += n;
  228. }
  229. void
  230. wflush(int fd)
  231. {
  232. if (wptr && wptr > wbuf)
  233. if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
  234. error("write error");
  235. wptr = wbuf;
  236. }
  237. char *
  238. getrune(char *s, Rune *rp)
  239. {
  240. Rune r;
  241. char *save;
  242. int i, n;
  243. s += chartorune(rp, s);
  244. if((r = *rp) == '\\' && *s){
  245. n = 0;
  246. if (*s == 'x') {
  247. s++;
  248. for (i = 0; i < 4; i++) {
  249. save = s;
  250. s += chartorune(&r, s);
  251. if ('0' <= r && r <= '9')
  252. n = 16*n + r - '0';
  253. else if ('a' <= r && r <= 'f')
  254. n = 16*n + r - 'a' + 10;
  255. else if ('A' <= r && r <= 'F')
  256. n = 16*n + r - 'A' + 10;
  257. else {
  258. if (i == 0)
  259. *rp = 'x';
  260. else *rp = n;
  261. return save;
  262. }
  263. }
  264. } else {
  265. for(i = 0; i < 3; i++) {
  266. save = s;
  267. s += chartorune(&r, s);
  268. if('0' <= r && r <= '7')
  269. n = 8*n + r - '0';
  270. else {
  271. if (i == 0)
  272. {
  273. *rp = r;
  274. return s;
  275. }
  276. *rp = n;
  277. return save;
  278. }
  279. }
  280. if(n > 0377)
  281. error("char>0377");
  282. }
  283. *rp = n;
  284. }
  285. return s;
  286. }
  287. long
  288. canon(Pcb *p)
  289. {
  290. Rune r;
  291. if (p->final >= 0) {
  292. if (p->last < p->final)
  293. return ++p->last;
  294. p->final = -1;
  295. }
  296. if (*p->current == '\0')
  297. return -1;
  298. if(*p->current == '-' && p->last >= 0 && p->current[1]){
  299. p->current = getrune(p->current+1, &r);
  300. if (r < p->last)
  301. error ("Invalid range specification");
  302. if (r > p->last) {
  303. p->final = r;
  304. return ++p->last;
  305. }
  306. }
  307. p->current = getrune(p->current, &r);
  308. p->last = r;
  309. return p->last;
  310. }
  311. void
  312. Pinit(Pcb *p, char *cp)
  313. {
  314. p->current = p->base = cp;
  315. p->last = p->final = -1;
  316. }
  317. void
  318. Prewind(Pcb *p)
  319. {
  320. p->current = p->base;
  321. p->last = p->final = -1;
  322. }
  323. void
  324. error(char *s)
  325. {
  326. fprint(2, "%s: %s\n", argv0, s);
  327. exits(s);
  328. }