123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356 |
- #include <u.h>
- #include <libc.h>
/*
 * Parse control block: the cursor state used while expanding one
 * character-set specification (e.g. "a-z\012") into individual runes.
 */
struct PCB
{
	char	*base;		/* first byte of the specification string */
	char	*current;	/* next byte to be parsed */
	long	last;		/* most recent rune handed out, -1 if none yet */
	long	final;		/* upper bound of the span being expanded, -1 if none */
};
typedef struct PCB Pcb;
- uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
- #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
- #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
- #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
- #define MAXRUNE 0xFFFF
- uchar f[(MAXRUNE+1)/8];
- uchar t[(MAXRUNE+1)/8];
- char wbuf[4096];
- char *wptr;
- Pcb pfrom, pto;
- int cflag;
- int dflag;
- int sflag;
- void complement(void);
- void delete(void);
- void squeeze(void);
- void translit(void);
- void error(char*);
- long canon(Pcb*);
- char *getrune(char*, Rune*);
- void Pinit(Pcb*, char*);
- void Prewind(Pcb *p);
- int readrune(int, long*);
- void wflush(int);
- void writerune(int, Rune);
- void
- main(int argc, char **argv)
- {
- ARGBEGIN{
- case 's': sflag++; break;
- case 'd': dflag++; break;
- case 'c': cflag++; break;
- default: error("bad option");
- }ARGEND
- if(argc>0)
- Pinit(&pfrom, argv[0]);
- if(argc>1)
- Pinit(&pto, argv[1]);
- if(argc>2)
- error("arg count");
- if(dflag) {
- if ((sflag && argc != 2) || (!sflag && argc != 1))
- error("arg count");
- delete();
- } else {
- if (argc != 2)
- error("arg count");
- if (cflag)
- complement();
- else translit();
- }
- exits(0);
- }
- void
- delete(void)
- {
- long c, last;
- if (cflag) {
- memset((char *) f, 0xff, sizeof f);
- while ((c = canon(&pfrom)) >= 0)
- CLEARBIT(f, c);
- } else {
- while ((c = canon(&pfrom)) >= 0)
- SETBIT(f, c);
- }
- if (sflag) {
- while ((c = canon(&pto)) >= 0)
- SETBIT(t, c);
- }
- last = 0x10000;
- while (readrune(0, &c) > 0) {
- if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
- last = c;
- writerune(1, (Rune) c);
- }
- }
- wflush(1);
- }
- void
- complement(void)
- {
- Rune *p;
- int i;
- long from, to, lastc, high;
- lastc = 0;
- high = 0;
- while ((from = canon(&pfrom)) >= 0) {
- if (from > high) high = from;
- SETBIT(f, from);
- }
- while ((to = canon(&pto)) > 0) {
- if (to > high) high = to;
- SETBIT(t,to);
- }
- Prewind(&pto);
- if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
- error("can't allocate memory");
- for (i = 0; i <= high; i++){
- if (!BITSET(f,i)) {
- if ((to = canon(&pto)) < 0)
- to = lastc;
- else lastc = to;
- p[i] = to;
- }
- else p[i] = i;
- }
- if (sflag){
- lastc = 0x10000;
- while (readrune(0, &from) > 0) {
- if (from > high)
- from = to;
- else
- from = p[from];
- if (from != lastc || !BITSET(t,from)) {
- lastc = from;
- writerune(1, (Rune) from);
- }
- }
-
- } else {
- while (readrune(0, &from) > 0){
- if (from > high)
- from = to;
- else
- from = p[from];
- writerune(1, (Rune) from);
- }
- }
- wflush(1);
- }
- void
- translit(void)
- {
- Rune *p;
- int i;
- long from, to, lastc, high;
- lastc = 0;
- high = 0;
- while ((from = canon(&pfrom)) >= 0)
- if (from > high) high = from;
- Prewind(&pfrom);
- if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
- error("can't allocate memory");
- for (i = 0; i <= high; i++)
- p[i] = i;
- while ((from = canon(&pfrom)) >= 0) {
- if ((to = canon(&pto)) < 0)
- to = lastc;
- else lastc = to;
- if (BITSET(f,from) && p[from] != to)
- error("ambiguous translation");
- SETBIT(f,from);
- p[from] = to;
- SETBIT(t,to);
- }
- while ((to = canon(&pto)) >= 0) {
- SETBIT(t,to);
- }
- if (sflag){
- lastc = 0x10000;
- while (readrune(0, &from) > 0) {
- if (from <= high)
- from = p[from];
- if (from != lastc || !BITSET(t,from)) {
- lastc = from;
- writerune(1, (Rune) from);
- }
- }
-
- } else {
- while (readrune(0, &from) > 0) {
- if (from <= high)
- from = p[from];
- writerune(1, (Rune) from);
- }
- }
- wflush(1);
- }
- int
- readrune(int fd, long *rp)
- {
- Rune r;
- int j;
- static int i, n;
- static char buf[4096];
- j = i;
- for (;;) {
- if (i >= n) {
- wflush(1);
- if (j != i)
- memcpy(buf, buf+j, n-j);
- i = n-j;
- n = read(fd, &buf[i], sizeof(buf)-i);
- if (n < 0)
- error("read error");
- if (n == 0)
- return 0;
- j = 0;
- n += i;
- }
- i++;
- if (fullrune(&buf[j], i-j))
- break;
- }
- chartorune(&r, &buf[j]);
- *rp = r;
- return 1;
- }
- void
- writerune(int fd, Rune r)
- {
- char buf[UTFmax];
- int n;
- if (!wptr)
- wptr = wbuf;
- n = runetochar(buf, (Rune*)&r);
- if (wptr+n >= wbuf+sizeof(wbuf))
- wflush(fd);
- memcpy(wptr, buf, n);
- wptr += n;
- }
- void
- wflush(int fd)
- {
- if (wptr && wptr > wbuf)
- if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
- error("write error");
- wptr = wbuf;
- }
- char *
- getrune(char *s, Rune *rp)
- {
- Rune r;
- char *save;
- int i, n;
- s += chartorune(rp, s);
- if((r = *rp) == '\\' && *s){
- n = 0;
- if (*s == 'x') {
- s++;
- for (i = 0; i < 4; i++) {
- save = s;
- s += chartorune(&r, s);
- if ('0' <= r && r <= '9')
- n = 16*n + r - '0';
- else if ('a' <= r && r <= 'f')
- n = 16*n + r - 'a' + 10;
- else if ('A' <= r && r <= 'F')
- n = 16*n + r - 'A' + 10;
- else {
- if (i == 0)
- *rp = 'x';
- else *rp = n;
- return save;
- }
- }
- } else {
- for(i = 0; i < 3; i++) {
- save = s;
- s += chartorune(&r, s);
- if('0' <= r && r <= '7')
- n = 8*n + r - '0';
- else {
- if (i == 0)
- {
- *rp = r;
- return s;
- }
- *rp = n;
- return save;
- }
- }
- if(n > 0377)
- error("char>0377");
- }
- *rp = n;
- }
- return s;
- }
- long
- canon(Pcb *p)
- {
- Rune r;
- if (p->final >= 0) {
- if (p->last < p->final)
- return ++p->last;
- p->final = -1;
- }
- if (*p->current == '\0')
- return -1;
- if(*p->current == '-' && p->last >= 0 && p->current[1]){
- p->current = getrune(p->current+1, &r);
- if (r < p->last)
- error ("Invalid range specification");
- if (r > p->last) {
- p->final = r;
- return ++p->last;
- }
- }
- p->current = getrune(p->current, &r);
- p->last = r;
- return p->last;
- }
- void
- Pinit(Pcb *p, char *cp)
- {
- p->current = p->base = cp;
- p->last = p->final = -1;
- }
- void
- Prewind(Pcb *p)
- {
- p->current = p->base;
- p->last = p->final = -1;
- }
- void
- error(char *s)
- {
- fprint(2, "%s: %s\n", argv0, s);
- exits(s);
- }
|