12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- /*
- * Deroff command -- strip troff, eqn, and tbl sequences from
- * a file. Flag arguments:
- * -w causes output one word per line
- * rather than in the original format.
- * -mm (or -ms) causes the corresponding macros to be interpreted
- * so that just sentences are output.
- * -ml also gets rid of lists.
- * -i causes deroff to ignore .so and .nx commands.
- * Deroff follows .so and .nx commands, removes contents of macro
- * definitions, equations (both .EQ ... .EN and $...$),
- * Tbl command sequences, and Troff backslash constructions.
- *
- * All input is through the C macro; the most recently read character is in c.
- */
- /*
- #define C ((c = Bgetrune(infile)) < 0?\
- eof():\
- ((c == ldelim) && (filesp == files)?\
- skeqn():\
- (c == '\n'?\
- (linect++,c):\
- c)))
- #define C1 ((c = Bgetrune(infile)) == Beof?\
- eof():\
- (c == '\n'?\
- (linect++,c):\
- c))
- */
- /* lose those macros! */
- #define C fC()
- #define C1 fC1()
- #define SKIP while(C != '\n')
- #define SKIP1 while(C1 != '\n')
- #define SKIP_TO_COM SKIP;\
- SKIP;\
- pc=c;\
- while(C != '.' || pc != '\n' || C > 'Z')\
- pc=c
- #define YES 1
- #define NO 0
- #define MS 0
- #define MM 1
- #define ONE 1
- #define TWO 2
- #define NOCHAR -2
- #define EXTENDED -1 /* All runes above 0x7F */
- #define SPECIAL 0
- #define APOS 1
- #define PUNCT 2
- #define DIGIT 3
- #define LETTER 4
- int linect = 0;
- int wordflag= NO;
- int underscoreflag = NO;
- int msflag = NO;
- int iflag = NO;
- int mac = MM;
- int disp = 0;
- int inmacro = NO;
- int intable = NO;
- int eqnflag = 0;
- #define MAX_ASCII 0X80
- char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
- Rune line[30000];
- Rune* lp;
- long c;
- long pc;
- int ldelim = NOCHAR;
- int rdelim = NOCHAR;
- char** argv;
- char fname[50];
- Biobuf* files[15];
- Biobuf**filesp;
- Biobuf* infile;
- char* devnull = "/dev/null";
- Biobuf *infile;
- Biobuf bout;
- long skeqn(void);
- Biobuf* opn(char *p);
- int eof(void);
- int charclass(int);
- void getfname(void);
- void fatal(char *s, char *p);
- void usage(void);
- void work(void);
- void putmac(Rune *rp, int vconst);
- void regline(int macline, int vconst);
- void putwords(void);
- void comline(void);
- void macro(void);
- void eqn(void);
- void tbl(void);
- void stbl(void);
- void sdis(char a1, char a2);
- void sce(void);
- void backsl(void);
- char* copys(char *s);
- void refer(int c1);
- void inpic(void);
- int
- fC(void)
- {
- c = Bgetrune(infile);
- if(c < 0)
- return eof();
- if(c == ldelim && filesp == files)
- return skeqn();
- if(c == '\n')
- linect++;
- return c;
- }
- int
- fC1(void)
- {
- c = Bgetrune(infile);
- if(c == Beof)
- return eof();
- if(c == '\n')
- linect++;
- return c;
- }
- void
- main(int argc, char *av[])
- {
- int i;
- char *f;
- argv = av;
- Binit(&bout, 1, OWRITE);
- ARGBEGIN{
- case 'w':
- wordflag = YES;
- break;
- case '_':
- wordflag = YES;
- underscoreflag = YES;
- break;
- case 'm':
- msflag = YES;
- if(f = ARGF())
- switch(*f)
- {
- case 'm': mac = MM; break;
- case 's': mac = MS; break;
- case 'l': disp = 1; break;
- default: usage();
- }
- else
- usage();
- break;
- case 'i':
- iflag = YES;
- break;
- default:
- usage();
- }ARGEND
- if(*argv)
- infile = opn(*argv++);
- else{
- infile = malloc(sizeof(Biobuf));
- Binit(infile, 0, OREAD);
- }
- files[0] = infile;
- filesp = &files[0];
- for(i='a'; i<='z' ; ++i)
- chars[i] = LETTER;
- for(i='A'; i<='Z'; ++i)
- chars[i] = LETTER;
- for(i='0'; i<='9'; ++i)
- chars[i] = DIGIT;
- chars['\''] = APOS;
- chars['&'] = APOS;
- chars['\b'] = APOS;
- chars['.'] = PUNCT;
- chars[','] = PUNCT;
- chars[';'] = PUNCT;
- chars['?'] = PUNCT;
- chars[':'] = PUNCT;
- work();
- }
- long
- skeqn(void)
- {
- while(C1 != rdelim)
- if(c == '\\')
- c = C1;
- else if(c == '"')
- while(C1 != '"')
- if(c == '\\')
- C1;
- if (msflag)
- eqnflag = 1;
- return(c = ' ');
- }
- Biobuf*
- opn(char *p)
- {
- Biobuf *fd;
- while ((fd = Bopen(p, OREAD)) == 0) {
- if(msflag || p == devnull)
- fatal("Cannot open file %s - quitting\n", p);
- else {
- fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
- p = devnull;
- }
- }
- linect = 0;
- return(fd);
- }
- int
- eof(void)
- {
- if(Bfildes(infile) != 0)
- Bterm(infile);
- if(filesp > files)
- infile = *--filesp;
- else
- if(*argv)
- infile = opn(*argv++);
- else
- exits(0);
- return(C);
- }
- void
- getfname(void)
- {
- char *p;
- Rune r;
- Dir *dir;
- struct chain
- {
- struct chain* nextp;
- char* datap;
- } *q;
- static struct chain *namechain= 0;
- while(C == ' ')
- ;
- for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)
- p += runetochar(p, &r);
- *p = '\0';
- while(c != '\n')
- C;
- if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
- || !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
- fname[0] = '\0';
- return;
- }
- dir = dirstat(fname);
- if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
- free(dir);
- fname[0] = '\0';
- return;
- }
- free(dir);
- /*
- * see if this name has already been used
- */
- for(q = namechain; q; q = q->nextp)
- if( !strcmp(fname, q->datap)) {
- fname[0] = '\0';
- return;
- }
- q = (struct chain*)malloc(sizeof(struct chain));
- q->nextp = namechain;
- q->datap = copys(fname);
- namechain = q;
- }
/*
 * Print the synopsis and exit with status "usage".
 * Only the flags main() accepts (-w, -_, -i, -m) are listed.
 */
void
usage(void)
{
	fprint(2, "usage: deroff [-w_i] [-m (m s l)] [file ...] \n");
	exits("usage");
}
/*
 * Print "deroff: " followed by the message built from format s and
 * its single string argument p on file descriptor 2, then exit with
 * s as the exit status.
 */
void
fatal(char *s, char *p)
{
	int errfd;

	errfd = 2;
	fprint(errfd, "deroff: ");
	fprint(errfd, s, p);
	exits(s);
}
- void
- work(void)
- {
- for(;;) {
- eqnflag = 0;
- if(C == '.' || c == '\'')
- comline();
- else
- regline(NO, TWO);
- }
- }
- void
- regline(int macline, int vconst)
- {
- line[0] = c;
- lp = line;
- for(;;) {
- if(c == '\\') {
- *lp = ' ';
- backsl();
- if(c == '%') /* no blank for hyphenation char */
- lp--;
- }
- if(c == '\n')
- break;
- if(intable && c=='T') {
- *++lp = C;
- if(c=='{' || c=='}') {
- lp[-1] = ' ';
- *lp = C;
- }
- } else {
- if(msflag == 1 && eqnflag == 1) {
- eqnflag = 0;
- *++lp = 'x';
- }
- *++lp = C;
- }
- }
- *lp = '\0';
- if(lp != line) {
- if(wordflag)
- putwords();
- else
- if(macline)
- putmac(line,vconst);
- else
- Bprint(&bout, "%S\n", line);
- }
- }
- void
- putmac(Rune *rp, int vconst)
- {
- Rune *t;
- int found;
- Rune last;
- found = 0;
- last = 0;
- while(*rp) {
- while(*rp == ' ' || *rp == '\t')
- Bputrune(&bout, *rp++);
- for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)
- ;
- if(*rp == '\"')
- rp++;
- if(t > rp+vconst && charclass(*rp) == LETTER
- && charclass(rp[1]) == LETTER) {
- while(rp < t)
- if(*rp == '\"')
- rp++;
- else
- Bputrune(&bout, *rp++);
- last = t[-1];
- found++;
- } else
- if(found && charclass(*rp) == PUNCT && rp[1] == '\0')
- Bputrune(&bout, *rp++);
- else {
- last = t[-1];
- rp = t;
- }
- }
- Bputc(&bout, '\n');
- if(msflag && charclass(last) == PUNCT)
- Bprint(&bout, " %C\n", last);
- }
- /*
- * break into words for -w option
- */
- void
- putwords(void)
- {
- Rune *p, *p1;
- int i, nlet;
- for(p1 = line;;) {
- /*
- * skip initial specials ampersands and apostrophes
- */
- while((i = charclass(*p1)) != EXTENDED && i < DIGIT)
- if(*p1++ == '\0')
- return;
- nlet = 0;
- for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)
- if(i == LETTER || (underscoreflag && *p == '_'))
- nlet++;
- /*
- * MDM definition of word
- */
- if(nlet > 1) {
- /*
- * delete trailing ampersands and apostrophes
- */
- while(*--p == '\'' || *p == '&'
- || charclass(*p) == PUNCT)
- ;
- while(p1 <= p)
- Bputrune(&bout, *p1++);
- Bputc(&bout, '\n');
- } else
- p1 = p;
- }
- }
- void
- comline(void)
- {
- long c1, c2;
- while(C==' ' || c=='\t')
- ;
- comx:
- if((c1=c) == '\n')
- return;
- c2 = C;
- if(c1=='.' && c2!='.')
- inmacro = NO;
- if(msflag && c1 == '['){
- refer(c2);
- return;
- }
- if(c2 == '\n')
- return;
- if(c1 == '\\' && c2 == '\"')
- SKIP;
- else
- if (filesp==files && c1=='E' && c2=='Q')
- eqn();
- else
- if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
- if(msflag)
- stbl();
- else
- tbl();
- }
- else
- if(c1=='T' && c2=='E')
- intable = NO;
- else if (!inmacro &&
- ((c1 == 'd' && c2 == 'e') ||
- (c1 == 'i' && c2 == 'g') ||
- (c1 == 'a' && c2 == 'm')))
- macro();
- else
- if(c1=='s' && c2=='o') {
- if(iflag)
- SKIP;
- else {
- getfname();
- if(fname[0]) {
- if(infile = opn(fname))
- *++filesp = infile;
- else infile = *filesp;
- }
- }
- }
- else
- if(c1=='n' && c2=='x')
- if(iflag)
- SKIP;
- else {
- getfname();
- if(fname[0] == '\0')
- exits(0);
- if(Bfildes(infile) != 0)
- Bterm(infile);
- infile = *filesp = opn(fname);
- }
- else
- if(c1 == 't' && c2 == 'm')
- SKIP;
- else
- if(c1=='h' && c2=='w')
- SKIP;
- else
- if(msflag && c1 == 'T' && c2 == 'L') {
- SKIP_TO_COM;
- goto comx;
- }
- else
- if(msflag && c1=='N' && c2 == 'R')
- SKIP;
- else
- if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
- if(mac==MM)SKIP;
- else {
- SKIP_TO_COM;
- goto comx;
- }
- } else
- if(msflag && c1=='F' && c2=='S') {
- SKIP_TO_COM;
- goto comx;
- }
- else
- if(msflag && (c1=='S' || c1=='N') && c2=='H') {
- SKIP_TO_COM;
- goto comx;
- } else
- if(c1 == 'U' && c2 == 'X') {
- if(wordflag)
- Bprint(&bout, "UNIX\n");
- else
- Bprint(&bout, "UNIX ");
- } else
- if(msflag && c1=='O' && c2=='K') {
- SKIP_TO_COM;
- goto comx;
- } else
- if(msflag && c1=='N' && c2=='D')
- SKIP;
- else
- if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
- SKIP;
- else
- if(msflag && mac==MM && c2=='L') {
- if(disp || c1=='R')
- sdis('L', 'E');
- else {
- SKIP;
- Bprint(&bout, " .");
- }
- } else
- if(!msflag && c1=='P' && c2=='S') {
- inpic();
- } else
- if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
- sdis(c1, 'E');
- } else
- if(msflag && (c1 == 'K' && c2 == 'F')) {
- sdis(c1,'E');
- } else
- if(msflag && c1=='n' && c2=='f')
- sdis('f','i');
- else
- if(msflag && c1=='c' && c2=='e')
- sce();
- else {
- if(c1=='.' && c2=='.') {
- if(msflag) {
- SKIP;
- return;
- }
- while(C == '.')
- ;
- }
- inmacro++;
- if(c1 <= 'Z' && msflag)
- regline(YES,ONE);
- else {
- if(wordflag)
- C;
- regline(YES,TWO);
- }
- inmacro--;
- }
- }
- void
- macro(void)
- {
- if(msflag) {
- do {
- SKIP1;
- } while(C1 != '.' || C1 != '.' || C1 == '.');
- if(c != '\n')
- SKIP;
- return;
- }
- SKIP;
- inmacro = YES;
- }
- void
- sdis(char a1, char a2)
- {
- int c1, c2;
- int eqnf;
- int lct;
- if(a1 == 'P'){
- while(C1 == ' ')
- ;
- if(c == '<') {
- SKIP1;
- return;
- }
- }
- lct = 0;
- eqnf = 1;
- if(c != '\n')
- SKIP1;
- for(;;) {
- while(C1 != '.')
- if(c == '\n')
- continue;
- else
- SKIP1;
- if((c1=C1) == '\n')
- continue;
- if((c2=C1) == '\n') {
- if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
- return;
- continue;
- }
- if(c1==a1 && c2 == a2) {
- SKIP1;
- if(lct != 0){
- lct--;
- continue;
- }
- if(eqnf)
- Bprint(&bout, " .");
- Bputc(&bout, '\n');
- return;
- } else
- if(a1 == 'L' && c2 == 'L') {
- lct++;
- SKIP1;
- } else
- if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {
- eqn();
- eqnf = 0;
- } else
- if(a1 == 'f') {
- if((mac == MS && c2 == 'P') ||
- (mac == MM && c1 == 'H' && c2 == 'U')){
- SKIP1;
- return;
- }
- SKIP1;
- }
- else
- SKIP1;
- }
- }
- void
- tbl(void)
- {
- while(C != '.')
- ;
- SKIP;
- intable = YES;
- }
- void
- stbl(void)
- {
- while(C != '.')
- ;
- SKIP_TO_COM;
- if(c != 'T' || C != 'E') {
- SKIP;
- pc = c;
- while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
- pc = c;
- }
- }
- void
- eqn(void)
- {
- long c1, c2;
- int dflg;
- char last;
- last = 0;
- dflg = 1;
- SKIP;
- for(;;) {
- if(C1 == '.' || c == '\'') {
- while(C1==' ' || c=='\t')
- ;
- if(c=='E' && C1=='N') {
- SKIP;
- if(msflag && dflg) {
- Bputc(&bout, 'x');
- Bputc(&bout, ' ');
- if(last) {
- Bputc(&bout, last);
- Bputc(&bout, '\n');
- }
- }
- return;
- }
- } else
- if(c == 'd') {
- if(C1=='e' && C1=='l')
- if(C1=='i' && C1=='m') {
- while(C1 == ' ')
- ;
- if((c1=c)=='\n' || (c2=C1)=='\n' ||
- (c1=='o' && c2=='f' && C1=='f')) {
- ldelim = NOCHAR;
- rdelim = NOCHAR;
- } else {
- ldelim = c1;
- rdelim = c2;
- }
- }
- dflg = 0;
- }
- if(c != '\n')
- while(C1 != '\n') {
- if(chars[c] == PUNCT)
- last = c;
- else
- if(c != ' ')
- last = 0;
- }
- }
- }
- /*
- * skip over a complete backslash vconstruction
- */
- void
- backsl(void)
- {
- int bdelim;
- sw:
- switch(C1)
- {
- case '"':
- SKIP1;
- return;
- case 's':
- if(C1 == '\\')
- backsl();
- else {
- while(C1>='0' && c<='9')
- ;
- Bungetrune(infile);
- c = '0';
- }
- lp--;
- return;
- case 'f':
- case 'n':
- case '*':
- if(C1 != '(')
- return;
- case '(':
- if(msflag) {
- if(C == 'e') {
- if(C1 == 'm') {
- *lp = '-';
- return;
- }
- } else
- if(c != '\n')
- C1;
- return;
- }
- if(C1 != '\n')
- C1;
- return;
- case '$':
- C1; /* discard argument number */
- return;
- case 'b':
- case 'x':
- case 'v':
- case 'h':
- case 'w':
- case 'o':
- case 'l':
- case 'L':
- if((bdelim=C1) == '\n')
- return;
- while(C1!='\n' && c!=bdelim)
- if(c == '\\')
- backsl();
- return;
- case '\\':
- if(inmacro)
- goto sw;
- default:
- return;
- }
- }
/*
 * Return a freshly allocated copy of the string s.
 * Calls fatal() if memory cannot be allocated.
 */
char*
copys(char *s)
{
	char *dup;
	int i, n;

	n = strlen(s) + 1;	/* include the NUL */
	dup = malloc(n);
	if(dup == 0)
		fatal("Cannot allocate memory", (char*)0);
	for(i = 0; i < n; i++)
		dup[i] = s[i];
	return dup;
}
- void
- sce(void)
- {
- int n = 1;
- while (C != L'\n' && !(L'0' <= c && c <= L'9'))
- ;
- if (c != L'\n') {
- for (n = c-L'0';'0' <= C && c <= L'9';)
- n = n*10 + c-L'0';
- }
- while(n) {
- if(C == '.') {
- if(C == 'c') {
- if(C == 'e') {
- while(C == ' ')
- ;
- if(c == '0') {
- SKIP;
- break;
- } else
- SKIP;
- } else
- SKIP;
- } else
- if(c == 'P' || C == 'P') {
- if(c != '\n')
- SKIP;
- break;
- } else
- if(c != '\n')
- SKIP;
- } else {
- SKIP;
- n--;
- }
- }
- }
- void
- refer(int c1)
- {
- int c2;
- if(c1 != '\n')
- SKIP;
- c2 = 0;
- for(;;) {
- if(C != '.')
- SKIP;
- else {
- if(C != ']')
- SKIP;
- else {
- while(C != '\n')
- c2 = c;
- if(charclass(c2) == PUNCT)
- Bprint(&bout, " %C",c2);
- return;
- }
- }
- }
- }
- void
- inpic(void)
- {
- int c1;
- Rune *p1;
- /* SKIP1;*/
- while(C1 != '\n')
- if(c == '<'){
- SKIP1;
- return;
- }
- p1 = line;
- c = '\n';
- for(;;) {
- c1 = c;
- if(C1 == '.' && c1 == '\n') {
- if(C1 != 'P' || C1 != 'E') {
- if(c != '\n'){
- SKIP1;
- c = '\n';
- }
- continue;
- }
- SKIP1;
- return;
- } else
- if(c == '\"') {
- while(C1 != '\"') {
- if(c == '\\') {
- if(C1 == '\"')
- continue;
- Bungetrune(infile);
- backsl();
- } else
- *p1++ = c;
- }
- *p1++ = ' ';
- } else
- if(c == '\n' && p1 != line) {
- *p1 = '\0';
- if(wordflag)
- putwords();
- else
- Bprint(&bout, "%S\n\n", line);
- p1 = line;
- }
- }
- }
- int
- charclass(int c)
- {
- if(c < MAX_ASCII)
- return chars[c];
- switch(c){
- case 0x2013: case 0x2014: /* en dash, em dash */
- return SPECIAL;
- }
- return EXTENDED;
- }
|