123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651 |
- %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
- %left SCON '/' NEWE
- %left '|'
- %left '$' '^'
- %left CHAR CCL NCCL '(' '.' STR NULLS
- %left ITER
- %left CAT
- %left '*' '+' '?'
- %{
- # include "ldefs.h"
- #define YYSTYPE union _yystype_
- union _yystype_
- {
- int i;
- uchar *cp;
- };
- %}
- %%
- %{
- int i;
- int j,k;
- int g;
- uchar *p;
- %}
- acc : lexinput
- ={
- # ifdef DEBUG
- if(debug) sect2dump();
- # endif
- }
- ;
- lexinput: defns delim prods end
- | defns delim end
- ={
- if(!funcflag)phead2();
- funcflag = TRUE;
- }
- | error
- ={
- # ifdef DEBUG
- if(debug) {
- sect1dump();
- sect2dump();
- }
- # endif
- }
- ;
- end: delim | ;
- defns: defns STR STR
- ={ strcpy((char*)dp,(char*)$2.cp);
- def[dptr] = dp;
- dp += strlen((char*)$2.cp) + 1;
- strcpy((char*)dp,(char*)$3.cp);
- subs[dptr++] = dp;
- if(dptr >= DEFSIZE)
- error("Too many definitions");
- dp += strlen((char*)$3.cp) + 1;
- if(dp >= dchar+DEFCHAR)
- error("Definitions too long");
- subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
- }
- |
- ;
- delim: DELIM
- ={
- # ifdef DEBUG
- if(sect == DEFSECTION && debug) sect1dump();
- # endif
- sect++;
- }
- ;
- prods: prods pr
- ={ $$.i = mn2(RNEWE,$1.i,$2.i);
- }
- | pr
- ={ $$.i = $1.i;}
- ;
- pr: r NEWE
- ={
- if(divflg == TRUE)
- i = mn1(S1FINAL,casecount);
- else i = mn1(FINAL,casecount);
- $$.i = mn2(RCAT,$1.i,i);
- divflg = FALSE;
- casecount++;
- }
- | error NEWE
- ={
- # ifdef DEBUG
- if(debug) sect2dump();
- # endif
- }
- r: CHAR
- ={ $$.i = mn0($1.i); }
- | STR
- ={
- p = $1.cp;
- i = mn0(*p++);
- while(*p)
- i = mn2(RSTR,i,*p++);
- $$.i = i;
- }
- | '.'
- ={ symbol['\n'] = 0;
- if(psave == FALSE){
- p = ccptr;
- psave = ccptr;
- for(i=1;i<'\n';i++){
- symbol[i] = 1;
- *ccptr++ = i;
- }
- for(i='\n'+1;i<NCH;i++){
- symbol[i] = 1;
- *ccptr++ = i;
- }
- *ccptr++ = 0;
- if(ccptr > ccl+CCLSIZE)
- error("Too many large character classes");
- }
- else
- p = psave;
- $$.i = mn1(RCCL,(int)p);
- cclinter(1);
- }
- | CCL
- ={ $$.i = mn1(RCCL,$1.i); }
- | NCCL
- ={ $$.i = mn1(RNCCL,$1.i); }
- | r '*'
- ={ $$.i = mn1(STAR,$1.i); }
- | r '+'
- ={ $$.i = mn1(PLUS,$1.i); }
- | r '?'
- ={ $$.i = mn1(QUEST,$1.i); }
- | r '|' r
- ={ $$.i = mn2(BAR,$1.i,$3.i); }
- | r r %prec CAT
- ={ $$.i = mn2(RCAT,$1.i,$2.i); }
- | r '/' r
- ={ if(!divflg){
- j = mn1(S2FINAL,-casecount);
- i = mn2(RCAT,$1.i,j);
- $$.i = mn2(DIV,i,$3.i);
- }
- else {
- $$.i = mn2(RCAT,$1.i,$3.i);
- warning("Extra slash removed");
- }
- divflg = TRUE;
- }
- | r ITER ',' ITER '}'
- ={ if($2.i > $4.i){
- i = $2.i;
- $2.i = $4.i;
- $4.i = i;
- }
- if($4.i <= 0)
- warning("Iteration range must be positive");
- else {
- j = $1.i;
- for(k = 2; k<=$2.i;k++)
- j = mn2(RCAT,j,dupl($1.i));
- for(i = $2.i+1; i<=$4.i; i++){
- g = dupl($1.i);
- for(k=2;k<=i;k++)
- g = mn2(RCAT,g,dupl($1.i));
- j = mn2(BAR,j,g);
- }
- $$.i = j;
- }
- }
- | r ITER '}'
- ={
- if($2.i < 0)warning("Can't have negative iteration");
- else if($2.i == 0) $$.i = mn0(RNULLS);
- else {
- j = $1.i;
- for(k=2;k<=$2.i;k++)
- j = mn2(RCAT,j,dupl($1.i));
- $$.i = j;
- }
- }
- | r ITER ',' '}'
- ={
- /* from n to infinity */
- if($2.i < 0)warning("Can't have negative iteration");
- else if($2.i == 0) $$.i = mn1(STAR,$1.i);
- else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
- else { /* >= 2 iterations minimum */
- j = $1.i;
- for(k=2;k<$2.i;k++)
- j = mn2(RCAT,j,dupl($1.i));
- k = mn1(PLUS,dupl($1.i));
- $$.i = mn2(RCAT,j,k);
- }
- }
- | SCON r
- ={ $$.i = mn2(RSCON,$2.i,$1.i); }
- | '^' r
- ={ $$.i = mn1(CARAT,$2.i); }
- | r '$'
- ={ i = mn0('\n');
- if(!divflg){
- j = mn1(S2FINAL,-casecount);
- k = mn2(RCAT,$1.i,j);
- $$.i = mn2(DIV,k,i);
- }
- else $$.i = mn2(RCAT,$1.i,i);
- divflg = TRUE;
- }
- | '(' r ')'
- ={ $$.i = $2.i; }
- | NULLS
- ={ $$.i = mn0(RNULLS); }
- ;
- %%
- int
- yylex(void)
- {
- uchar *p;
- int c, i;
- uchar *t, *xp;
- int n, j, k, x;
- static int sectbegin;
- static uchar token[TOKENSIZE];
- static int iter;
- # ifdef DEBUG
- yylval.i = 0;
- # endif
- if(sect == DEFSECTION) { /* definitions section */
- while(!eof) {
- if(prev == '\n'){ /* next char is at beginning of line */
- getl(p=buf);
- switch(*p){
- case '%':
- switch(*(p+1)){
- case '%':
- lgate();
- Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
- Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
- sectbegin = TRUE;
- i = treesize*(sizeof(*name)+sizeof(*left)+
- sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
- p = myalloc(i,1);
- if(p == 0)
- error("Too little core for parse tree");
- free(p);
- name = myalloc(treesize,sizeof(*name));
- left = myalloc(treesize,sizeof(*left));
- right = myalloc(treesize,sizeof(*right));
- nullstr = myalloc(treesize,sizeof(*nullstr));
- parent = myalloc(treesize,sizeof(*parent));
- if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
- error("Too little core for parse tree");
- return(freturn(DELIM));
- case 'p': case 'P': /* has overridden number of positions */
- while(*p && !isdigit(*p))p++;
- maxpos = atol((char*)p);
- # ifdef DEBUG
- if (debug) print("positions (%%p) now %d\n",maxpos);
- # endif
- if(report == 2)report = 1;
- continue;
- case 'n': case 'N': /* has overridden number of states */
- while(*p && !isdigit(*p))p++;
- nstates = atol((char*)p);
- # ifdef DEBUG
- if(debug)print( " no. states (%%n) now %d\n",nstates);
- # endif
- if(report == 2)report = 1;
- continue;
- case 'e': case 'E': /* has overridden number of tree nodes */
- while(*p && !isdigit(*p))p++;
- treesize = atol((char*)p);
- # ifdef DEBUG
- if (debug) print("treesize (%%e) now %d\n",treesize);
- # endif
- if(report == 2)report = 1;
- continue;
- case 'o': case 'O':
- while (*p && !isdigit(*p))p++;
- outsize = atol((char*)p);
- if (report ==2) report=1;
- continue;
- case 'a': case 'A': /* has overridden number of transitions */
- while(*p && !isdigit(*p))p++;
- if(report == 2)report = 1;
- ntrans = atol((char*)p);
- # ifdef DEBUG
- if (debug)print("N. trans (%%a) now %d\n",ntrans);
- # endif
- continue;
- case 'k': case 'K': /* overriden packed char classes */
- while (*p && !isdigit(*p))p++;
- if (report==2) report=1;
- free(pchar);
- pchlen = atol((char*)p);
- # ifdef DEBUG
- if (debug) print( "Size classes (%%k) now %d\n",pchlen);
- # endif
- pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
- continue;
- case '{':
- lgate();
- while(getl(p) && strcmp((char*)p,"%}") != 0)
- Bprint(&fout, "%s\n",(char*)p);
- if(p[0] == '%') continue;
- error("Premature eof");
- case 's': case 'S': /* start conditions */
- lgate();
- while(*p && strchr(" \t,", *p) == 0) p++;
- n = TRUE;
- while(n){
- while(*p && strchr(" \t,", *p)) p++;
- t = p;
- while(*p && strchr(" \t,", *p) == 0)p++;
- if(!*p) n = FALSE;
- *p++ = 0;
- if (*t == 0) continue;
- i = sptr*2;
- Bprint(&fout,"#define %s %d\n",(char*)t,i);
- strcpy((char*)sp, (char*)t);
- sname[sptr++] = sp;
- sname[sptr] = 0; /* required by lookup */
- if(sptr >= STARTSIZE)
- error("Too many start conditions");
- sp += strlen((char*)sp) + 1;
- if(sp >= stchar+STARTCHAR)
- error("Start conditions too long");
- }
- continue;
- default:
- warning("Invalid request %s",p);
- continue;
- } /* end of switch after seeing '%' */
- case ' ': case '\t': /* must be code */
- lgate();
- Bprint(&fout, "%s\n",(char*)p);
- continue;
- default: /* definition */
- while(*p && !isspace(*p)) p++;
- if(*p == 0)
- continue;
- prev = *p;
- *p = 0;
- bptr = p+1;
- yylval.cp = buf;
- if(isdigit(buf[0]))
- warning("Substitution strings may not begin with digits");
- return(freturn(STR));
- }
- }
- /* still sect 1, but prev != '\n' */
- else {
- p = bptr;
- while(*p && isspace(*p)) p++;
- if(*p == 0)
- warning("No translation given - null string assumed");
- strcpy((char*)token, (char*)p);
- yylval.cp = token;
- prev = '\n';
- return(freturn(STR));
- }
- }
- /* end of section one processing */
- } else if(sect == RULESECTION){ /* rules and actions */
- while(!eof){
- switch(c=gch()){
- case '\0':
- return(freturn(0));
- case '\n':
- if(prev == '\n') continue;
- x = NEWE;
- break;
- case ' ':
- case '\t':
- if(sectbegin == TRUE){
- cpyact();
- while((c=gch()) && c != '\n');
- continue;
- }
- if(!funcflag)phead2();
- funcflag = TRUE;
- Bprint(&fout,"case %d:\n",casecount);
- if(cpyact())
- Bprint(&fout,"break;\n");
- while((c=gch()) && c != '\n');
- if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
- warning("Executable statements should occur right after %%");
- continue;
- }
- x = NEWE;
- break;
- case '%':
- if(prev != '\n') goto character;
- if(peek == '{'){ /* included code */
- getl(buf);
- while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
- Bprint(&fout,"%s\n",(char*)buf);
- continue;
- }
- if(peek == '%'){
- gch();
- gch();
- x = DELIM;
- break;
- }
- goto character;
- case '|':
- if(peek == ' ' || peek == '\t' || peek == '\n'){
- Bprint(&fout,"%d\n",30000+casecount++);
- continue;
- }
- x = '|';
- break;
- case '$':
- if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
- x = c;
- break;
- }
- goto character;
- case '^':
- if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
- x = c;
- break;
- case '?':
- case '+':
- case '.':
- case '*':
- case '(':
- case ')':
- case ',':
- case '/':
- x = c;
- break;
- case '}':
- iter = FALSE;
- x = c;
- break;
- case '{': /* either iteration or definition */
- if(isdigit(c=gch())){ /* iteration */
- iter = TRUE;
- ieval:
- i = 0;
- while(isdigit(c)){
- token[i++] = c;
- c = gch();
- }
- token[i] = 0;
- yylval.i = atol((char*)token);
- munputc(c);
- x = ITER;
- break;
- } else { /* definition */
- i = 0;
- while(c && c!='}'){
- token[i++] = c;
- c = gch();
- }
- token[i] = 0;
- i = lookup(token,def);
- if(i < 0)
- warning("Definition %s not found",token);
- else
- munputs(subs[i]);
- continue;
- }
- case '<': /* start condition ? */
- if(prev != '\n') /* not at line begin, not start */
- goto character;
- t = slptr;
- do {
- i = 0;
- c = gch();
- while(c != ',' && c && c != '>'){
- token[i++] = c;
- c = gch();
- }
- token[i] = 0;
- if(i == 0)
- goto character;
- i = lookup(token,sname);
- if(i < 0) {
- warning("Undefined start condition %s",token);
- continue;
- }
- *slptr++ = i+1;
- } while(c && c != '>');
- *slptr++ = 0;
- /* check if previous value re-usable */
- for (xp=slist; xp<t; ){
- if (strcmp((char*)xp, (char*)t)==0)
- break;
- while (*xp++);
- }
- if (xp<t){
- /* re-use previous pointer to string */
- slptr=t;
- t=xp;
- }
- if(slptr > slist+STARTSIZE) /* note not packed ! */
- error("Too many start conditions used");
- yylval.cp = t;
- x = SCON;
- break;
- case '"':
- i = 0;
- while((c=gch()) && c != '"' && c != '\n'){
- if(c == '\\') c = usescape(gch());
- token[i++] = c;
- if(i > TOKENSIZE){
- warning("String too long");
- i = TOKENSIZE-1;
- break;
- }
- }
- if(c == '\n') {
- yyline--;
- warning("Non-terminated string");
- yyline++;
- }
- token[i] = 0;
- if(i == 0)x = NULLS;
- else if(i == 1){
- yylval.i = token[0];
- x = CHAR;
- } else {
- yylval.cp = token;
- x = STR;
- }
- break;
- case '[':
- for(i=1;i<NCH;i++) symbol[i] = 0;
- x = CCL;
- if((c = gch()) == '^'){
- x = NCCL;
- c = gch();
- }
- while(c != ']' && c){
- if(c == '\\') c = usescape(gch());
- symbol[c] = 1;
- j = c;
- if((c=gch()) == '-' && peek != ']'){ /* range specified */
- c = gch();
- if(c == '\\') c = usescape(gch());
- k = c;
- if(j > k) {
- n = j;
- j = k;
- k = n;
- }
- if(!(('A' <= j && k <= 'Z') ||
- ('a' <= j && k <= 'z') ||
- ('0' <= j && k <= '9')))
- warning("Non-portable Character Class");
- for(n=j+1;n<=k;n++)
- symbol[n] = 1; /* implementation dependent */
- c = gch();
- }
- }
- /* try to pack ccl's */
- i = 0;
- for(j=0;j<NCH;j++)
- if(symbol[j])token[i++] = j;
- token[i] = 0;
- p = ccl;
- while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
- if(p < ccptr) /* found it */
- yylval.cp = p;
- else {
- yylval.cp = ccptr;
- strcpy((char*)ccptr,(char*)token);
- ccptr += strlen((char*)token) + 1;
- if(ccptr >= ccl+CCLSIZE)
- error("Too many large character classes");
- }
- cclinter(x==CCL);
- break;
- case '\\':
- c = usescape(gch());
- default:
- character:
- if(iter){ /* second part of an iteration */
- iter = FALSE;
- if('0' <= c && c <= '9')
- goto ieval;
- }
- if(isalpha(peek)){
- i = 0;
- yylval.cp = token;
- token[i++] = c;
- while(isalpha(peek))
- token[i++] = gch();
- if(peek == '?' || peek == '*' || peek == '+')
- munputc(token[--i]);
- token[i] = 0;
- if(i == 1){
- yylval.i = token[0];
- x = CHAR;
- }
- else x = STR;
- } else {
- yylval.i = c;
- x = CHAR;
- }
- }
- scon = FALSE;
- if(x == SCON)scon = TRUE;
- sectbegin = FALSE;
- return(freturn(x));
- }
- }
- /* section three */
- ptail();
- # ifdef DEBUG
- if(debug)
- Bprint(&fout,"\n/*this comes from section three - debug */\n");
- # endif
- while(getl(buf) && !eof)
- Bprint(&fout,"%s\n",(char*)buf);
- return(freturn(0));
- }
- /* end of yylex */
- # ifdef DEBUG
- int
- freturn(int i)
- {
- if(yydebug) {
- print("now return ");
- if(i < NCH) allprint(i);
- else print("%d",i);
- print(" yylval = ");
- switch(i){
- case STR: case CCL: case NCCL:
- strpt(yylval.cp);
- break;
- case CHAR:
- allprint(yylval.i);
- break;
- default:
- print("%d",yylval.i);
- break;
- }
- print("\n");
- }
- return(i);
- }
- # endif
|