parser.y 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
  2. %left SCON '/' NEWE
  3. %left '|'
  4. %left '$' '^'
  5. %left CHAR CCL NCCL '(' '.' STR NULLS
  6. %left ITER
  7. %left CAT
  8. %left '*' '+' '?'
  9. %{
  10. # include "ldefs.h"
  11. #define YYSTYPE union _yystype_
  12. union _yystype_
  13. {
  14. int i;
  15. uchar *cp;
  16. };
  17. %}
  18. %%
  19. %{
  20. int i;
  21. int j,k;
  22. int g;
  23. uchar *p;
  24. %}
  25. acc : lexinput
  26. ={
  27. # ifdef DEBUG
  28. if(debug) sect2dump();
  29. # endif
  30. }
  31. ;
  32. lexinput: defns delim prods end
  33. | defns delim end
  34. ={
  35. if(!funcflag)phead2();
  36. funcflag = TRUE;
  37. }
  38. | error
  39. ={
  40. # ifdef DEBUG
  41. if(debug) {
  42. sect1dump();
  43. sect2dump();
  44. }
  45. # endif
  46. }
  47. ;
  48. end: delim | ;
  49. defns: defns STR STR
  50. ={ strcpy((char*)dp,(char*)$2.cp);
  51. def[dptr] = dp;
  52. dp += strlen((char*)$2.cp) + 1;
  53. strcpy((char*)dp,(char*)$3.cp);
  54. subs[dptr++] = dp;
  55. if(dptr >= DEFSIZE)
  56. error("Too many definitions");
  57. dp += strlen((char*)$3.cp) + 1;
  58. if(dp >= dchar+DEFCHAR)
  59. error("Definitions too long");
  60. subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
  61. }
  62. |
  63. ;
  64. delim: DELIM
  65. ={
  66. # ifdef DEBUG
  67. if(sect == DEFSECTION && debug) sect1dump();
  68. # endif
  69. sect++;
  70. }
  71. ;
  72. prods: prods pr
  73. ={ $$.i = mn2(RNEWE,$1.i,$2.i);
  74. }
  75. | pr
  76. ={ $$.i = $1.i;}
  77. ;
  78. pr: r NEWE
  79. ={
  80. if(divflg == TRUE)
  81. i = mn1(S1FINAL,casecount);
  82. else i = mn1(FINAL,casecount);
  83. $$.i = mn2(RCAT,$1.i,i);
  84. divflg = FALSE;
  85. casecount++;
  86. }
  87. | error NEWE
  88. ={
  89. # ifdef DEBUG
  90. if(debug) sect2dump();
  91. # endif
  92. }
  93. r: CHAR
  94. ={ $$.i = mn0($1.i); }
  95. | STR
  96. ={
  97. p = $1.cp;
  98. i = mn0(*p++);
  99. while(*p)
  100. i = mn2(RSTR,i,*p++);
  101. $$.i = i;
  102. }
  103. | '.'
  104. ={ symbol['\n'] = 0;
  105. if(psave == FALSE){
  106. p = ccptr;
  107. psave = ccptr;
  108. for(i=1;i<'\n';i++){
  109. symbol[i] = 1;
  110. *ccptr++ = i;
  111. }
  112. for(i='\n'+1;i<NCH;i++){
  113. symbol[i] = 1;
  114. *ccptr++ = i;
  115. }
  116. *ccptr++ = 0;
  117. if(ccptr > ccl+CCLSIZE)
  118. error("Too many large character classes");
  119. }
  120. else
  121. p = psave;
  122. $$.i = mnp(RCCL, p);
  123. cclinter(1);
  124. }
  125. | CCL
  126. ={ $$.i = mnp(RCCL,$1.cp); }
  127. | NCCL
  128. ={ $$.i = mnp(RNCCL,$1.cp); }
  129. | r '*'
  130. ={ $$.i = mn1(STAR,$1.i); }
  131. | r '+'
  132. ={ $$.i = mn1(PLUS,$1.i); }
  133. | r '?'
  134. ={ $$.i = mn1(QUEST,$1.i); }
  135. | r '|' r
  136. ={ $$.i = mn2(BAR,$1.i,$3.i); }
  137. | r r %prec CAT
  138. ={ $$.i = mn2(RCAT,$1.i,$2.i); }
  139. | r '/' r
  140. ={ if(!divflg){
  141. j = mn1(S2FINAL,-casecount);
  142. i = mn2(RCAT,$1.i,j);
  143. $$.i = mn2(DIV,i,$3.i);
  144. }
  145. else {
  146. $$.i = mn2(RCAT,$1.i,$3.i);
  147. warning("Extra slash removed");
  148. }
  149. divflg = TRUE;
  150. }
  151. | r ITER ',' ITER '}'
  152. ={ if($2.i > $4.i){
  153. i = $2.i;
  154. $2.i = $4.i;
  155. $4.i = i;
  156. }
  157. if($4.i <= 0)
  158. warning("Iteration range must be positive");
  159. else {
  160. j = $1.i;
  161. for(k = 2; k<=$2.i;k++)
  162. j = mn2(RCAT,j,dupl($1.i));
  163. for(i = $2.i+1; i<=$4.i; i++){
  164. g = dupl($1.i);
  165. for(k=2;k<=i;k++)
  166. g = mn2(RCAT,g,dupl($1.i));
  167. j = mn2(BAR,j,g);
  168. }
  169. $$.i = j;
  170. }
  171. }
  172. | r ITER '}'
  173. ={
  174. if($2.i < 0)warning("Can't have negative iteration");
  175. else if($2.i == 0) $$.i = mn0(RNULLS);
  176. else {
  177. j = $1.i;
  178. for(k=2;k<=$2.i;k++)
  179. j = mn2(RCAT,j,dupl($1.i));
  180. $$.i = j;
  181. }
  182. }
  183. | r ITER ',' '}'
  184. ={
  185. /* from n to infinity */
  186. if($2.i < 0)warning("Can't have negative iteration");
  187. else if($2.i == 0) $$.i = mn1(STAR,$1.i);
  188. else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
  189. else { /* >= 2 iterations minimum */
  190. j = $1.i;
  191. for(k=2;k<$2.i;k++)
  192. j = mn2(RCAT,j,dupl($1.i));
  193. k = mn1(PLUS,dupl($1.i));
  194. $$.i = mn2(RCAT,j,k);
  195. }
  196. }
  197. | SCON r
  198. ={ $$.i = mn2(RSCON,$2.i,$1.i); }
  199. | '^' r
  200. ={ $$.i = mn1(CARAT,$2.i); }
  201. | r '$'
  202. ={ i = mn0('\n');
  203. if(!divflg){
  204. j = mn1(S2FINAL,-casecount);
  205. k = mn2(RCAT,$1.i,j);
  206. $$.i = mn2(DIV,k,i);
  207. }
  208. else $$.i = mn2(RCAT,$1.i,i);
  209. divflg = TRUE;
  210. }
  211. | '(' r ')'
  212. ={ $$.i = $2.i; }
  213. | NULLS
  214. ={ $$.i = mn0(RNULLS); }
  215. ;
  216. %%
  217. int
  218. yylex(void)
  219. {
  220. uchar *p;
  221. int c, i;
  222. uchar *t, *xp;
  223. int n, j, k, x;
  224. static int sectbegin;
  225. static uchar token[TOKENSIZE];
  226. static int iter;
  227. # ifdef DEBUG
  228. yylval.i = 0;
  229. yylval.p = 0;
  230. # endif
  231. if(sect == DEFSECTION) { /* definitions section */
  232. while(!eof) {
  233. if(prev == '\n'){ /* next char is at beginning of line */
  234. getl(p=buf);
  235. switch(*p){
  236. case '%':
  237. switch(*(p+1)){
  238. case '%':
  239. lgate();
  240. Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
  241. Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
  242. sectbegin = TRUE;
  243. i = treesize*(sizeof(*name)+sizeof(*left)+
  244. sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
  245. p = myalloc(i,1);
  246. if(p == 0)
  247. error("Too little core for parse tree");
  248. free(p);
  249. name = myalloc(treesize,sizeof(*name));
  250. left = myalloc(treesize,sizeof(*left));
  251. right = myalloc(treesize,sizeof(*right));
  252. nullstr = myalloc(treesize,sizeof(*nullstr));
  253. parent = myalloc(treesize,sizeof(*parent));
  254. ptr = myalloc(treesize,sizeof(*ptr));
  255. if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
  256. error("Too little core for parse tree");
  257. return(freturn(DELIM));
  258. case 'p': case 'P': /* has overridden number of positions */
  259. while(*p && !isdigit(*p))p++;
  260. maxpos = atol((char*)p);
  261. # ifdef DEBUG
  262. if (debug) print("positions (%%p) now %d\n",maxpos);
  263. # endif
  264. if(report == 2)report = 1;
  265. continue;
  266. case 'n': case 'N': /* has overridden number of states */
  267. while(*p && !isdigit(*p))p++;
  268. nstates = atol((char*)p);
  269. # ifdef DEBUG
  270. if(debug)print( " no. states (%%n) now %d\n",nstates);
  271. # endif
  272. if(report == 2)report = 1;
  273. continue;
  274. case 'e': case 'E': /* has overridden number of tree nodes */
  275. while(*p && !isdigit(*p))p++;
  276. treesize = atol((char*)p);
  277. # ifdef DEBUG
  278. if (debug) print("treesize (%%e) now %d\n",treesize);
  279. # endif
  280. if(report == 2)report = 1;
  281. continue;
  282. case 'o': case 'O':
  283. while (*p && !isdigit(*p))p++;
  284. outsize = atol((char*)p);
  285. if (report ==2) report=1;
  286. continue;
  287. case 'a': case 'A': /* has overridden number of transitions */
  288. while(*p && !isdigit(*p))p++;
  289. if(report == 2)report = 1;
  290. ntrans = atol((char*)p);
  291. # ifdef DEBUG
  292. if (debug)print("N. trans (%%a) now %d\n",ntrans);
  293. # endif
  294. continue;
  295. case 'k': case 'K': /* overriden packed char classes */
  296. while (*p && !isdigit(*p))p++;
  297. if (report==2) report=1;
  298. free(pchar);
  299. pchlen = atol((char*)p);
  300. # ifdef DEBUG
  301. if (debug) print( "Size classes (%%k) now %d\n",pchlen);
  302. # endif
  303. pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
  304. continue;
  305. case '{':
  306. lgate();
  307. while(getl(p) && strcmp((char*)p,"%}") != 0)
  308. Bprint(&fout, "%s\n",(char*)p);
  309. if(p[0] == '%') continue;
  310. error("Premature eof");
  311. case 's': case 'S': /* start conditions */
  312. lgate();
  313. while(*p && strchr(" \t,", *p) == 0) p++;
  314. n = TRUE;
  315. while(n){
  316. while(*p && strchr(" \t,", *p)) p++;
  317. t = p;
  318. while(*p && strchr(" \t,", *p) == 0)p++;
  319. if(!*p) n = FALSE;
  320. *p++ = 0;
  321. if (*t == 0) continue;
  322. i = sptr*2;
  323. Bprint(&fout,"#define %s %d\n",(char*)t,i);
  324. strcpy((char*)sp, (char*)t);
  325. sname[sptr++] = sp;
  326. sname[sptr] = 0; /* required by lookup */
  327. if(sptr >= STARTSIZE)
  328. error("Too many start conditions");
  329. sp += strlen((char*)sp) + 1;
  330. if(sp >= stchar+STARTCHAR)
  331. error("Start conditions too long");
  332. }
  333. continue;
  334. default:
  335. warning("Invalid request %s",p);
  336. continue;
  337. } /* end of switch after seeing '%' */
  338. case ' ': case '\t': /* must be code */
  339. lgate();
  340. Bprint(&fout, "%s\n",(char*)p);
  341. continue;
  342. default: /* definition */
  343. while(*p && !isspace(*p)) p++;
  344. if(*p == 0)
  345. continue;
  346. prev = *p;
  347. *p = 0;
  348. bptr = p+1;
  349. yylval.cp = buf;
  350. if(isdigit(buf[0]))
  351. warning("Substitution strings may not begin with digits");
  352. return(freturn(STR));
  353. }
  354. }
  355. /* still sect 1, but prev != '\n' */
  356. else {
  357. p = bptr;
  358. while(*p && isspace(*p)) p++;
  359. if(*p == 0)
  360. warning("No translation given - null string assumed");
  361. strcpy((char*)token, (char*)p);
  362. yylval.cp = token;
  363. prev = '\n';
  364. return(freturn(STR));
  365. }
  366. }
  367. /* end of section one processing */
  368. } else if(sect == RULESECTION){ /* rules and actions */
  369. while(!eof){
  370. switch(c=gch()){
  371. case '\0':
  372. return(freturn(0));
  373. case '\n':
  374. if(prev == '\n') continue;
  375. x = NEWE;
  376. break;
  377. case ' ':
  378. case '\t':
  379. if(sectbegin == TRUE){
  380. cpyact();
  381. while((c=gch()) && c != '\n');
  382. continue;
  383. }
  384. if(!funcflag)phead2();
  385. funcflag = TRUE;
  386. Bprint(&fout,"case %d:\n",casecount);
  387. if(cpyact())
  388. Bprint(&fout,"break;\n");
  389. while((c=gch()) && c != '\n');
  390. if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
  391. warning("Executable statements should occur right after %%");
  392. continue;
  393. }
  394. x = NEWE;
  395. break;
  396. case '%':
  397. if(prev != '\n') goto character;
  398. if(peek == '{'){ /* included code */
  399. getl(buf);
  400. while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
  401. Bprint(&fout,"%s\n",(char*)buf);
  402. continue;
  403. }
  404. if(peek == '%'){
  405. gch();
  406. gch();
  407. x = DELIM;
  408. break;
  409. }
  410. goto character;
  411. case '|':
  412. if(peek == ' ' || peek == '\t' || peek == '\n'){
  413. Bprint(&fout,"%d\n",30000+casecount++);
  414. continue;
  415. }
  416. x = '|';
  417. break;
  418. case '$':
  419. if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
  420. x = c;
  421. break;
  422. }
  423. goto character;
  424. case '^':
  425. if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
  426. x = c;
  427. break;
  428. case '?':
  429. case '+':
  430. case '.':
  431. case '*':
  432. case '(':
  433. case ')':
  434. case ',':
  435. case '/':
  436. x = c;
  437. break;
  438. case '}':
  439. iter = FALSE;
  440. x = c;
  441. break;
  442. case '{': /* either iteration or definition */
  443. if(isdigit(c=gch())){ /* iteration */
  444. iter = TRUE;
  445. ieval:
  446. i = 0;
  447. while(isdigit(c)){
  448. token[i++] = c;
  449. c = gch();
  450. }
  451. token[i] = 0;
  452. yylval.i = atol((char*)token);
  453. munputc(c);
  454. x = ITER;
  455. break;
  456. } else { /* definition */
  457. i = 0;
  458. while(c && c!='}'){
  459. token[i++] = c;
  460. c = gch();
  461. }
  462. token[i] = 0;
  463. i = lookup(token,def);
  464. if(i < 0)
  465. warning("Definition %s not found",token);
  466. else
  467. munputs(subs[i]);
  468. continue;
  469. }
  470. case '<': /* start condition ? */
  471. if(prev != '\n') /* not at line begin, not start */
  472. goto character;
  473. t = slptr;
  474. do {
  475. i = 0;
  476. c = gch();
  477. while(c != ',' && c && c != '>'){
  478. token[i++] = c;
  479. c = gch();
  480. }
  481. token[i] = 0;
  482. if(i == 0)
  483. goto character;
  484. i = lookup(token,sname);
  485. if(i < 0) {
  486. warning("Undefined start condition %s",token);
  487. continue;
  488. }
  489. *slptr++ = i+1;
  490. } while(c && c != '>');
  491. *slptr++ = 0;
  492. /* check if previous value re-usable */
  493. for (xp=slist; xp<t; ){
  494. if (strcmp((char*)xp, (char*)t)==0)
  495. break;
  496. while (*xp++);
  497. }
  498. if (xp<t){
  499. /* re-use previous pointer to string */
  500. slptr=t;
  501. t=xp;
  502. }
  503. if(slptr > slist+STARTSIZE) /* note not packed ! */
  504. error("Too many start conditions used");
  505. yylval.cp = t;
  506. x = SCON;
  507. break;
  508. case '"':
  509. i = 0;
  510. while((c=gch()) && c != '"' && c != '\n'){
  511. if(c == '\\') c = usescape(gch());
  512. token[i++] = c;
  513. if(i > TOKENSIZE){
  514. warning("String too long");
  515. i = TOKENSIZE-1;
  516. break;
  517. }
  518. }
  519. if(c == '\n') {
  520. yyline--;
  521. warning("Non-terminated string");
  522. yyline++;
  523. }
  524. token[i] = 0;
  525. if(i == 0)x = NULLS;
  526. else if(i == 1){
  527. yylval.i = token[0];
  528. x = CHAR;
  529. } else {
  530. yylval.cp = token;
  531. x = STR;
  532. }
  533. break;
  534. case '[':
  535. for(i=1;i<NCH;i++) symbol[i] = 0;
  536. x = CCL;
  537. if((c = gch()) == '^'){
  538. x = NCCL;
  539. c = gch();
  540. }
  541. while(c != ']' && c){
  542. if(c == '\\') c = usescape(gch());
  543. symbol[c] = 1;
  544. j = c;
  545. if((c=gch()) == '-' && peek != ']'){ /* range specified */
  546. c = gch();
  547. if(c == '\\') c = usescape(gch());
  548. k = c;
  549. if(j > k) {
  550. n = j;
  551. j = k;
  552. k = n;
  553. }
  554. if(!(('A' <= j && k <= 'Z') ||
  555. ('a' <= j && k <= 'z') ||
  556. ('0' <= j && k <= '9')))
  557. warning("Non-portable Character Class");
  558. for(n=j+1;n<=k;n++)
  559. symbol[n] = 1; /* implementation dependent */
  560. c = gch();
  561. }
  562. }
  563. /* try to pack ccl's */
  564. i = 0;
  565. for(j=0;j<NCH;j++)
  566. if(symbol[j])token[i++] = j;
  567. token[i] = 0;
  568. p = ccl;
  569. while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
  570. if(p < ccptr) /* found it */
  571. yylval.cp = p;
  572. else {
  573. yylval.cp = ccptr;
  574. strcpy((char*)ccptr,(char*)token);
  575. ccptr += strlen((char*)token) + 1;
  576. if(ccptr >= ccl+CCLSIZE)
  577. error("Too many large character classes");
  578. }
  579. cclinter(x==CCL);
  580. break;
  581. case '\\':
  582. c = usescape(gch());
  583. default:
  584. character:
  585. if(iter){ /* second part of an iteration */
  586. iter = FALSE;
  587. if('0' <= c && c <= '9')
  588. goto ieval;
  589. }
  590. if(isalpha(peek)){
  591. i = 0;
  592. yylval.cp = token;
  593. token[i++] = c;
  594. while(isalpha(peek))
  595. token[i++] = gch();
  596. if(peek == '?' || peek == '*' || peek == '+')
  597. munputc(token[--i]);
  598. token[i] = 0;
  599. if(i == 1){
  600. yylval.i = token[0];
  601. x = CHAR;
  602. }
  603. else x = STR;
  604. } else {
  605. yylval.i = c;
  606. x = CHAR;
  607. }
  608. }
  609. scon = FALSE;
  610. if(x == SCON)scon = TRUE;
  611. sectbegin = FALSE;
  612. return(freturn(x));
  613. }
  614. }
  615. /* section three */
  616. ptail();
  617. # ifdef DEBUG
  618. if(debug)
  619. Bprint(&fout,"\n/*this comes from section three - debug */\n");
  620. # endif
  621. while(getl(buf) && !eof)
  622. Bprint(&fout,"%s\n",(char*)buf);
  623. return(freturn(0));
  624. }
  625. /* end of yylex */
  626. # ifdef DEBUG
  627. int
  628. freturn(int i)
  629. {
  630. if(yydebug) {
  631. print("now return ");
  632. if(i < NCH) allprint(i);
  633. else print("%d",i);
  634. print(" yylval = ");
  635. switch(i){
  636. case STR: case CCL: case NCCL:
  637. strpt(yylval.cp);
  638. break;
  639. case CHAR:
  640. allprint(yylval.i);
  641. break;
  642. default:
  643. print("%d",yylval.i);
  644. break;
  645. }
  646. print("\n");
  647. }
  648. return(i);
  649. }
  650. # endif