parser.y 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
  2. %left SCON '/' NEWE
  3. %left '|'
  4. %left '$' '^'
  5. %left CHAR CCL NCCL '(' '.' STR NULLS
  6. %left ITER
  7. %left CAT
  8. %left '*' '+' '?'
  9. %{
  10. # include "ldefs.h"
  11. #define YYSTYPE union _yystype_
  12. union _yystype_
  13. {
  14. int i;
  15. uchar *cp;
  16. };
  17. %}
  18. %%
  19. %{
  20. int i;
  21. int j,k;
  22. int g;
  23. uchar *p;
  24. %}
  25. acc : lexinput
  26. ={
  27. # ifdef DEBUG
  28. if(debug) sect2dump();
  29. # endif
  30. }
  31. ;
  32. lexinput: defns delim prods end
  33. | defns delim end
  34. ={
  35. if(!funcflag)phead2();
  36. funcflag = TRUE;
  37. }
  38. | error
  39. ={
  40. # ifdef DEBUG
  41. if(debug) {
  42. sect1dump();
  43. sect2dump();
  44. }
  45. # endif
  46. }
  47. ;
  48. end: delim | ;
  49. defns: defns STR STR
  50. ={ strcpy((char*)dp,(char*)$2.cp);
  51. def[dptr] = dp;
  52. dp += strlen((char*)$2.cp) + 1;
  53. strcpy((char*)dp,(char*)$3.cp);
  54. subs[dptr++] = dp;
  55. if(dptr >= DEFSIZE)
  56. error("Too many definitions");
  57. dp += strlen((char*)$3.cp) + 1;
  58. if(dp >= dchar+DEFCHAR)
  59. error("Definitions too long");
  60. subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
  61. }
  62. |
  63. ;
  64. delim: DELIM
  65. ={
  66. # ifdef DEBUG
  67. if(sect == DEFSECTION && debug) sect1dump();
  68. # endif
  69. sect++;
  70. }
  71. ;
  72. prods: prods pr
  73. ={ $$.i = mn2(RNEWE,$1.i,$2.i);
  74. }
  75. | pr
  76. ={ $$.i = $1.i;}
  77. ;
  78. pr: r NEWE
  79. ={
  80. if(divflg == TRUE)
  81. i = mn1(S1FINAL,casecount);
  82. else i = mn1(FINAL,casecount);
  83. $$.i = mn2(RCAT,$1.i,i);
  84. divflg = FALSE;
  85. casecount++;
  86. }
  87. | error NEWE
  88. ={
  89. # ifdef DEBUG
  90. if(debug) sect2dump();
  91. # endif
  92. }
  93. r: CHAR
  94. ={ $$.i = mn0($1.i); }
  95. | STR
  96. ={
  97. p = $1.cp;
  98. i = mn0(*p++);
  99. while(*p)
  100. i = mn2(RSTR,i,*p++);
  101. $$.i = i;
  102. }
  103. | '.'
  104. ={ symbol['\n'] = 0;
  105. if(psave == FALSE){
  106. p = ccptr;
  107. psave = ccptr;
  108. for(i=1;i<'\n';i++){
  109. symbol[i] = 1;
  110. *ccptr++ = i;
  111. }
  112. for(i='\n'+1;i<NCH;i++){
  113. symbol[i] = 1;
  114. *ccptr++ = i;
  115. }
  116. *ccptr++ = 0;
  117. if(ccptr > ccl+CCLSIZE)
  118. error("Too many large character classes");
  119. }
  120. else
  121. p = psave;
  122. $$.i = mn1(RCCL,(int)p);
  123. cclinter(1);
  124. }
  125. | CCL
  126. ={ $$.i = mn1(RCCL,$1.i); }
  127. | NCCL
  128. ={ $$.i = mn1(RNCCL,$1.i); }
  129. | r '*'
  130. ={ $$.i = mn1(STAR,$1.i); }
  131. | r '+'
  132. ={ $$.i = mn1(PLUS,$1.i); }
  133. | r '?'
  134. ={ $$.i = mn1(QUEST,$1.i); }
  135. | r '|' r
  136. ={ $$.i = mn2(BAR,$1.i,$3.i); }
  137. | r r %prec CAT
  138. ={ $$.i = mn2(RCAT,$1.i,$2.i); }
  139. | r '/' r
  140. ={ if(!divflg){
  141. j = mn1(S2FINAL,-casecount);
  142. i = mn2(RCAT,$1.i,j);
  143. $$.i = mn2(DIV,i,$3.i);
  144. }
  145. else {
  146. $$.i = mn2(RCAT,$1.i,$3.i);
  147. warning("Extra slash removed");
  148. }
  149. divflg = TRUE;
  150. }
  151. | r ITER ',' ITER '}'
  152. ={ if($2.i > $4.i){
  153. i = $2.i;
  154. $2.i = $4.i;
  155. $4.i = i;
  156. }
  157. if($4.i <= 0)
  158. warning("Iteration range must be positive");
  159. else {
  160. j = $1.i;
  161. for(k = 2; k<=$2.i;k++)
  162. j = mn2(RCAT,j,dupl($1.i));
  163. for(i = $2.i+1; i<=$4.i; i++){
  164. g = dupl($1.i);
  165. for(k=2;k<=i;k++)
  166. g = mn2(RCAT,g,dupl($1.i));
  167. j = mn2(BAR,j,g);
  168. }
  169. $$.i = j;
  170. }
  171. }
  172. | r ITER '}'
  173. ={
  174. if($2.i < 0)warning("Can't have negative iteration");
  175. else if($2.i == 0) $$.i = mn0(RNULLS);
  176. else {
  177. j = $1.i;
  178. for(k=2;k<=$2.i;k++)
  179. j = mn2(RCAT,j,dupl($1.i));
  180. $$.i = j;
  181. }
  182. }
  183. | r ITER ',' '}'
  184. ={
  185. /* from n to infinity */
  186. if($2.i < 0)warning("Can't have negative iteration");
  187. else if($2.i == 0) $$.i = mn1(STAR,$1.i);
  188. else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
  189. else { /* >= 2 iterations minimum */
  190. j = $1.i;
  191. for(k=2;k<$2.i;k++)
  192. j = mn2(RCAT,j,dupl($1.i));
  193. k = mn1(PLUS,dupl($1.i));
  194. $$.i = mn2(RCAT,j,k);
  195. }
  196. }
  197. | SCON r
  198. ={ $$.i = mn2(RSCON,$2.i,$1.i); }
  199. | '^' r
  200. ={ $$.i = mn1(CARAT,$2.i); }
  201. | r '$'
  202. ={ i = mn0('\n');
  203. if(!divflg){
  204. j = mn1(S2FINAL,-casecount);
  205. k = mn2(RCAT,$1.i,j);
  206. $$.i = mn2(DIV,k,i);
  207. }
  208. else $$.i = mn2(RCAT,$1.i,i);
  209. divflg = TRUE;
  210. }
  211. | '(' r ')'
  212. ={ $$.i = $2.i; }
  213. | NULLS
  214. ={ $$.i = mn0(RNULLS); }
  215. ;
  216. %%
  217. int
  218. yylex(void)
  219. {
  220. uchar *p;
  221. int c, i;
  222. uchar *t, *xp;
  223. int n, j, k, x;
  224. static int sectbegin;
  225. static uchar token[TOKENSIZE];
  226. static int iter;
  227. # ifdef DEBUG
  228. yylval.i = 0;
  229. # endif
  230. if(sect == DEFSECTION) { /* definitions section */
  231. while(!eof) {
  232. if(prev == '\n'){ /* next char is at beginning of line */
  233. getl(p=buf);
  234. switch(*p){
  235. case '%':
  236. switch(*(p+1)){
  237. case '%':
  238. lgate();
  239. Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
  240. Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
  241. sectbegin = TRUE;
  242. i = treesize*(sizeof(*name)+sizeof(*left)+
  243. sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
  244. p = myalloc(i,1);
  245. if(p == 0)
  246. error("Too little core for parse tree");
  247. free(p);
  248. name = myalloc(treesize,sizeof(*name));
  249. left = myalloc(treesize,sizeof(*left));
  250. right = myalloc(treesize,sizeof(*right));
  251. nullstr = myalloc(treesize,sizeof(*nullstr));
  252. parent = myalloc(treesize,sizeof(*parent));
  253. if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
  254. error("Too little core for parse tree");
  255. return(freturn(DELIM));
  256. case 'p': case 'P': /* has overridden number of positions */
  257. while(*p && !isdigit(*p))p++;
  258. maxpos = atol((char*)p);
  259. # ifdef DEBUG
  260. if (debug) print("positions (%%p) now %d\n",maxpos);
  261. # endif
  262. if(report == 2)report = 1;
  263. continue;
  264. case 'n': case 'N': /* has overridden number of states */
  265. while(*p && !isdigit(*p))p++;
  266. nstates = atol((char*)p);
  267. # ifdef DEBUG
  268. if(debug)print( " no. states (%%n) now %d\n",nstates);
  269. # endif
  270. if(report == 2)report = 1;
  271. continue;
  272. case 'e': case 'E': /* has overridden number of tree nodes */
  273. while(*p && !isdigit(*p))p++;
  274. treesize = atol((char*)p);
  275. # ifdef DEBUG
  276. if (debug) print("treesize (%%e) now %d\n",treesize);
  277. # endif
  278. if(report == 2)report = 1;
  279. continue;
  280. case 'o': case 'O':
  281. while (*p && !isdigit(*p))p++;
  282. outsize = atol((char*)p);
  283. if (report ==2) report=1;
  284. continue;
  285. case 'a': case 'A': /* has overridden number of transitions */
  286. while(*p && !isdigit(*p))p++;
  287. if(report == 2)report = 1;
  288. ntrans = atol((char*)p);
  289. # ifdef DEBUG
  290. if (debug)print("N. trans (%%a) now %d\n",ntrans);
  291. # endif
  292. continue;
  293. case 'k': case 'K': /* overriden packed char classes */
  294. while (*p && !isdigit(*p))p++;
  295. if (report==2) report=1;
  296. free(pchar);
  297. pchlen = atol((char*)p);
  298. # ifdef DEBUG
  299. if (debug) print( "Size classes (%%k) now %d\n",pchlen);
  300. # endif
  301. pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
  302. continue;
  303. case '{':
  304. lgate();
  305. while(getl(p) && strcmp((char*)p,"%}") != 0)
  306. Bprint(&fout, "%s\n",(char*)p);
  307. if(p[0] == '%') continue;
  308. error("Premature eof");
  309. case 's': case 'S': /* start conditions */
  310. lgate();
  311. while(*p && strchr(" \t,", *p) == 0) p++;
  312. n = TRUE;
  313. while(n){
  314. while(*p && strchr(" \t,", *p)) p++;
  315. t = p;
  316. while(*p && strchr(" \t,", *p) == 0)p++;
  317. if(!*p) n = FALSE;
  318. *p++ = 0;
  319. if (*t == 0) continue;
  320. i = sptr*2;
  321. Bprint(&fout,"#define %s %d\n",(char*)t,i);
  322. strcpy((char*)sp, (char*)t);
  323. sname[sptr++] = sp;
  324. sname[sptr] = 0; /* required by lookup */
  325. if(sptr >= STARTSIZE)
  326. error("Too many start conditions");
  327. sp += strlen((char*)sp) + 1;
  328. if(sp >= stchar+STARTCHAR)
  329. error("Start conditions too long");
  330. }
  331. continue;
  332. default:
  333. warning("Invalid request %s",p);
  334. continue;
  335. } /* end of switch after seeing '%' */
  336. case ' ': case '\t': /* must be code */
  337. lgate();
  338. Bprint(&fout, "%s\n",(char*)p);
  339. continue;
  340. default: /* definition */
  341. while(*p && !isspace(*p)) p++;
  342. if(*p == 0)
  343. continue;
  344. prev = *p;
  345. *p = 0;
  346. bptr = p+1;
  347. yylval.cp = buf;
  348. if(isdigit(buf[0]))
  349. warning("Substitution strings may not begin with digits");
  350. return(freturn(STR));
  351. }
  352. }
  353. /* still sect 1, but prev != '\n' */
  354. else {
  355. p = bptr;
  356. while(*p && isspace(*p)) p++;
  357. if(*p == 0)
  358. warning("No translation given - null string assumed");
  359. strcpy((char*)token, (char*)p);
  360. yylval.cp = token;
  361. prev = '\n';
  362. return(freturn(STR));
  363. }
  364. }
  365. /* end of section one processing */
  366. } else if(sect == RULESECTION){ /* rules and actions */
  367. while(!eof){
  368. switch(c=gch()){
  369. case '\0':
  370. return(freturn(0));
  371. case '\n':
  372. if(prev == '\n') continue;
  373. x = NEWE;
  374. break;
  375. case ' ':
  376. case '\t':
  377. if(sectbegin == TRUE){
  378. cpyact();
  379. while((c=gch()) && c != '\n');
  380. continue;
  381. }
  382. if(!funcflag)phead2();
  383. funcflag = TRUE;
  384. Bprint(&fout,"case %d:\n",casecount);
  385. if(cpyact())
  386. Bprint(&fout,"break;\n");
  387. while((c=gch()) && c != '\n');
  388. if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
  389. warning("Executable statements should occur right after %%");
  390. continue;
  391. }
  392. x = NEWE;
  393. break;
  394. case '%':
  395. if(prev != '\n') goto character;
  396. if(peek == '{'){ /* included code */
  397. getl(buf);
  398. while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
  399. Bprint(&fout,"%s\n",(char*)buf);
  400. continue;
  401. }
  402. if(peek == '%'){
  403. gch();
  404. gch();
  405. x = DELIM;
  406. break;
  407. }
  408. goto character;
  409. case '|':
  410. if(peek == ' ' || peek == '\t' || peek == '\n'){
  411. Bprint(&fout,"%d\n",30000+casecount++);
  412. continue;
  413. }
  414. x = '|';
  415. break;
  416. case '$':
  417. if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
  418. x = c;
  419. break;
  420. }
  421. goto character;
  422. case '^':
  423. if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
  424. x = c;
  425. break;
  426. case '?':
  427. case '+':
  428. case '.':
  429. case '*':
  430. case '(':
  431. case ')':
  432. case ',':
  433. case '/':
  434. x = c;
  435. break;
  436. case '}':
  437. iter = FALSE;
  438. x = c;
  439. break;
  440. case '{': /* either iteration or definition */
  441. if(isdigit(c=gch())){ /* iteration */
  442. iter = TRUE;
  443. ieval:
  444. i = 0;
  445. while(isdigit(c)){
  446. token[i++] = c;
  447. c = gch();
  448. }
  449. token[i] = 0;
  450. yylval.i = atol((char*)token);
  451. munputc(c);
  452. x = ITER;
  453. break;
  454. } else { /* definition */
  455. i = 0;
  456. while(c && c!='}'){
  457. token[i++] = c;
  458. c = gch();
  459. }
  460. token[i] = 0;
  461. i = lookup(token,def);
  462. if(i < 0)
  463. warning("Definition %s not found",token);
  464. else
  465. munputs(subs[i]);
  466. continue;
  467. }
  468. case '<': /* start condition ? */
  469. if(prev != '\n') /* not at line begin, not start */
  470. goto character;
  471. t = slptr;
  472. do {
  473. i = 0;
  474. c = gch();
  475. while(c != ',' && c && c != '>'){
  476. token[i++] = c;
  477. c = gch();
  478. }
  479. token[i] = 0;
  480. if(i == 0)
  481. goto character;
  482. i = lookup(token,sname);
  483. if(i < 0) {
  484. warning("Undefined start condition %s",token);
  485. continue;
  486. }
  487. *slptr++ = i+1;
  488. } while(c && c != '>');
  489. *slptr++ = 0;
  490. /* check if previous value re-usable */
  491. for (xp=slist; xp<t; ){
  492. if (strcmp((char*)xp, (char*)t)==0)
  493. break;
  494. while (*xp++);
  495. }
  496. if (xp<t){
  497. /* re-use previous pointer to string */
  498. slptr=t;
  499. t=xp;
  500. }
  501. if(slptr > slist+STARTSIZE) /* note not packed ! */
  502. error("Too many start conditions used");
  503. yylval.cp = t;
  504. x = SCON;
  505. break;
  506. case '"':
  507. i = 0;
  508. while((c=gch()) && c != '"' && c != '\n'){
  509. if(c == '\\') c = usescape(gch());
  510. token[i++] = c;
  511. if(i > TOKENSIZE){
  512. warning("String too long");
  513. i = TOKENSIZE-1;
  514. break;
  515. }
  516. }
  517. if(c == '\n') {
  518. yyline--;
  519. warning("Non-terminated string");
  520. yyline++;
  521. }
  522. token[i] = 0;
  523. if(i == 0)x = NULLS;
  524. else if(i == 1){
  525. yylval.i = token[0];
  526. x = CHAR;
  527. } else {
  528. yylval.cp = token;
  529. x = STR;
  530. }
  531. break;
  532. case '[':
  533. for(i=1;i<NCH;i++) symbol[i] = 0;
  534. x = CCL;
  535. if((c = gch()) == '^'){
  536. x = NCCL;
  537. c = gch();
  538. }
  539. while(c != ']' && c){
  540. if(c == '\\') c = usescape(gch());
  541. symbol[c] = 1;
  542. j = c;
  543. if((c=gch()) == '-' && peek != ']'){ /* range specified */
  544. c = gch();
  545. if(c == '\\') c = usescape(gch());
  546. k = c;
  547. if(j > k) {
  548. n = j;
  549. j = k;
  550. k = n;
  551. }
  552. if(!(('A' <= j && k <= 'Z') ||
  553. ('a' <= j && k <= 'z') ||
  554. ('0' <= j && k <= '9')))
  555. warning("Non-portable Character Class");
  556. for(n=j+1;n<=k;n++)
  557. symbol[n] = 1; /* implementation dependent */
  558. c = gch();
  559. }
  560. }
  561. /* try to pack ccl's */
  562. i = 0;
  563. for(j=0;j<NCH;j++)
  564. if(symbol[j])token[i++] = j;
  565. token[i] = 0;
  566. p = ccl;
  567. while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
  568. if(p < ccptr) /* found it */
  569. yylval.cp = p;
  570. else {
  571. yylval.cp = ccptr;
  572. strcpy((char*)ccptr,(char*)token);
  573. ccptr += strlen((char*)token) + 1;
  574. if(ccptr >= ccl+CCLSIZE)
  575. error("Too many large character classes");
  576. }
  577. cclinter(x==CCL);
  578. break;
  579. case '\\':
  580. c = usescape(gch());
  581. default:
  582. character:
  583. if(iter){ /* second part of an iteration */
  584. iter = FALSE;
  585. if('0' <= c && c <= '9')
  586. goto ieval;
  587. }
  588. if(isalpha(peek)){
  589. i = 0;
  590. yylval.cp = token;
  591. token[i++] = c;
  592. while(isalpha(peek))
  593. token[i++] = gch();
  594. if(peek == '?' || peek == '*' || peek == '+')
  595. munputc(token[--i]);
  596. token[i] = 0;
  597. if(i == 1){
  598. yylval.i = token[0];
  599. x = CHAR;
  600. }
  601. else x = STR;
  602. } else {
  603. yylval.i = c;
  604. x = CHAR;
  605. }
  606. }
  607. scon = FALSE;
  608. if(x == SCON)scon = TRUE;
  609. sectbegin = FALSE;
  610. return(freturn(x));
  611. }
  612. }
  613. /* section three */
  614. ptail();
  615. # ifdef DEBUG
  616. if(debug)
  617. Bprint(&fout,"\n/*this comes from section three - debug */\n");
  618. # endif
  619. while(getl(buf) && !eof)
  620. Bprint(&fout,"%s\n",(char*)buf);
  621. return(freturn(0));
  622. }
  623. /* end of yylex */
  624. # ifdef DEBUG
  625. int
  626. freturn(int i)
  627. {
  628. if(yydebug) {
  629. print("now return ");
  630. if(i < NCH) allprint(i);
  631. else print("%d",i);
  632. printf(" yylval = ");
  633. switch(i){
  634. case STR: case CCL: case NCCL:
  635. strpt(yylval.cp);
  636. break;
  637. case CHAR:
  638. allprint(yylval.i);
  639. break;
  640. default:
  641. print("%d",yylval.i);
  642. break;
  643. }
  644. print("\n");
  645. }
  646. return(i);
  647. }
  648. # endif