123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226 |
- %{
- #include "grep.h"
- %}
- %union
- {
- int val;
- char* str;
- Re2 re;
- }
- %type <re> expr prog
- %type <re> expr0 expr1 expr2 expr3 expr4
- %token <str> LCLASS
- %token <val> LCHAR
- %token LLPAREN LRPAREN LALT LSTAR LPLUS LQUES
- %token LBEGIN LEND LDOT LBAD LNEWLINE
- %%
- prog:
- expr newlines
- {
- $$.beg = ral(Tend);
- $$.end = $$.beg;
- $$ = re2cat(re2star(re2or(re2char(0x00, '\n'-1), re2char('\n'+1, 0xff))), $$);
- $$ = re2cat($1, $$);
- $$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
- topre = $$;
- }
- expr:
- expr0
- | expr newlines expr0
- {
- $$ = re2or($1, $3);
- }
- expr0:
- expr1
- | LSTAR { literal = 1; } expr1
- {
- $$ = $3;
- }
- expr1:
- expr2
- | expr1 LALT expr2
- {
- $$ = re2or($1, $3);
- }
- expr2:
- expr3
- | expr2 expr3
- {
- $$ = re2cat($1, $2);
- }
- expr3:
- expr4
- | expr3 LSTAR
- {
- $$ = re2star($1);
- }
- | expr3 LPLUS
- {
- $$.beg = ral(Talt);
- patchnext($1.end, $$.beg);
- $$.beg->alt = $1.beg;
- $$.end = $$.beg;
- $$.beg = $1.beg;
- }
- | expr3 LQUES
- {
- $$.beg = ral(Talt);
- $$.beg->alt = $1.beg;
- $$.end = $1.end;
- appendnext($$.end, $$.beg);
- }
- expr4:
- LCHAR
- {
- $$.beg = ral(Tclass);
- $$.beg->lo = $1;
- $$.beg->hi = $1;
- $$.end = $$.beg;
- }
- | LBEGIN
- {
- $$.beg = ral(Tbegin);
- $$.end = $$.beg;
- }
- | LEND
- {
- $$.beg = ral(Tend);
- $$.end = $$.beg;
- }
- | LDOT
- {
- $$ = re2class("^\n");
- }
- | LCLASS
- {
- $$ = re2class($1);
- }
- | LLPAREN expr1 LRPAREN
- {
- $$ = $2;
- }
- newlines:
- LNEWLINE
- | newlines LNEWLINE
- %%
- void
- yyerror(char *e, ...)
- {
- if(filename)
- fprint(2, "grep: %s:%ld: %s\n", filename, lineno, e);
- else
- fprint(2, "grep: %s\n", e);
- exits("syntax");
- }
- long
- yylex(void)
- {
- char *q, *eq;
- int c, s;
- if(peekc) {
- s = peekc;
- peekc = 0;
- return s;
- }
- c = getrec();
- if(literal) {
- if(c != 0 && c != '\n') {
- yylval.val = c;
- return LCHAR;
- }
- literal = 0;
- }
- switch(c) {
- default:
- yylval.val = c;
- s = LCHAR;
- break;
- case '\\':
- c = getrec();
- yylval.val = c;
- s = LCHAR;
- if(c == '\n')
- s = LNEWLINE;
- break;
- case '[':
- goto getclass;
- case '(':
- s = LLPAREN;
- break;
- case ')':
- s = LRPAREN;
- break;
- case '|':
- s = LALT;
- break;
- case '*':
- s = LSTAR;
- break;
- case '+':
- s = LPLUS;
- break;
- case '?':
- s = LQUES;
- break;
- case '^':
- s = LBEGIN;
- break;
- case '$':
- s = LEND;
- break;
- case '.':
- s = LDOT;
- break;
- case 0:
- peekc = -1;
- case '\n':
- s = LNEWLINE;
- break;
- }
- return s;
- getclass:
- q = u.string;
- eq = q + nelem(u.string) - 5;
- c = getrec();
- if(c == '^') {
- q[0] = '^';
- q[1] = '\n';
- q[2] = '-';
- q[3] = '\n';
- q += 4;
- c = getrec();
- }
- for(;;) {
- if(q >= eq)
- error("class too long");
- if(c == ']' || c == 0)
- break;
- if(c == '\\') {
- *q++ = c;
- c = getrec();
- if(c == 0)
- break;
- }
- *q++ = c;
- c = getrec();
- }
- *q = 0;
- if(c == 0)
- return LBAD;
- yylval.str = u.string;
- return LCLASS;
- }
|