/* grep.y: yacc grammar and lexer for grep patterns */
  1. %{
  2. #include "grep.h"
  3. %}
  4. %union
  5. {
  6. int val;
  7. char* str;
  8. Re2 re;
  9. }
  10. %type <re> expr prog
  11. %type <re> expr0 expr1 expr2 expr3 expr4
  12. %token <str> LCLASS
  13. %token <val> LCHAR
  14. %token LLPAREN LRPAREN LALT LSTAR LPLUS LQUES
  15. %token LBEGIN LEND LDOT LBAD LNEWLINE
  16. %%
  17. prog:
  18. expr newlines
  19. {
  20. $$.beg = ral(Tend);
  21. $$.end = $$.beg;
  22. $$ = re2cat(re2star(re2or(re2char(0x00, '\n'-1), re2char('\n'+1, 0xff))), $$);
  23. $$ = re2cat($1, $$);
  24. $$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
  25. topre = $$;
  26. }
  27. expr:
  28. expr0
  29. | expr newlines expr0
  30. {
  31. $$ = re2or($1, $3);
  32. }
  33. expr0:
  34. expr1
  35. | LSTAR { literal = 1; } expr1
  36. {
  37. $$ = $3;
  38. }
  39. expr1:
  40. expr2
  41. | expr1 LALT expr2
  42. {
  43. $$ = re2or($1, $3);
  44. }
  45. expr2:
  46. expr3
  47. | expr2 expr3
  48. {
  49. $$ = re2cat($1, $2);
  50. }
  51. expr3:
  52. expr4
  53. | expr3 LSTAR
  54. {
  55. $$ = re2star($1);
  56. }
  57. | expr3 LPLUS
  58. {
  59. $$.beg = ral(Talt);
  60. patchnext($1.end, $$.beg);
  61. $$.beg->alt = $1.beg;
  62. $$.end = $$.beg;
  63. $$.beg = $1.beg;
  64. }
  65. | expr3 LQUES
  66. {
  67. $$.beg = ral(Talt);
  68. $$.beg->alt = $1.beg;
  69. $$.end = $1.end;
  70. appendnext($$.end, $$.beg);
  71. }
  72. expr4:
  73. LCHAR
  74. {
  75. $$.beg = ral(Tclass);
  76. $$.beg->lo = $1;
  77. $$.beg->hi = $1;
  78. $$.end = $$.beg;
  79. }
  80. | LBEGIN
  81. {
  82. $$.beg = ral(Tbegin);
  83. $$.end = $$.beg;
  84. }
  85. | LEND
  86. {
  87. $$.beg = ral(Tend);
  88. $$.end = $$.beg;
  89. }
  90. | LDOT
  91. {
  92. $$ = re2class("^\n");
  93. }
  94. | LCLASS
  95. {
  96. $$ = re2class($1);
  97. }
  98. | LLPAREN expr1 LRPAREN
  99. {
  100. $$ = $2;
  101. }
  102. newlines:
  103. LNEWLINE
  104. | newlines LNEWLINE
  105. %%
  106. void
  107. yyerror(char *e, ...)
  108. {
  109. if(filename)
  110. fprint(2, "grep: %s:%ld: %s\n", filename, lineno, e);
  111. else
  112. fprint(2, "grep: %s\n", e);
  113. exits("syntax");
  114. }
  115. long
  116. yylex(void)
  117. {
  118. char *q, *eq;
  119. int c, s;
  120. if(peekc) {
  121. s = peekc;
  122. peekc = 0;
  123. return s;
  124. }
  125. c = getrec();
  126. if(literal) {
  127. if(c != 0 && c != '\n') {
  128. yylval.val = c;
  129. return LCHAR;
  130. }
  131. literal = 0;
  132. }
  133. switch(c) {
  134. default:
  135. yylval.val = c;
  136. s = LCHAR;
  137. break;
  138. case '\\':
  139. c = getrec();
  140. yylval.val = c;
  141. s = LCHAR;
  142. if(c == '\n')
  143. s = LNEWLINE;
  144. break;
  145. case '[':
  146. goto getclass;
  147. case '(':
  148. s = LLPAREN;
  149. break;
  150. case ')':
  151. s = LRPAREN;
  152. break;
  153. case '|':
  154. s = LALT;
  155. break;
  156. case '*':
  157. s = LSTAR;
  158. break;
  159. case '+':
  160. s = LPLUS;
  161. break;
  162. case '?':
  163. s = LQUES;
  164. break;
  165. case '^':
  166. s = LBEGIN;
  167. break;
  168. case '$':
  169. s = LEND;
  170. break;
  171. case '.':
  172. s = LDOT;
  173. break;
  174. case 0:
  175. peekc = -1;
  176. case '\n':
  177. s = LNEWLINE;
  178. break;
  179. }
  180. return s;
  181. getclass:
  182. q = u.string;
  183. eq = q + nelem(u.string) - 5;
  184. c = getrec();
  185. if(c == '^') {
  186. q[0] = '^';
  187. q[1] = '\n';
  188. q[2] = '-';
  189. q[3] = '\n';
  190. q += 4;
  191. c = getrec();
  192. }
  193. for(;;) {
  194. if(q >= eq)
  195. error("class too long");
  196. if(c == ']' || c == 0)
  197. break;
  198. if(c == '\\') {
  199. *q++ = c;
  200. c = getrec();
  201. if(c == 0)
  202. break;
  203. }
  204. *q++ = c;
  205. c = getrec();
  206. }
  207. *q = 0;
  208. if(c == 0)
  209. return LBAD;
  210. yylval.str = u.string;
  211. return LCLASS;
  212. }