grep.y 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. %{
  2. #include "grep.h"
  3. %}
  4. %union
  5. {
  6. int val;
  7. char* str;
  8. Re2 re;
  9. }
  10. %type <re> expr prog
  11. %type <re> expr0 expr1 expr2 expr3 expr4
  12. %token <str> LCLASS
  13. %token <val> LCHAR
  14. %token LLPAREN LRPAREN LALT LSTAR LPLUS LQUES
  15. %token LBEGIN LEND LDOT LBAD LNEWLINE
  16. %%
  17. prog: /* empty */
  18. {
  19. yyerror("empty pattern");
  20. }
  21. | expr newlines
  22. {
  23. $$.beg = ral(Tend);
  24. $$.end = $$.beg;
  25. $$ = re2cat(re2star(re2or(re2char(0x00, '\n'-1), re2char('\n'+1, 0xff))), $$);
  26. $$ = re2cat($1, $$);
  27. $$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
  28. topre = $$;
  29. }
  30. expr:
  31. expr0
  32. | expr newlines expr0
  33. {
  34. $$ = re2or($1, $3);
  35. }
  36. expr0:
  37. expr1
  38. | LSTAR { literal = 1; } expr1
  39. {
  40. $$ = $3;
  41. }
  42. expr1:
  43. expr2
  44. | expr1 LALT expr2
  45. {
  46. $$ = re2or($1, $3);
  47. }
  48. expr2:
  49. expr3
  50. | expr2 expr3
  51. {
  52. $$ = re2cat($1, $2);
  53. }
  54. expr3:
  55. expr4
  56. | expr3 LSTAR
  57. {
  58. $$ = re2star($1);
  59. }
  60. | expr3 LPLUS
  61. {
  62. $$.beg = ral(Talt);
  63. patchnext($1.end, $$.beg);
  64. $$.beg->alt = $1.beg;
  65. $$.end = $$.beg;
  66. $$.beg = $1.beg;
  67. }
  68. | expr3 LQUES
  69. {
  70. $$.beg = ral(Talt);
  71. $$.beg->alt = $1.beg;
  72. $$.end = $1.end;
  73. appendnext($$.end, $$.beg);
  74. }
  75. expr4:
  76. LCHAR
  77. {
  78. $$.beg = ral(Tclass);
  79. $$.beg->lo = $1;
  80. $$.beg->hi = $1;
  81. $$.end = $$.beg;
  82. }
  83. | LBEGIN
  84. {
  85. $$.beg = ral(Tbegin);
  86. $$.end = $$.beg;
  87. }
  88. | LEND
  89. {
  90. $$.beg = ral(Tend);
  91. $$.end = $$.beg;
  92. }
  93. | LDOT
  94. {
  95. $$ = re2class("^\n");
  96. }
  97. | LCLASS
  98. {
  99. $$ = re2class($1);
  100. }
  101. | LLPAREN expr1 LRPAREN
  102. {
  103. $$ = $2;
  104. }
  105. newlines:
  106. LNEWLINE
  107. | newlines LNEWLINE
  108. %%
  109. void
  110. yyerror(char *e, ...)
  111. {
  112. va_list args;
  113. fprint(2, "grep: ");
  114. if(filename)
  115. fprint(2, "%s:%ld: ", filename, lineno);
  116. else if (pattern)
  117. fprint(2, "%s: ", pattern);
  118. va_start(args, e);
  119. vfprint(2, e, args);
  120. va_end(args);
  121. fprint(2, "\n");
  122. exits("syntax");
  123. }
  124. long
  125. yylex(void)
  126. {
  127. char *q, *eq;
  128. int c, s;
  129. if(peekc) {
  130. s = peekc;
  131. peekc = 0;
  132. return s;
  133. }
  134. c = getrec();
  135. if(literal) {
  136. if(c != 0 && c != '\n') {
  137. yylval.val = c;
  138. return LCHAR;
  139. }
  140. literal = 0;
  141. }
  142. switch(c) {
  143. default:
  144. yylval.val = c;
  145. s = LCHAR;
  146. break;
  147. case '\\':
  148. c = getrec();
  149. yylval.val = c;
  150. s = LCHAR;
  151. if(c == '\n')
  152. s = LNEWLINE;
  153. break;
  154. case '[':
  155. goto getclass;
  156. case '(':
  157. s = LLPAREN;
  158. break;
  159. case ')':
  160. s = LRPAREN;
  161. break;
  162. case '|':
  163. s = LALT;
  164. break;
  165. case '*':
  166. s = LSTAR;
  167. break;
  168. case '+':
  169. s = LPLUS;
  170. break;
  171. case '?':
  172. s = LQUES;
  173. break;
  174. case '^':
  175. s = LBEGIN;
  176. break;
  177. case '$':
  178. s = LEND;
  179. break;
  180. case '.':
  181. s = LDOT;
  182. break;
  183. case 0:
  184. peekc = -1;
  185. case '\n':
  186. s = LNEWLINE;
  187. break;
  188. }
  189. return s;
  190. getclass:
  191. q = u.string;
  192. eq = q + nelem(u.string) - 5;
  193. c = getrec();
  194. if(c == '^') {
  195. q[0] = '^';
  196. q[1] = '\n';
  197. q[2] = '-';
  198. q[3] = '\n';
  199. q += 4;
  200. c = getrec();
  201. }
  202. for(;;) {
  203. if(q >= eq)
  204. error("class too long");
  205. if(c == ']' || c == 0)
  206. break;
  207. if(c == '\\') {
  208. *q++ = c;
  209. c = getrec();
  210. if(c == 0)
  211. break;
  212. }
  213. *q++ = c;
  214. c = getrec();
  215. }
  216. *q = 0;
  217. if(c == 0)
  218. return LBAD;
  219. yylval.str = u.string;
  220. return LCLASS;
  221. }