/* grep.y */
  %{
  /*
   * C prologue copied into the generated parser: the project header and
   * the prototype of the hand-written lexer defined after the rules.
   */
  #include "grep.h"

  #define YYSIZE_T size_t

  long yylex(void);
  %}

  /* Semantic value attached to tokens and nonterminals. */
  %union
  {
      int val;        /* character code; yylex stores it for LCHAR */
      char *str;      /* class text; yylex stores it for LCLASS */
      Re2 re;         /* value of every expression nonterminal */
  }

  /* Every nonterminal that yields a value yields an Re2. */
  %type <re> expr prog expr0 expr1 expr2 expr3 expr4

  /* Tokens produced by yylex; declaration order is kept as-is. */
  %token <str> LCLASS
  %token <val> LCHAR
  %token LLPAREN LRPAREN LALT LSTAR LPLUS LQUES
  %token LBEGIN LEND LDOT LBAD LNEWLINE
  18. %%
  /*
   * prog: the whole pattern input.
   * Empty input is reported through yyerror.  Otherwise the expression
   * is wrapped front and back and the result is stored in topre.
   */
  prog:
      /* empty */
      {
          yyerror("empty pattern");
      }
  |   expr newlines
      {
          /* tail starts as a lone Tend node */
          $$.end = $$.beg = ral(Tend);
          /* put re2star over the ranges 0x00..'\n'-1 and '\n'+1..0xff before it */
          $$ = re2cat(re2star(re2or(re2char(0x00, '\n' - 1), re2char('\n' + 1, 0xff))), $$);
          /* the parsed expression goes in front of that tail */
          $$ = re2cat($1, $$);
          /* and re2star over the full range 0x00..0xff goes in front of everything */
          $$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
          topre = $$;
      }
  ;
  /*
   * expr: one or more expr0 patterns separated by newlines; each
   * additional pattern is combined with the ones before it via re2or.
   */
  expr:
      expr0
  |   expr newlines expr0
      {
          $$ = re2or($1, $3);
      }
  ;
  /*
   * expr0: a single pattern.
   * A leading LSTAR sets the global `literal` before expr1 is parsed;
   * while it is set, yylex returns every character up to the next
   * newline or NUL as LCHAR.
   */
  expr0:
      expr1
  |   LSTAR { literal = 1; } expr1
      {
          /* the mid-rule action occupies $2, so expr1 is $3 */
          $$ = $3;
      }
  ;
  /* expr1: expr2 items separated by LALT ('|'), combined left to right with re2or. */
  expr1:
      expr2
  |   expr1 LALT expr2
      {
          $$ = re2or($1, $3);
      }
  ;
  /* expr2: a sequence of adjacent expr3 items, joined left to right with re2cat. */
  expr2:
      expr3
  |   expr2 expr3
      {
          $$ = re2cat($1, $2);
      }
  ;
  /*
   * expr3: an expr4 followed by any number of postfix operators
   * (LSTAR '*', LPLUS '+', LQUES '?').
   */
  expr3:
      expr4
  |   expr3 LSTAR
      {
          $$ = re2star($1);
      }
  |   expr3 LPLUS
      {
          /*
           * A new Talt node is linked after $1 with patchnext and its
           * alt points back at $1.beg.  The result begins where $1
           * begins and ends at the Talt node.
           */
          $$.beg = ral(Talt);
          patchnext($1.end, $$.beg);
          $$.beg->alt = $1.beg;
          $$.end = $$.beg;
          $$.beg = $1.beg;
      }
  |   expr3 LQUES
      {
          /*
           * A new Talt node becomes the start, with alt pointing at
           * $1.beg.  The result keeps $1.end, and appendnext is given
           * that end together with the Talt node.
           */
          $$.beg = ral(Talt);
          $$.beg->alt = $1.beg;
          $$.end = $1.end;
          appendnext($$.end, $$.beg);
      }
  ;
  /*
   * expr4: the atoms of a pattern -- a single character, an anchor,
   * '.', a bracketed class, or a parenthesised expr1.
   */
  expr4:
      LCHAR
      {
          /* one Tclass node whose range is just this character */
          $$.end = $$.beg = ral(Tclass);
          $$.beg->lo = $1;
          $$.beg->hi = $1;
      }
  |   LBEGIN
      {
          $$.end = $$.beg = ral(Tbegin);
      }
  |   LEND
      {
          $$.end = $$.beg = ral(Tend);
      }
  |   LDOT
      {
          /* '.' is handed to re2class as the class text "^\n" */
          $$ = re2class("^\n");
      }
  |   LCLASS
      {
          /* $1 is the class text collected by yylex */
          $$ = re2class($1);
      }
  |   LLPAREN expr1 LRPAREN
      {
          $$ = $2;
      }
  ;
  /* newlines: one or more consecutive LNEWLINE tokens; carries no value. */
  newlines:
      LNEWLINE
  |   newlines LNEWLINE
  ;
  110. %%
  111. void
  112. yyerror(char *e, ...)
  113. {
  114. va_list args;
  115. fprint(2, "grep: ");
  116. if(filename)
  117. fprint(2, "%s:%ld: ", filename, lineno);
  118. else if (pattern)
  119. fprint(2, "%s: ", pattern);
  120. va_start(args, e);
  121. vfprint(2, e, args);
  122. va_end(args);
  123. fprint(2, "\n");
  124. exits("syntax");
  125. }
  126. long
  127. yylex(void)
  128. {
  129. char *q, *eq;
  130. int c, s;
  131. if(peekc) {
  132. s = peekc;
  133. peekc = 0;
  134. return s;
  135. }
  136. c = getrec();
  137. if(literal) {
  138. if(c != 0 && c != '\n') {
  139. yylval.val = c;
  140. return LCHAR;
  141. }
  142. literal = 0;
  143. }
  144. switch(c) {
  145. default:
  146. yylval.val = c;
  147. s = LCHAR;
  148. break;
  149. case '\\':
  150. c = getrec();
  151. yylval.val = c;
  152. s = LCHAR;
  153. if(c == '\n')
  154. s = LNEWLINE;
  155. break;
  156. case '[':
  157. goto getclass;
  158. case '(':
  159. s = LLPAREN;
  160. break;
  161. case ')':
  162. s = LRPAREN;
  163. break;
  164. case '|':
  165. s = LALT;
  166. break;
  167. case '*':
  168. s = LSTAR;
  169. break;
  170. case '+':
  171. s = LPLUS;
  172. break;
  173. case '?':
  174. s = LQUES;
  175. break;
  176. case '^':
  177. s = LBEGIN;
  178. break;
  179. case '$':
  180. s = LEND;
  181. break;
  182. case '.':
  183. s = LDOT;
  184. break;
  185. case 0:
  186. peekc = -1;
  187. case '\n':
  188. s = LNEWLINE;
  189. break;
  190. }
  191. return s;
  192. getclass:
  193. q = u.string;
  194. eq = q + nelem(u.string) - 5;
  195. c = getrec();
  196. if(c == '^') {
  197. q[0] = '^';
  198. q[1] = '\n';
  199. q[2] = '-';
  200. q[3] = '\n';
  201. q += 4;
  202. c = getrec();
  203. }
  204. for(;;) {
  205. if(q >= eq)
  206. error("class too long");
  207. if(c == ']' || c == 0)
  208. break;
  209. if(c == '\\') {
  210. *q++ = c;
  211. c = getrec();
  212. if(c == 0)
  213. break;
  214. }
  215. *q++ = c;
  216. c = getrec();
  217. }
  218. *q = 0;
  219. if(c == 0)
  220. return LBAD;
  221. yylval.str = u.string;
  222. return LCLASS;
  223. }