sub.c 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "grep.h"
  10. void *
  11. mal(int n)
  12. {
  13. static char *s;
  14. static int m = 0;
  15. void *v;
  16. n = (n + 3) & ~3;
  17. if (m < n) {
  18. if (n > Nhunk) {
  19. v = sbrk(n);
  20. memset(v, 0, n);
  21. return v;
  22. }
  23. s = sbrk(Nhunk);
  24. m = Nhunk;
  25. }
  26. v = s;
  27. s += n;
  28. m -= n;
  29. memset(v, 0, n);
  30. return v;
  31. }
  32. State *
  33. sal(int n)
  34. {
  35. State *s;
  36. s = mal(sizeof(*s));
  37. // s->next = mal(256*sizeof(*s->next));
  38. s->count = n;
  39. s->re = mal(n * sizeof(*state0->re));
  40. return s;
  41. }
  42. Re *
  43. ral(int type)
  44. {
  45. Re *r;
  46. r = mal(sizeof(*r));
  47. r->type = type;
  48. maxfollow++;
  49. return r;
  50. }
  51. void
  52. error(char *s)
  53. {
  54. fprint(2, "grep: internal error: %s\n", s);
  55. exits(s);
  56. }
  57. int
  58. countor(Re * r)
  59. {
  60. int n;
  61. n = 0;
  62. loop:
  63. switch (r->type) {
  64. case Tor:
  65. n += countor(r->alt);
  66. r = r->next;
  67. goto loop;
  68. case Tclass:
  69. return n + r->hi - r->lo + 1;
  70. }
  71. return n;
  72. }
  73. Re *
  74. oralloc(int t, Re * r, Re * b)
  75. {
  76. Re *a;
  77. if (b == 0)
  78. return r;
  79. a = ral(t);
  80. a->alt = r;
  81. a->next = b;
  82. return a;
  83. }
  84. void
  85. case1(Re * c, Re * r)
  86. {
  87. int n;
  88. loop:
  89. switch (r->type) {
  90. case Tor:
  91. case1(c, r->alt);
  92. r = r->next;
  93. goto loop;
  94. case Tclass: /* add to character */
  95. for (n = r->lo; n <= r->hi; n++)
  96. c->cases[n] = oralloc(Tor, r->next, c->cases[n]);
  97. break;
  98. default: /* add everything unknown to next */
  99. c->next = oralloc(Talt, r, c->next);
  100. break;
  101. }
  102. }
  103. Re *
  104. addcase(Re * r)
  105. {
  106. int i, n;
  107. Re *a;
  108. if (r->gen == gen)
  109. return r;
  110. r->gen = gen;
  111. switch (r->type) {
  112. default:
  113. error("addcase");
  114. case Tor:
  115. n = countor(r);
  116. if (n >= Caselim) {
  117. a = ral(Tcase);
  118. a->cases = mal(256 * sizeof(*a->cases));
  119. case1(a, r);
  120. for (i = 0; i < 256; i++)
  121. if (a->cases[i]) {
  122. r = a->cases[i];
  123. if (countor(r) < n)
  124. a->cases[i] = addcase(r);
  125. }
  126. return a;
  127. }
  128. return r;
  129. case Talt:
  130. r->next = addcase(r->next);
  131. r->alt = addcase(r->alt);
  132. return r;
  133. case Tbegin:
  134. case Tend:
  135. case Tclass:
  136. return r;
  137. }
  138. }
  139. void
  140. str2top(char *p)
  141. {
  142. Re2 oldtop;
  143. oldtop = topre;
  144. input = p;
  145. if (*p == '\0')
  146. yyerror("empty pattern"); /* can't be a file name here */
  147. if (!flags['f'])
  148. pattern = p;
  149. topre.beg = 0;
  150. topre.end = 0;
  151. yyparse();
  152. gen++;
  153. if (topre.beg == 0)
  154. yyerror("syntax");
  155. if (oldtop.beg)
  156. topre = re2or(oldtop, topre);
  157. }
  158. void
  159. appendnext(Re * a, Re * b)
  160. {
  161. Re *n;
  162. while (n = a->next)
  163. a = n;
  164. a->next = b;
  165. }
  166. void
  167. patchnext(Re * a, Re * b)
  168. {
  169. Re *n;
  170. while (a) {
  171. n = a->next;
  172. a->next = b;
  173. a = n;
  174. }
  175. }
  176. int
  177. getrec(void)
  178. {
  179. int c;
  180. if (flags['f']) {
  181. c = Bgetc(rein);
  182. if (c <= 0)
  183. return 0;
  184. } else
  185. c = *input++ & 0xff;
  186. if (flags['i'] && c >= 'A' && c <= 'Z')
  187. c += 'a' - 'A';
  188. if (c == '\n')
  189. lineno++;
  190. return c;
  191. }
  192. Re2
  193. re2cat(Re2 a, Re2 b)
  194. {
  195. Re2 c;
  196. c.beg = a.beg;
  197. c.end = b.end;
  198. patchnext(a.end, b.beg);
  199. return c;
  200. }
  201. Re2
  202. re2star(Re2 a)
  203. {
  204. Re2 c;
  205. c.beg = ral(Talt);
  206. c.beg->alt = a.beg;
  207. patchnext(a.end, c.beg);
  208. c.end = c.beg;
  209. return c;
  210. }
  211. Re2
  212. re2or(Re2 a, Re2 b)
  213. {
  214. Re2 c;
  215. c.beg = ral(Tor);
  216. c.beg->alt = b.beg;
  217. c.beg->next = a.beg;
  218. c.end = b.end;
  219. appendnext(c.end, a.end);
  220. return c;
  221. }
  222. Re2
  223. re2char(int c0, int c1)
  224. {
  225. Re2 c;
  226. c.beg = ral(Tclass);
  227. c.beg->lo = c0 & 0xff;
  228. c.beg->hi = c1 & 0xff;
  229. c.end = c.beg;
  230. return c;
  231. }
  232. void
  233. reprint1(Re * a)
  234. {
  235. int i, j;
  236. loop:
  237. if (a == 0)
  238. return;
  239. if (a->gen == gen)
  240. return;
  241. a->gen = gen;
  242. print("%p: ", a);
  243. switch (a->type) {
  244. default:
  245. print("type %d\n", a->type);
  246. error("print1 type");
  247. case Tcase:
  248. print("case ->%p\n", a->next);
  249. for (i = 0; i < 256; i++)
  250. if (a->cases[i]) {
  251. for (j = i + 1; j < 256; j++)
  252. if (a->cases[i] != a->cases[j])
  253. break;
  254. print(" [%.2x-%.2x] ->%p\n", i, j - 1, a->cases[i]);
  255. i = j - 1;
  256. }
  257. for (i = 0; i < 256; i++)
  258. reprint1(a->cases[i]);
  259. break;
  260. case Tbegin:
  261. print("^ ->%p\n", a->next);
  262. break;
  263. case Tend:
  264. print("$ ->%p\n", a->next);
  265. break;
  266. case Tclass:
  267. print("[%.2x-%.2x] ->%p\n", a->lo, a->hi, a->next);
  268. break;
  269. case Tor:
  270. case Talt:
  271. print("| %p ->%p\n", a->alt, a->next);
  272. reprint1(a->alt);
  273. break;
  274. }
  275. a = a->next;
  276. goto loop;
  277. }
  278. void
  279. reprint(char *s, Re * r)
  280. {
  281. print("%s:\n", s);
  282. gen++;
  283. reprint1(r);
  284. print("\n\n");
  285. }