sub.c 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. #include "grep.h"
  2. void*
  3. mal(int n)
  4. {
  5. static char *s;
  6. static int m = 0;
  7. void *v;
  8. n = (n+3) & ~3;
  9. if(m < n) {
  10. if(n > Nhunk) {
  11. v = sbrk(n);
  12. memset(v, 0, n);
  13. return v;
  14. }
  15. s = sbrk(Nhunk);
  16. m = Nhunk;
  17. }
  18. v = s;
  19. s += n;
  20. m -= n;
  21. memset(v, 0, n);
  22. return v;
  23. }
  24. State*
  25. sal(int n)
  26. {
  27. State *s;
  28. s = mal(sizeof(*s));
  29. // s->next = mal(256*sizeof(*s->next));
  30. s->count = n;
  31. s->re = mal(n*sizeof(*state0->re));
  32. return s;
  33. }
  34. Re*
  35. ral(int type)
  36. {
  37. Re *r;
  38. r = mal(sizeof(*r));
  39. r->type = type;
  40. maxfollow++;
  41. return r;
  42. }
  43. void
  44. error(char *s)
  45. {
  46. fprint(2, "grep: internal error: %s\n", s);
  47. exits(s);
  48. }
  49. int
  50. countor(Re *r)
  51. {
  52. int n;
  53. n = 0;
  54. loop:
  55. switch(r->type) {
  56. case Tor:
  57. n += countor(r->alt);
  58. r = r->next;
  59. goto loop;
  60. case Tclass:
  61. return n + r->hi - r->lo + 1;
  62. }
  63. return n;
  64. }
  65. Re*
  66. oralloc(int t, Re *r, Re *b)
  67. {
  68. Re *a;
  69. if(b == 0)
  70. return r;
  71. a = ral(t);
  72. a->alt = r;
  73. a->next = b;
  74. return a;
  75. }
  76. void
  77. case1(Re *c, Re *r)
  78. {
  79. int n;
  80. loop:
  81. switch(r->type) {
  82. case Tor:
  83. case1(c, r->alt);
  84. r = r->next;
  85. goto loop;
  86. case Tclass: /* add to character */
  87. for(n=r->lo; n<=r->hi; n++)
  88. c->cases[n] = oralloc(Tor, r->next, c->cases[n]);
  89. break;
  90. default: /* add everything unknown to next */
  91. c->next = oralloc(Talt, r, c->next);
  92. break;
  93. }
  94. }
  95. Re*
  96. addcase(Re *r)
  97. {
  98. int i, n;
  99. Re *a;
  100. if(r->gen == gen)
  101. return r;
  102. r->gen = gen;
  103. switch(r->type) {
  104. default:
  105. error("addcase");
  106. case Tor:
  107. n = countor(r);
  108. if(n >= Caselim) {
  109. a = ral(Tcase);
  110. a->cases = mal(256*sizeof(*a->cases));
  111. case1(a, r);
  112. for(i=0; i<256; i++)
  113. if(a->cases[i]) {
  114. r = a->cases[i];
  115. if(countor(r) < n)
  116. a->cases[i] = addcase(r);
  117. }
  118. return a;
  119. }
  120. return r;
  121. case Talt:
  122. r->next = addcase(r->next);
  123. r->alt = addcase(r->alt);
  124. return r;
  125. case Tbegin:
  126. case Tend:
  127. case Tclass:
  128. return r;
  129. }
  130. }
  131. void
  132. str2top(char *p)
  133. {
  134. Re2 oldtop;
  135. oldtop = topre;
  136. input = p;
  137. topre.beg = 0;
  138. topre.end = 0;
  139. yyparse();
  140. gen++;
  141. if(topre.beg == 0)
  142. yyerror("syntax");
  143. if(oldtop.beg)
  144. topre = re2or(oldtop, topre);
  145. }
  146. void
  147. appendnext(Re *a, Re *b)
  148. {
  149. Re *n;
  150. while(n = a->next)
  151. a = n;
  152. a->next = b;
  153. }
  154. void
  155. patchnext(Re *a, Re *b)
  156. {
  157. Re *n;
  158. while(a) {
  159. n = a->next;
  160. a->next = b;
  161. a = n;
  162. }
  163. }
  164. int
  165. getrec(void)
  166. {
  167. int c;
  168. if(flags['f']) {
  169. c = Bgetc(rein);
  170. if(c <= 0)
  171. return 0;
  172. } else
  173. c = *input++ & 0xff;
  174. if(flags['i'] && c >= 'A' && c <= 'Z')
  175. c += 'a'-'A';
  176. if(c == '\n')
  177. lineno++;
  178. return c;
  179. }
  180. Re2
  181. re2cat(Re2 a, Re2 b)
  182. {
  183. Re2 c;
  184. c.beg = a.beg;
  185. c.end = b.end;
  186. patchnext(a.end, b.beg);
  187. return c;
  188. }
  189. Re2
  190. re2star(Re2 a)
  191. {
  192. Re2 c;
  193. c.beg = ral(Talt);
  194. c.beg->alt = a.beg;
  195. patchnext(a.end, c.beg);
  196. c.end = c.beg;
  197. return c;
  198. }
  199. Re2
  200. re2or(Re2 a, Re2 b)
  201. {
  202. Re2 c;
  203. c.beg = ral(Tor);
  204. c.beg->alt = b.beg;
  205. c.beg->next = a.beg;
  206. c.end = b.end;
  207. appendnext(c.end, a.end);
  208. return c;
  209. }
  210. Re2
  211. re2char(int c0, int c1)
  212. {
  213. Re2 c;
  214. c.beg = ral(Tclass);
  215. c.beg->lo = c0 & 0xff;
  216. c.beg->hi = c1 & 0xff;
  217. c.end = c.beg;
  218. return c;
  219. }
  220. void
  221. reprint1(Re *a)
  222. {
  223. int i, j;
  224. loop:
  225. if(a == 0)
  226. return;
  227. if(a->gen == gen)
  228. return;
  229. a->gen = gen;
  230. print("%p: ", a);
  231. switch(a->type) {
  232. default:
  233. print("type %d\n", a->type);
  234. error("print1 type");
  235. case Tcase:
  236. print("case ->%p\n", a->next);
  237. for(i=0; i<256; i++)
  238. if(a->cases[i]) {
  239. for(j=i+1; j<256; j++)
  240. if(a->cases[i] != a->cases[j])
  241. break;
  242. print(" [%.2x-%.2x] ->%p\n", i, j-1, a->cases[i]);
  243. i = j-1;
  244. }
  245. for(i=0; i<256; i++)
  246. reprint1(a->cases[i]);
  247. break;
  248. case Tbegin:
  249. print("^ ->%p\n", a->next);
  250. break;
  251. case Tend:
  252. print("$ ->%p\n", a->next);
  253. break;
  254. case Tclass:
  255. print("[%.2x-%.2x] ->%p\n", a->lo, a->hi, a->next);
  256. break;
  257. case Tor:
  258. case Talt:
  259. print("| %p ->%p\n", a->alt, a->next);
  260. reprint1(a->alt);
  261. break;
  262. }
  263. a = a->next;
  264. goto loop;
  265. }
  266. void
  267. reprint(char *s, Re *r)
  268. {
  269. print("%s:\n", s);
  270. gen++;
  271. reprint1(r);
  272. print("\n\n");
  273. }