comp.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "grep.h"
  10. /*
  11. * incremental compiler.
  12. * add the branch c to the
  13. * state s.
  14. */
  15. void
  16. increment(State * s, int c)
  17. {
  18. int i;
  19. State *t, **tt;
  20. Re *re1, *re2;
  21. nfollow = 0;
  22. gen++;
  23. matched = 0;
  24. for (i = 0; i < s->count; i++)
  25. fol1(s->re[i], c);
  26. qsort(follow, nfollow, sizeof(*follow), fcmp);
  27. for (tt = &state0; t = *tt;) {
  28. if (t->count > nfollow) {
  29. tt = &t->linkleft;
  30. goto cont;
  31. }
  32. if (t->count < nfollow) {
  33. tt = &t->linkright;
  34. goto cont;
  35. }
  36. for (i = 0; i < nfollow; i++) {
  37. re1 = t->re[i];
  38. re2 = follow[i];
  39. if (re1 > re2) {
  40. tt = &t->linkleft;
  41. goto cont;
  42. }
  43. if (re1 < re2) {
  44. tt = &t->linkright;
  45. goto cont;
  46. }
  47. }
  48. if (! !matched && !t->match) {
  49. tt = &t->linkleft;
  50. goto cont;
  51. }
  52. if (!matched && ! !t->match) {
  53. tt = &t->linkright;
  54. goto cont;
  55. }
  56. s->next[c] = t;
  57. return;
  58. cont: ;
  59. }
  60. t = sal(nfollow);
  61. *tt = t;
  62. for (i = 0; i < nfollow; i++) {
  63. re1 = follow[i];
  64. t->re[i] = re1;
  65. }
  66. s->next[c] = t;
  67. t->match = matched;
  68. }
  69. int
  70. fcmp(const void *va, const void *vb)
  71. {
  72. const Re **aa = (const Re**)va;
  73. const Re **bb = (const Re**)vb;
  74. const Re *a = *aa;
  75. const Re *b = *bb;
  76. if (a > b)
  77. return 1;
  78. if (a < b)
  79. return -1;
  80. return 0;
  81. }
  82. void
  83. fol1(Re * r, int c)
  84. {
  85. Re *r1;
  86. loop:
  87. if (r->gen == gen)
  88. return;
  89. if (nfollow >= maxfollow)
  90. error("nfollow");
  91. r->gen = gen;
  92. switch (r->type) {
  93. default:
  94. error("fol1");
  95. case Tcase:
  96. if (c >= 0 && c < 256)
  97. if (r1 = r->cases[c])
  98. follow[nfollow++] = r1;
  99. if (r = r->next)
  100. goto loop;
  101. break;
  102. case Talt:
  103. case Tor:
  104. fol1(r->alt, c);
  105. r = r->next;
  106. goto loop;
  107. case Tbegin:
  108. if (c == '\n' || c == Cbegin)
  109. follow[nfollow++] = r->next;
  110. break;
  111. case Tend:
  112. if (c == '\n') {
  113. if (r->next == 0) {
  114. matched = 1;
  115. break;
  116. }
  117. r = r->next;
  118. goto loop;
  119. }
  120. break;
  121. case Tclass:
  122. if (c >= r->lo && c <= r->hi)
  123. follow[nfollow++] = r->next;
  124. break;
  125. }
  126. }
  127. Rune tab1[] = {
  128. 0x007f,
  129. 0x07ff,
  130. };
  131. Rune tab2[] = {
  132. 0x003f,
  133. 0x0fff,
  134. };
  135. Re2
  136. rclass(Rune p0, Rune p1)
  137. {
  138. char xc0[6], xc1[6];
  139. int i, n, m;
  140. Re2 x;
  141. if (p0 > p1)
  142. return re2char(0xff, 0xff); // no match
  143. /*
  144. * bust range into same length
  145. * character sequences
  146. */
  147. for (i = 0; i < nelem(tab1); i++) {
  148. m = tab1[i];
  149. if (p0 <= m && p1 > m)
  150. return re2or(rclass(p0, m), rclass(m + 1, p1));
  151. }
  152. /*
  153. * bust range into part of a single page
  154. * or into full pages
  155. */
  156. for (i = 0; i < nelem(tab2); i++) {
  157. m = tab2[i];
  158. if ((p0 & ~m) != (p1 & ~m)) {
  159. if ((p0 & m) != 0)
  160. return re2or(rclass(p0, p0 | m), rclass((p0 | m) + 1, p1));
  161. if ((p1 & m) != m)
  162. return re2or(rclass(p0, (p1 & ~m) - 1), rclass(p1 & ~m, p1));
  163. }
  164. }
  165. n = runetochar(xc0, &p0);
  166. i = runetochar(xc1, &p1);
  167. if (i != n)
  168. error("length");
  169. x = re2char(xc0[0], xc1[0]);
  170. for (i = 1; i < n; i++)
  171. x = re2cat(x, re2char(xc0[i], xc1[i]));
  172. return x;
  173. }
  174. int
  175. pcmp(const void *va, const void *vb)
  176. {
  177. int n;
  178. const Rune *a = (const Rune*)va;
  179. const Rune *b = (const Rune*)vb;
  180. n = a[0] - b[0];
  181. if (n)
  182. return n;
  183. return a[1] - b[1];
  184. }
  185. /*
  186. * convert character chass into
  187. * run-pair ranges of matches.
  188. * this is 10646/utf specific and
  189. * needs to be changed for some
  190. * other input character set.
  191. * this is the key to a fast
  192. * regular search of characters
  193. * by looking at sequential bytes.
  194. */
  195. Re2
  196. re2class(char *s)
  197. {
  198. Rune pairs[200 + 2], *p, *q, ov;
  199. int nc;
  200. Re2 x;
  201. nc = 0;
  202. if (*s == '^') {
  203. nc = 1;
  204. s++;
  205. }
  206. p = pairs;
  207. s += chartorune(p, s);
  208. for (;;) {
  209. if (*p == '\\')
  210. s += chartorune(p, s);
  211. if (*p == 0)
  212. break;
  213. p[1] = *p;
  214. p += 2;
  215. if (p >= pairs + nelem(pairs) - 2)
  216. error("class too big");
  217. s += chartorune(p, s);
  218. if (*p != '-')
  219. continue;
  220. s += chartorune(p, s);
  221. if (*p == '\\')
  222. s += chartorune(p, s);
  223. if (*p == 0)
  224. break;
  225. p[-1] = *p;
  226. s += chartorune(p, s);
  227. }
  228. *p = 0;
  229. qsort(pairs, (p - pairs) / 2, 2 * sizeof(*pairs), pcmp);
  230. q = pairs;
  231. for (p = pairs + 2; *p; p += 2) {
  232. if (p[0] > p[1])
  233. continue;
  234. if (p[0] > q[1] || p[1] < q[0]) {
  235. q[2] = p[0];
  236. q[3] = p[1];
  237. q += 2;
  238. continue;
  239. }
  240. if (p[0] < q[0])
  241. q[0] = p[0];
  242. if (p[1] > q[1])
  243. q[1] = p[1];
  244. }
  245. q[2] = 0;
  246. p = pairs;
  247. if (nc) {
  248. x = rclass(0, p[0] - 1);
  249. ov = p[1] + 1;
  250. for (p += 2; *p; p += 2) {
  251. x = re2or(x, rclass(ov, p[0] - 1));
  252. ov = p[1] + 1;
  253. }
  254. x = re2or(x, rclass(ov, Runemask));
  255. } else {
  256. x = rclass(p[0], p[1]);
  257. for (p += 2; *p; p += 2)
  258. x = re2or(x, rclass(p[0], p[1]));
  259. }
  260. return x;
  261. }