comp.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "grep.h"
  10. /*
  11. * incremental compiler.
  12. * add the branch c to the
  13. * state s.
  14. */
  15. void
  16. increment(State * s, int c)
  17. {
  18. int i;
  19. State *t, **tt;
  20. Re *re1, *re2;
  21. nfollow = 0;
  22. gen++;
  23. matched = 0;
  24. for (i = 0; i < s->count; i++)
  25. fol1(s->re[i], c);
  26. qsort(follow, nfollow, sizeof(*follow), fcmp);
  27. for (tt = &state0; t = *tt;) {
  28. if (t->count > nfollow) {
  29. tt = &t->linkleft;
  30. goto cont;
  31. }
  32. if (t->count < nfollow) {
  33. tt = &t->linkright;
  34. goto cont;
  35. }
  36. for (i = 0; i < nfollow; i++) {
  37. re1 = t->re[i];
  38. re2 = follow[i];
  39. if (re1 > re2) {
  40. tt = &t->linkleft;
  41. goto cont;
  42. }
  43. if (re1 < re2) {
  44. tt = &t->linkright;
  45. goto cont;
  46. }
  47. }
  48. if (! !matched && !t->match) {
  49. tt = &t->linkleft;
  50. goto cont;
  51. }
  52. if (!matched && ! !t->match) {
  53. tt = &t->linkright;
  54. goto cont;
  55. }
  56. s->next[c] = t;
  57. return;
  58. cont: ;
  59. }
  60. t = sal(nfollow);
  61. *tt = t;
  62. for (i = 0; i < nfollow; i++) {
  63. re1 = follow[i];
  64. t->re[i] = re1;
  65. }
  66. s->next[c] = t;
  67. t->match = matched;
  68. }
  69. int
  70. fcmp(const void *va, const void *vb)
  71. {
  72. Re **aa, **bb;
  73. Re *a, *b;
  74. aa = va;
  75. bb = vb;
  76. a = *aa;
  77. b = *bb;
  78. if (a > b)
  79. return 1;
  80. if (a < b)
  81. return -1;
  82. return 0;
  83. }
  84. void
  85. fol1(Re * r, int c)
  86. {
  87. Re *r1;
  88. loop:
  89. if (r->gen == gen)
  90. return;
  91. if (nfollow >= maxfollow)
  92. error("nfollow");
  93. r->gen = gen;
  94. switch (r->type) {
  95. default:
  96. error("fol1");
  97. case Tcase:
  98. if (c >= 0 && c < 256)
  99. if (r1 = r->cases[c])
  100. follow[nfollow++] = r1;
  101. if (r = r->next)
  102. goto loop;
  103. break;
  104. case Talt:
  105. case Tor:
  106. fol1(r->alt, c);
  107. r = r->next;
  108. goto loop;
  109. case Tbegin:
  110. if (c == '\n' || c == Cbegin)
  111. follow[nfollow++] = r->next;
  112. break;
  113. case Tend:
  114. if (c == '\n') {
  115. if (r->next == 0) {
  116. matched = 1;
  117. break;
  118. }
  119. r = r->next;
  120. goto loop;
  121. }
  122. break;
  123. case Tclass:
  124. if (c >= r->lo && c <= r->hi)
  125. follow[nfollow++] = r->next;
  126. break;
  127. }
  128. }
  129. Rune tab1[] = {
  130. 0x007f,
  131. 0x07ff,
  132. };
  133. Rune tab2[] = {
  134. 0x003f,
  135. 0x0fff,
  136. };
  137. Re2
  138. rclass(Rune p0, Rune p1)
  139. {
  140. char xc0[6], xc1[6];
  141. int i, n, m;
  142. Re2 x;
  143. if (p0 > p1)
  144. return re2char(0xff, 0xff); // no match
  145. /*
  146. * bust range into same length
  147. * character sequences
  148. */
  149. for (i = 0; i < nelem(tab1); i++) {
  150. m = tab1[i];
  151. if (p0 <= m && p1 > m)
  152. return re2or(rclass(p0, m), rclass(m + 1, p1));
  153. }
  154. /*
  155. * bust range into part of a single page
  156. * or into full pages
  157. */
  158. for (i = 0; i < nelem(tab2); i++) {
  159. m = tab2[i];
  160. if ((p0 & ~m) != (p1 & ~m)) {
  161. if ((p0 & m) != 0)
  162. return re2or(rclass(p0, p0 | m), rclass((p0 | m) + 1, p1));
  163. if ((p1 & m) != m)
  164. return re2or(rclass(p0, (p1 & ~m) - 1), rclass(p1 & ~m, p1));
  165. }
  166. }
  167. n = runetochar(xc0, &p0);
  168. i = runetochar(xc1, &p1);
  169. if (i != n)
  170. error("length");
  171. x = re2char(xc0[0], xc1[0]);
  172. for (i = 1; i < n; i++)
  173. x = re2cat(x, re2char(xc0[i], xc1[i]));
  174. return x;
  175. }
  176. int
  177. pcmp(const void *va, const void *vb)
  178. {
  179. int n;
  180. Rune *a, *b;
  181. a = va;
  182. b = vb;
  183. n = a[0] - b[0];
  184. if (n)
  185. return n;
  186. return a[1] - b[1];
  187. }
  188. /*
  189. * convert character chass into
  190. * run-pair ranges of matches.
  191. * this is 10646/utf specific and
  192. * needs to be changed for some
  193. * other input character set.
  194. * this is the key to a fast
  195. * regular search of characters
  196. * by looking at sequential bytes.
  197. */
  198. Re2
  199. re2class(char *s)
  200. {
  201. Rune pairs[200 + 2], *p, *q, ov;
  202. int nc;
  203. Re2 x;
  204. nc = 0;
  205. if (*s == '^') {
  206. nc = 1;
  207. s++;
  208. }
  209. p = pairs;
  210. s += chartorune(p, s);
  211. for (;;) {
  212. if (*p == '\\')
  213. s += chartorune(p, s);
  214. if (*p == 0)
  215. break;
  216. p[1] = *p;
  217. p += 2;
  218. if (p >= pairs + nelem(pairs) - 2)
  219. error("class too big");
  220. s += chartorune(p, s);
  221. if (*p != '-')
  222. continue;
  223. s += chartorune(p, s);
  224. if (*p == '\\')
  225. s += chartorune(p, s);
  226. if (*p == 0)
  227. break;
  228. p[-1] = *p;
  229. s += chartorune(p, s);
  230. }
  231. *p = 0;
  232. qsort(pairs, (p - pairs) / 2, 2 * sizeof(*pairs), pcmp);
  233. q = pairs;
  234. for (p = pairs + 2; *p; p += 2) {
  235. if (p[0] > p[1])
  236. continue;
  237. if (p[0] > q[1] || p[1] < q[0]) {
  238. q[2] = p[0];
  239. q[3] = p[1];
  240. q += 2;
  241. continue;
  242. }
  243. if (p[0] < q[0])
  244. q[0] = p[0];
  245. if (p[1] > q[1])
  246. q[1] = p[1];
  247. }
  248. q[2] = 0;
  249. p = pairs;
  250. if (nc) {
  251. x = rclass(0, p[0] - 1);
  252. ov = p[1] + 1;
  253. for (p += 2; *p; p += 2) {
  254. x = re2or(x, rclass(ov, p[0] - 1));
  255. ov = p[1] + 1;
  256. }
  257. x = re2or(x, rclass(ov, Runemask));
  258. } else {
  259. x = rclass(p[0], p[1]);
  260. for (p += 2; *p; p += 2)
  261. x = re2or(x, rclass(p[0], p[1]));
  262. }
  263. return x;
  264. }