123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- /*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
- #include "grep.h"
- /*
- * incremental compiler.
- * add the branch c to the
- * state s.
- */
- void
- increment(State * s, int c)
- {
- int i;
- State *t, **tt;
- Re *re1, *re2;
- nfollow = 0;
- gen++;
- matched = 0;
- for (i = 0; i < s->count; i++)
- fol1(s->re[i], c);
- qsort(follow, nfollow, sizeof(*follow), fcmp);
- for (tt = &state0; t = *tt;) {
- if (t->count > nfollow) {
- tt = &t->linkleft;
- goto cont;
- }
- if (t->count < nfollow) {
- tt = &t->linkright;
- goto cont;
- }
- for (i = 0; i < nfollow; i++) {
- re1 = t->re[i];
- re2 = follow[i];
- if (re1 > re2) {
- tt = &t->linkleft;
- goto cont;
- }
- if (re1 < re2) {
- tt = &t->linkright;
- goto cont;
- }
- }
- if (! !matched && !t->match) {
- tt = &t->linkleft;
- goto cont;
- }
- if (!matched && ! !t->match) {
- tt = &t->linkright;
- goto cont;
- }
- s->next[c] = t;
- return;
- cont: ;
- }
- t = sal(nfollow);
- *tt = t;
- for (i = 0; i < nfollow; i++) {
- re1 = follow[i];
- t->re[i] = re1;
- }
- s->next[c] = t;
- t->match = matched;
- }
- int
- fcmp(const void *va, const void *vb)
- {
- const Re **aa = (const Re**)va;
- const Re **bb = (const Re**)vb;
- const Re *a = *aa;
- const Re *b = *bb;
- if (a > b)
- return 1;
- if (a < b)
- return -1;
- return 0;
- }
- void
- fol1(Re * r, int c)
- {
- Re *r1;
- loop:
- if (r->gen == gen)
- return;
- if (nfollow >= maxfollow)
- error("nfollow");
- r->gen = gen;
- switch (r->type) {
- default:
- error("fol1");
- case Tcase:
- if (c >= 0 && c < 256)
- if (r1 = r->cases[c])
- follow[nfollow++] = r1;
- if (r = r->next)
- goto loop;
- break;
- case Talt:
- case Tor:
- fol1(r->alt, c);
- r = r->next;
- goto loop;
- case Tbegin:
- if (c == '\n' || c == Cbegin)
- follow[nfollow++] = r->next;
- break;
- case Tend:
- if (c == '\n') {
- if (r->next == 0) {
- matched = 1;
- break;
- }
- r = r->next;
- goto loop;
- }
- break;
- case Tclass:
- if (c >= r->lo && c <= r->hi)
- follow[nfollow++] = r->next;
- break;
- }
- }
- Rune tab1[] = {
- 0x007f,
- 0x07ff,
- };
- Rune tab2[] = {
- 0x003f,
- 0x0fff,
- };
- Re2
- rclass(Rune p0, Rune p1)
- {
- char xc0[6], xc1[6];
- int i, n, m;
- Re2 x;
- if (p0 > p1)
- return re2char(0xff, 0xff); // no match
- /*
- * bust range into same length
- * character sequences
- */
- for (i = 0; i < nelem(tab1); i++) {
- m = tab1[i];
- if (p0 <= m && p1 > m)
- return re2or(rclass(p0, m), rclass(m + 1, p1));
- }
- /*
- * bust range into part of a single page
- * or into full pages
- */
- for (i = 0; i < nelem(tab2); i++) {
- m = tab2[i];
- if ((p0 & ~m) != (p1 & ~m)) {
- if ((p0 & m) != 0)
- return re2or(rclass(p0, p0 | m), rclass((p0 | m) + 1, p1));
- if ((p1 & m) != m)
- return re2or(rclass(p0, (p1 & ~m) - 1), rclass(p1 & ~m, p1));
- }
- }
- n = runetochar(xc0, &p0);
- i = runetochar(xc1, &p1);
- if (i != n)
- error("length");
- x = re2char(xc0[0], xc1[0]);
- for (i = 1; i < n; i++)
- x = re2cat(x, re2char(xc0[i], xc1[i]));
- return x;
- }
- int
- pcmp(const void *va, const void *vb)
- {
- int n;
- const Rune *a = (const Rune*)va;
- const Rune *b = (const Rune*)vb;
- n = a[0] - b[0];
- if (n)
- return n;
- return a[1] - b[1];
- }
- /*
- * convert character chass into
- * run-pair ranges of matches.
- * this is 10646/utf specific and
- * needs to be changed for some
- * other input character set.
- * this is the key to a fast
- * regular search of characters
- * by looking at sequential bytes.
- */
- Re2
- re2class(char *s)
- {
- Rune pairs[200 + 2], *p, *q, ov;
- int nc;
- Re2 x;
- nc = 0;
- if (*s == '^') {
- nc = 1;
- s++;
- }
- p = pairs;
- s += chartorune(p, s);
- for (;;) {
- if (*p == '\\')
- s += chartorune(p, s);
- if (*p == 0)
- break;
- p[1] = *p;
- p += 2;
- if (p >= pairs + nelem(pairs) - 2)
- error("class too big");
- s += chartorune(p, s);
- if (*p != '-')
- continue;
- s += chartorune(p, s);
- if (*p == '\\')
- s += chartorune(p, s);
- if (*p == 0)
- break;
- p[-1] = *p;
- s += chartorune(p, s);
- }
- *p = 0;
- qsort(pairs, (p - pairs) / 2, 2 * sizeof(*pairs), pcmp);
- q = pairs;
- for (p = pairs + 2; *p; p += 2) {
- if (p[0] > p[1])
- continue;
- if (p[0] > q[1] || p[1] < q[0]) {
- q[2] = p[0];
- q[3] = p[1];
- q += 2;
- continue;
- }
- if (p[0] < q[0])
- q[0] = p[0];
- if (p[1] > q[1])
- q[1] = p[1];
- }
- q[2] = 0;
- p = pairs;
- if (nc) {
- x = rclass(0, p[0] - 1);
- ov = p[1] + 1;
- for (p += 2; *p; p += 2) {
- x = re2or(x, rclass(ov, p[0] - 1));
- ov = p[1] + 1;
- }
- x = re2or(x, rclass(ov, Runemask));
- } else {
- x = rclass(p[0], p[1]);
- for (p += 2; *p; p += 2)
- x = re2or(x, rclass(p[0], p[1]));
- }
- return x;
- }
|