b.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. /****************************************************************
  2. Copyright (C) Lucent Technologies 1997
  3. All Rights Reserved
  4. Permission to use, copy, modify, and distribute this software and
  5. its documentation for any purpose and without fee is hereby
  6. granted, provided that the above copyright notice appear in all
  7. copies and that both that the copyright notice and this
  8. permission notice and warranty disclaimer appear in supporting
  9. documentation, and that the name Lucent Technologies or any of
  10. its entities not be used in advertising or publicity pertaining
  11. to distribution of the software without specific, written prior
  12. permission.
  13. LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  14. INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  15. IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  16. SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  17. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  18. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  19. ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  20. THIS SOFTWARE.
  21. ****************************************************************/
  22. /* lasciate ogne speranza, voi ch'entrate. */
  23. #define DEBUG
  24. #include <ctype.h>
  25. #include <stdio.h>
  26. #include <string.h>
  27. #include <stdlib.h>
  28. #include "awk.h"
  29. #include "ytab.h"
  30. #define HAT (NCHARS-2) /* matches ^ in regular expr */
  31. /* NCHARS is 2**n */
  32. #define MAXLIN 22
  33. #define type(v) (v)->nobj /* badly overloaded here */
  34. #define info(v) (v)->ntype /* badly overloaded here */
  35. #define left(v) (v)->narg[0]
  36. #define right(v) (v)->narg[1]
  37. #define parent(v) (v)->nnext
  38. #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
  39. #define UNARY case STAR: case PLUS: case QUEST:
  40. /* encoding in tree Nodes:
  41. leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
  42. left is index, right contains value or pointer to value
  43. unary (STAR, PLUS, QUEST): left is child, right is null
  44. binary (CAT, OR): left and right are children
  45. parent contains pointer to parent
  46. */
  47. int *setvec;
  48. int *tmpset;
  49. int maxsetvec = 0;
  50. int rtok; /* next token in current re */
  51. int rlxval;
  52. static uschar *rlxstr;
  53. static uschar *prestr; /* current position in current re */
  54. static uschar *lastre; /* origin of last re */
  55. static int setcnt;
  56. static int poscnt;
  57. char *patbeg;
  58. int patlen;
  59. #define NFA 20 /* cache this many dynamic fa's */
  60. fa *fatab[NFA];
  61. int nfatab = 0; /* entries in fatab */
  62. fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
  63. {
  64. int i, use, nuse;
  65. fa *pfa;
  66. static int now = 1;
  67. if (setvec == 0) { /* first time through any RE */
  68. maxsetvec = MAXLIN;
  69. setvec = (int *) malloc(maxsetvec * sizeof(int));
  70. tmpset = (int *) malloc(maxsetvec * sizeof(int));
  71. if (setvec == 0 || tmpset == 0)
  72. overflo("out of space initializing makedfa");
  73. }
  74. if (compile_time) /* a constant for sure */
  75. return mkdfa(s, anchor);
  76. for (i = 0; i < nfatab; i++) /* is it there already? */
  77. if (fatab[i]->anchor == anchor
  78. && strcmp(fatab[i]->restr, s) == 0) {
  79. fatab[i]->use = now++;
  80. return fatab[i];
  81. }
  82. pfa = mkdfa(s, anchor);
  83. if (nfatab < NFA) { /* room for another */
  84. fatab[nfatab] = pfa;
  85. fatab[nfatab]->use = now++;
  86. nfatab++;
  87. return pfa;
  88. }
  89. use = fatab[0]->use; /* replace least-recently used */
  90. nuse = 0;
  91. for (i = 1; i < nfatab; i++)
  92. if (fatab[i]->use < use) {
  93. use = fatab[i]->use;
  94. nuse = i;
  95. }
  96. freefa(fatab[nuse]);
  97. fatab[nuse] = pfa;
  98. pfa->use = now++;
  99. return pfa;
  100. }
  101. fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
  102. /* anchor = 1 for anchored matches, else 0 */
  103. {
  104. Node *p, *p1;
  105. fa *f;
  106. p = reparse(s);
  107. p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
  108. /* put ALL STAR in front of reg. exp. */
  109. p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
  110. /* put FINAL after reg. exp. */
  111. poscnt = 0;
  112. penter(p1); /* enter parent pointers and leaf indices */
  113. if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
  114. overflo("out of space for fa");
  115. f->accept = poscnt-1; /* penter has computed number of positions in re */
  116. cfoll(f, p1); /* set up follow sets */
  117. freetr(p1);
  118. if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
  119. overflo("out of space in makedfa");
  120. if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
  121. overflo("out of space in makedfa");
  122. *f->posns[1] = 0;
  123. f->initstat = makeinit(f, anchor);
  124. f->anchor = anchor;
  125. f->restr = (uschar *) tostring(s);
  126. return f;
  127. }
  128. int makeinit(fa *f, int anchor)
  129. {
  130. int i, k;
  131. f->curstat = 2;
  132. f->out[2] = 0;
  133. f->reset = 0;
  134. k = *(f->re[0].lfollow);
  135. xfree(f->posns[2]);
  136. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  137. overflo("out of space in makeinit");
  138. for (i=0; i <= k; i++) {
  139. (f->posns[2])[i] = (f->re[0].lfollow)[i];
  140. }
  141. if ((f->posns[2])[1] == f->accept)
  142. f->out[2] = 1;
  143. for (i=0; i < NCHARS; i++)
  144. f->gototab[2][i] = 0;
  145. f->curstat = cgoto(f, 2, HAT);
  146. if (anchor) {
  147. *f->posns[2] = k-1; /* leave out position 0 */
  148. for (i=0; i < k; i++) {
  149. (f->posns[0])[i] = (f->posns[2])[i];
  150. }
  151. f->out[0] = f->out[2];
  152. if (f->curstat != 2)
  153. --(*f->posns[f->curstat]);
  154. }
  155. return f->curstat;
  156. }
  157. void penter(Node *p) /* set up parent pointers and leaf indices */
  158. {
  159. switch (type(p)) {
  160. LEAF
  161. info(p) = poscnt;
  162. poscnt++;
  163. break;
  164. UNARY
  165. penter(left(p));
  166. parent(left(p)) = p;
  167. break;
  168. case CAT:
  169. case OR:
  170. penter(left(p));
  171. penter(right(p));
  172. parent(left(p)) = p;
  173. parent(right(p)) = p;
  174. break;
  175. default: /* can't happen */
  176. FATAL("can't happen: unknown type %d in penter", type(p));
  177. break;
  178. }
  179. }
  180. void freetr(Node *p) /* free parse tree */
  181. {
  182. switch (type(p)) {
  183. LEAF
  184. xfree(p);
  185. break;
  186. UNARY
  187. freetr(left(p));
  188. xfree(p);
  189. break;
  190. case CAT:
  191. case OR:
  192. freetr(left(p));
  193. freetr(right(p));
  194. xfree(p);
  195. break;
  196. default: /* can't happen */
  197. FATAL("can't happen: unknown type %d in freetr", type(p));
  198. break;
  199. }
  200. }
  201. /* in the parsing of regular expressions, metacharacters like . have */
  202. /* to be seen literally; \056 is not a metacharacter. */
  203. int hexstr(char **pp) /* find and eval hex string at pp, return new p */
  204. { /* only pick up one 8-bit byte (2 chars) */
  205. uschar *p;
  206. int n = 0;
  207. int i;
  208. for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
  209. if (isdigit(*p))
  210. n = 16 * n + *p - '0';
  211. else if (*p >= 'a' && *p <= 'f')
  212. n = 16 * n + *p - 'a' + 10;
  213. else if (*p >= 'A' && *p <= 'F')
  214. n = 16 * n + *p - 'A' + 10;
  215. }
  216. *pp = (char *) p;
  217. return n;
  218. }
  219. #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
  220. int quoted(char **pp) /* pick up next thing after a \\ */
  221. /* and increment *pp */
  222. {
  223. char *p = *pp;
  224. int c;
  225. if ((c = *p++) == 't')
  226. c = '\t';
  227. else if (c == 'n')
  228. c = '\n';
  229. else if (c == 'f')
  230. c = '\f';
  231. else if (c == 'r')
  232. c = '\r';
  233. else if (c == 'b')
  234. c = '\b';
  235. else if (c == '\\')
  236. c = '\\';
  237. else if (c == 'x') { /* hexadecimal goo follows */
  238. c = hexstr(&p); /* this adds a null if number is invalid */
  239. } else if (isoctdigit(c)) { /* \d \dd \ddd */
  240. int n = c - '0';
  241. if (isoctdigit(*p)) {
  242. n = 8 * n + *p++ - '0';
  243. if (isoctdigit(*p))
  244. n = 8 * n + *p++ - '0';
  245. }
  246. c = n;
  247. } /* else */
  248. /* c = c; */
  249. *pp = p;
  250. return c;
  251. }
  252. char *cclenter(char *argp) /* add a character class */
  253. {
  254. int i, c, c2;
  255. uschar *p = (uschar *) argp;
  256. uschar *op, *bp;
  257. static uschar *buf = 0;
  258. static int bufsz = 100;
  259. op = p;
  260. if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  261. FATAL("out of space for character class [%.10s...] 1", p);
  262. bp = buf;
  263. for (i = 0; (c = *p++) != 0; ) {
  264. if (c == '\\') {
  265. c = quoted((char **) &p);
  266. } else if (c == '-' && i > 0 && bp[-1] != 0) {
  267. if (*p != 0) {
  268. c = bp[-1];
  269. c2 = *p++;
  270. if (c2 == '\\')
  271. c2 = quoted((char **) &p);
  272. if (c > c2) { /* empty; ignore */
  273. bp--;
  274. i--;
  275. continue;
  276. }
  277. while (c < c2) {
  278. if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
  279. FATAL("out of space for character class [%.10s...] 2", p);
  280. *bp++ = ++c;
  281. i++;
  282. }
  283. continue;
  284. }
  285. }
  286. if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
  287. FATAL("out of space for character class [%.10s...] 3", p);
  288. *bp++ = c;
  289. i++;
  290. }
  291. *bp = 0;
  292. dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
  293. xfree(op);
  294. return (char *) tostring((char *) buf);
  295. }
  296. void overflo(char *s)
  297. {
  298. FATAL("regular expression too big: %.30s...", s);
  299. }
  300. void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
  301. {
  302. int i;
  303. int *p;
  304. switch (type(v)) {
  305. LEAF
  306. f->re[info(v)].ltype = type(v);
  307. f->re[info(v)].lval.np = right(v);
  308. while (f->accept >= maxsetvec) { /* guessing here! */
  309. maxsetvec *= 4;
  310. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  311. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  312. if (setvec == 0 || tmpset == 0)
  313. overflo("out of space in cfoll()");
  314. }
  315. for (i = 0; i <= f->accept; i++)
  316. setvec[i] = 0;
  317. setcnt = 0;
  318. follow(v); /* computes setvec and setcnt */
  319. if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  320. overflo("out of space building follow set");
  321. f->re[info(v)].lfollow = p;
  322. *p = setcnt;
  323. for (i = f->accept; i >= 0; i--)
  324. if (setvec[i] == 1)
  325. *++p = i;
  326. break;
  327. UNARY
  328. cfoll(f,left(v));
  329. break;
  330. case CAT:
  331. case OR:
  332. cfoll(f,left(v));
  333. cfoll(f,right(v));
  334. break;
  335. default: /* can't happen */
  336. FATAL("can't happen: unknown type %d in cfoll", type(v));
  337. }
  338. }
  339. int first(Node *p) /* collects initially active leaves of p into setvec */
  340. /* returns 1 if p matches empty string */
  341. {
  342. int b, lp;
  343. switch (type(p)) {
  344. LEAF
  345. lp = info(p); /* look for high-water mark of subscripts */
  346. while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
  347. maxsetvec *= 4;
  348. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  349. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  350. if (setvec == 0 || tmpset == 0)
  351. overflo("out of space in first()");
  352. }
  353. if (setvec[lp] != 1) {
  354. setvec[lp] = 1;
  355. setcnt++;
  356. }
  357. if (type(p) == CCL && (*(char *) right(p)) == '\0')
  358. return(0); /* empty CCL */
  359. else return(1);
  360. case PLUS:
  361. if (first(left(p)) == 0) return(0);
  362. return(1);
  363. case STAR:
  364. case QUEST:
  365. first(left(p));
  366. return(0);
  367. case CAT:
  368. if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
  369. return(1);
  370. case OR:
  371. b = first(right(p));
  372. if (first(left(p)) == 0 || b == 0) return(0);
  373. return(1);
  374. }
  375. FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
  376. return(-1);
  377. }
  378. void follow(Node *v) /* collects leaves that can follow v into setvec */
  379. {
  380. Node *p;
  381. if (type(v) == FINAL)
  382. return;
  383. p = parent(v);
  384. switch (type(p)) {
  385. case STAR:
  386. case PLUS:
  387. first(v);
  388. follow(p);
  389. return;
  390. case OR:
  391. case QUEST:
  392. follow(p);
  393. return;
  394. case CAT:
  395. if (v == left(p)) { /* v is left child of p */
  396. if (first(right(p)) == 0) {
  397. follow(p);
  398. return;
  399. }
  400. } else /* v is right child */
  401. follow(p);
  402. return;
  403. }
  404. }
  405. int member(int c, char *sarg) /* is c in s? */
  406. {
  407. uschar *s = (uschar *) sarg;
  408. while (*s)
  409. if (c == *s++)
  410. return(1);
  411. return(0);
  412. }
  413. int match(fa *f, char *p0) /* shortest match ? */
  414. {
  415. int s, ns;
  416. uschar *p = (uschar *) p0;
  417. s = f->reset ? makeinit(f,0) : f->initstat;
  418. if (f->out[s])
  419. return(1);
  420. do {
  421. if ((ns = f->gototab[s][*p]) != 0)
  422. s = ns;
  423. else
  424. s = cgoto(f, s, *p);
  425. if (f->out[s])
  426. return(1);
  427. } while (*p++ != 0);
  428. return(0);
  429. }
  430. int pmatch(fa *f, char *p0) /* longest match, for sub */
  431. {
  432. int s, ns;
  433. uschar *p = (uschar *) p0;
  434. uschar *q;
  435. int i, k;
  436. s = f->reset ? makeinit(f,1) : f->initstat;
  437. patbeg = (char *) p;
  438. patlen = -1;
  439. do {
  440. q = p;
  441. do {
  442. if (f->out[s]) /* final state */
  443. patlen = q-p;
  444. if ((ns = f->gototab[s][*q]) != 0)
  445. s = ns;
  446. else
  447. s = cgoto(f, s, *q);
  448. if (s == 1) { /* no transition */
  449. if (patlen >= 0) {
  450. patbeg = (char *) p;
  451. return(1);
  452. }
  453. else
  454. goto nextin; /* no match */
  455. }
  456. } while (*q++ != 0);
  457. if (f->out[s])
  458. patlen = q-p-1; /* don't count $ */
  459. if (patlen >= 0) {
  460. patbeg = (char *) p;
  461. return(1);
  462. }
  463. nextin:
  464. s = 2;
  465. if (f->reset) {
  466. for (i = 2; i <= f->curstat; i++)
  467. xfree(f->posns[i]);
  468. k = *f->posns[0];
  469. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  470. overflo("out of space in pmatch");
  471. for (i = 0; i <= k; i++)
  472. (f->posns[2])[i] = (f->posns[0])[i];
  473. f->initstat = f->curstat = 2;
  474. f->out[2] = f->out[0];
  475. for (i = 0; i < NCHARS; i++)
  476. f->gototab[2][i] = 0;
  477. }
  478. } while (*p++ != 0);
  479. return (0);
  480. }
  481. int nematch(fa *f, char *p0) /* non-empty match, for sub */
  482. {
  483. int s, ns;
  484. uschar *p = (uschar *) p0;
  485. uschar *q;
  486. int i, k;
  487. s = f->reset ? makeinit(f,1) : f->initstat;
  488. patlen = -1;
  489. while (*p) {
  490. q = p;
  491. do {
  492. if (f->out[s]) /* final state */
  493. patlen = q-p;
  494. if ((ns = f->gototab[s][*q]) != 0)
  495. s = ns;
  496. else
  497. s = cgoto(f, s, *q);
  498. if (s == 1) { /* no transition */
  499. if (patlen > 0) {
  500. patbeg = (char *) p;
  501. return(1);
  502. } else
  503. goto nnextin; /* no nonempty match */
  504. }
  505. } while (*q++ != 0);
  506. if (f->out[s])
  507. patlen = q-p-1; /* don't count $ */
  508. if (patlen > 0 ) {
  509. patbeg = (char *) p;
  510. return(1);
  511. }
  512. nnextin:
  513. s = 2;
  514. if (f->reset) {
  515. for (i = 2; i <= f->curstat; i++)
  516. xfree(f->posns[i]);
  517. k = *f->posns[0];
  518. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  519. overflo("out of state space");
  520. for (i = 0; i <= k; i++)
  521. (f->posns[2])[i] = (f->posns[0])[i];
  522. f->initstat = f->curstat = 2;
  523. f->out[2] = f->out[0];
  524. for (i = 0; i < NCHARS; i++)
  525. f->gototab[2][i] = 0;
  526. }
  527. p++;
  528. }
  529. return (0);
  530. }
  531. Node *reparse(char *p) /* parses regular expression pointed to by p */
  532. { /* uses relex() to scan regular expression */
  533. Node *np;
  534. dprintf( ("reparse <%s>\n", p) );
  535. lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
  536. rtok = relex();
  537. if (rtok == '\0')
  538. FATAL("empty regular expression");
  539. np = regexp();
  540. if (rtok != '\0')
  541. FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  542. return(np);
  543. }
  544. Node *regexp(void) /* top-level parse of reg expr */
  545. {
  546. return (alt(concat(primary())));
  547. }
  548. Node *primary(void)
  549. {
  550. Node *np;
  551. switch (rtok) {
  552. case CHAR:
  553. np = op2(CHAR, NIL, itonp(rlxval));
  554. rtok = relex();
  555. return (unary(np));
  556. case ALL:
  557. rtok = relex();
  558. return (unary(op2(ALL, NIL, NIL)));
  559. case DOT:
  560. rtok = relex();
  561. return (unary(op2(DOT, NIL, NIL)));
  562. case CCL:
  563. np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
  564. rtok = relex();
  565. return (unary(np));
  566. case NCCL:
  567. np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
  568. rtok = relex();
  569. return (unary(np));
  570. case '^':
  571. rtok = relex();
  572. return (unary(op2(CHAR, NIL, itonp(HAT))));
  573. case '$':
  574. rtok = relex();
  575. return (unary(op2(CHAR, NIL, NIL)));
  576. case '(':
  577. rtok = relex();
  578. if (rtok == ')') { /* special pleading for () */
  579. rtok = relex();
  580. return unary(op2(CCL, NIL, (Node *) tostring("")));
  581. }
  582. np = regexp();
  583. if (rtok == ')') {
  584. rtok = relex();
  585. return (unary(np));
  586. }
  587. else
  588. FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  589. default:
  590. FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
  591. }
  592. return 0; /*NOTREACHED*/
  593. }
  594. Node *concat(Node *np)
  595. {
  596. switch (rtok) {
  597. case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
  598. return (concat(op2(CAT, np, primary())));
  599. }
  600. return (np);
  601. }
  602. Node *alt(Node *np)
  603. {
  604. if (rtok == OR) {
  605. rtok = relex();
  606. return (alt(op2(OR, np, concat(primary()))));
  607. }
  608. return (np);
  609. }
  610. Node *unary(Node *np)
  611. {
  612. switch (rtok) {
  613. case STAR:
  614. rtok = relex();
  615. return (unary(op2(STAR, np, NIL)));
  616. case PLUS:
  617. rtok = relex();
  618. return (unary(op2(PLUS, np, NIL)));
  619. case QUEST:
  620. rtok = relex();
  621. return (unary(op2(QUEST, np, NIL)));
  622. default:
  623. return (np);
  624. }
  625. }
  626. int relex(void) /* lexical analyzer for reparse */
  627. {
  628. int c, n;
  629. int cflag;
  630. static uschar *buf = 0;
  631. static int bufsz = 100;
  632. uschar *bp;
  633. switch (c = *prestr++) {
  634. case '|': return OR;
  635. case '*': return STAR;
  636. case '+': return PLUS;
  637. case '?': return QUEST;
  638. case '.': return DOT;
  639. case '\0': prestr--; return '\0';
  640. case '^':
  641. case '$':
  642. case '(':
  643. case ')':
  644. return c;
  645. case '\\':
  646. rlxval = quoted((char **) &prestr);
  647. return CHAR;
  648. default:
  649. rlxval = c;
  650. return CHAR;
  651. case '[':
  652. if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  653. FATAL("out of space in reg expr %.10s..", lastre);
  654. bp = buf;
  655. if (*prestr == '^') {
  656. cflag = 1;
  657. prestr++;
  658. }
  659. else
  660. cflag = 0;
  661. n = 2 * strlen(prestr)+1;
  662. if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
  663. FATAL("out of space for reg expr %.10s...", lastre);
  664. for (; ; ) {
  665. if ((c = *prestr++) == '\\') {
  666. *bp++ = '\\';
  667. if ((c = *prestr++) == '\0')
  668. FATAL("nonterminated character class %.20s...", lastre);
  669. *bp++ = c;
  670. /* } else if (c == '\n') { */
  671. /* FATAL("newline in character class %.20s...", lastre); */
  672. } else if (c == '\0') {
  673. FATAL("nonterminated character class %.20s", lastre);
  674. } else if (bp == buf) { /* 1st char is special */
  675. *bp++ = c;
  676. } else if (c == ']') {
  677. *bp++ = 0;
  678. rlxstr = (uschar *) tostring((char *) buf);
  679. if (cflag == 0)
  680. return CCL;
  681. else
  682. return NCCL;
  683. } else
  684. *bp++ = c;
  685. }
  686. }
  687. }
  688. int cgoto(fa *f, int s, int c)
  689. {
  690. int i, j, k;
  691. int *p, *q;
  692. if (c < 0 || c > 255)
  693. FATAL("can't happen: neg char %d in cgoto", c);
  694. while (f->accept >= maxsetvec) { /* guessing here! */
  695. maxsetvec *= 4;
  696. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  697. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  698. if (setvec == 0 || tmpset == 0)
  699. overflo("out of space in cgoto()");
  700. }
  701. for (i = 0; i <= f->accept; i++)
  702. setvec[i] = 0;
  703. setcnt = 0;
  704. /* compute positions of gototab[s,c] into setvec */
  705. p = f->posns[s];
  706. for (i = 1; i <= *p; i++) {
  707. if ((k = f->re[p[i]].ltype) != FINAL) {
  708. if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
  709. || (k == DOT && c != 0 && c != HAT)
  710. || (k == ALL && c != 0)
  711. || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
  712. || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
  713. q = f->re[p[i]].lfollow;
  714. for (j = 1; j <= *q; j++) {
  715. if (q[j] >= maxsetvec) {
  716. maxsetvec *= 4;
  717. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  718. tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
  719. if (setvec == 0 || tmpset == 0)
  720. overflo("cgoto overflow");
  721. }
  722. if (setvec[q[j]] == 0) {
  723. setcnt++;
  724. setvec[q[j]] = 1;
  725. }
  726. }
  727. }
  728. }
  729. }
  730. /* determine if setvec is a previous state */
  731. tmpset[0] = setcnt;
  732. j = 1;
  733. for (i = f->accept; i >= 0; i--)
  734. if (setvec[i]) {
  735. tmpset[j++] = i;
  736. }
  737. /* tmpset == previous state? */
  738. for (i = 1; i <= f->curstat; i++) {
  739. p = f->posns[i];
  740. if ((k = tmpset[0]) != p[0])
  741. goto different;
  742. for (j = 1; j <= k; j++)
  743. if (tmpset[j] != p[j])
  744. goto different;
  745. /* setvec is state i */
  746. f->gototab[s][c] = i;
  747. return i;
  748. different:;
  749. }
  750. /* add tmpset to current set of states */
  751. if (f->curstat >= NSTATES-1) {
  752. f->curstat = 2;
  753. f->reset = 1;
  754. for (i = 2; i < NSTATES; i++)
  755. xfree(f->posns[i]);
  756. } else
  757. ++(f->curstat);
  758. for (i = 0; i < NCHARS; i++)
  759. f->gototab[f->curstat][i] = 0;
  760. xfree(f->posns[f->curstat]);
  761. if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  762. overflo("out of space in cgoto");
  763. f->posns[f->curstat] = p;
  764. f->gototab[s][c] = f->curstat;
  765. for (i = 0; i <= setcnt; i++)
  766. p[i] = tmpset[i];
  767. if (setvec[f->accept])
  768. f->out[f->curstat] = 1;
  769. else
  770. f->out[f->curstat] = 0;
  771. return f->curstat;
  772. }
  773. void freefa(fa *f) /* free a finite automaton */
  774. {
  775. int i;
  776. if (f == NULL)
  777. return;
  778. for (i = 0; i <= f->curstat; i++)
  779. xfree(f->posns[i]);
  780. for (i = 0; i <= f->accept; i++) {
  781. xfree(f->re[i].lfollow);
  782. if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
  783. xfree((f->re[i].lval.np));
  784. }
  785. xfree(f->restr);
  786. xfree(f);
  787. }