regexec.c 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include "regexp.h"
  4. #include "regcomp.h"
  5. static Resublist sempty; /* empty set of matches */
  6. /*
  7. * return 0 if no match
  8. * >0 if a match
  9. * <0 if we ran out of _relist space
  10. */
  11. static int
  12. regexec1(Reprog *progp, /* program to run */
  13. char *bol, /* string to run machine on */
  14. Resub *mp, /* subexpression elements */
  15. int ms, /* number of elements at mp */
  16. char *starts,
  17. char *eol,
  18. wchar_t startchar)
  19. {
  20. int flag=0;
  21. Reinst *inst;
  22. Relist *tlp;
  23. char *s;
  24. int i, checkstart;
  25. wchar_t r, *rp, *ep;
  26. int n;
  27. Relist* tl; /* This list, next list */
  28. Relist* nl;
  29. Relist* tle; /* ends of this and next list */
  30. Relist* nle;
  31. int match;
  32. match = 0;
  33. checkstart = startchar;
  34. sempty.m[0].s.sp = 0;
  35. if(mp!=0)
  36. for(i=0; i<ms; i++)
  37. mp[i].s.sp = mp[i].e.ep = 0;
  38. _relist[0][0].inst = _relist[1][0].inst = 0;
  39. /* Execute machine once for each character, including terminal NUL */
  40. s = starts;
  41. do{
  42. /* fast check for first char */
  43. r = *(unsigned char*)s;
  44. if(checkstart && r != startchar){
  45. s++;
  46. continue;
  47. }
  48. if(r < Runeself)
  49. n = 1;
  50. else {
  51. n = mbtowc(&r, s, MB_CUR_MAX);
  52. if (n <= 0)
  53. n = 1;
  54. }
  55. /* switch run lists */
  56. tl = _relist[flag];
  57. tle = _reliste[flag];
  58. nl = _relist[flag^=1];
  59. nle = _reliste[flag];
  60. nl->inst = 0;
  61. /* Add first instruction to current list */
  62. if(match == 0){
  63. sempty.m[0].s.sp = s;
  64. _renewthread(tl, progp->startinst, &sempty);
  65. }
  66. /* Execute machine until current list is empty */
  67. for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
  68. if(s == eol)
  69. break;
  70. for(inst = tlp->inst; ; inst = inst->l.next){
  71. switch(inst->type){
  72. case RUNE: /* regular character */
  73. if(inst->r.r == r)
  74. if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
  75. return -1;
  76. break;
  77. case LBRA:
  78. tlp->se.m[inst->r.subid].s.sp = s;
  79. continue;
  80. case RBRA:
  81. tlp->se.m[inst->r.subid].e.ep = s;
  82. continue;
  83. case ANY:
  84. if(r != '\n')
  85. if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
  86. return -1;
  87. break;
  88. case ANYNL:
  89. if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
  90. return -1;
  91. break;
  92. case BOL:
  93. if(s == bol || *(s-1) == '\n')
  94. continue;
  95. break;
  96. case EOL:
  97. if(r == 0 || r == '\n')
  98. continue;
  99. break;
  100. case CCLASS:
  101. ep = inst->r.cp->end;
  102. for(rp = inst->r.cp->spans; rp < ep; rp += 2)
  103. if(r >= rp[0] && r <= rp[1]){
  104. if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
  105. return -1;
  106. break;
  107. }
  108. break;
  109. case NCCLASS:
  110. ep = inst->r.cp->end;
  111. for(rp = inst->r.cp->spans; rp < ep; rp += 2)
  112. if(r >= rp[0] && r <= rp[1])
  113. break;
  114. if(rp == ep)
  115. if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
  116. return -1;
  117. break;
  118. case OR:
  119. /* evaluate right choice later */
  120. if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
  121. return -1;
  122. /* efficiency: advance and re-evaluate */
  123. continue;
  124. case END: /* Match! */
  125. match = 1;
  126. tlp->se.m[0].e.ep = s;
  127. if(mp != 0)
  128. _renewmatch(mp, ms, &tlp->se);
  129. break;
  130. }
  131. break;
  132. }
  133. }
  134. checkstart = startchar && nl->inst==0;
  135. s += n;
  136. }while(r);
  137. return match;
  138. }
  139. extern int
  140. regexec(Reprog *progp, /* program to run */
  141. char *bol, /* string to run machine on */
  142. Resub *mp, /* subexpression elements */
  143. int ms) /* number of elements at mp */
  144. {
  145. char *starts; /* where to start match */
  146. char *eol; /* where to end match */
  147. wchar_t startchar;
  148. int rv;
  149. /*
  150. * use user-specified starting/ending location if specified
  151. */
  152. starts = bol;
  153. eol = 0;
  154. if(mp && ms>0){
  155. if(mp->s.sp)
  156. starts = mp->s.sp;
  157. if(mp->e.ep)
  158. eol = mp->e.ep;
  159. }
  160. startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
  161. ? progp->startinst->r.r : 0;
  162. /* keep trying till we have enough list space to terminate */
  163. for(;;){
  164. if(_relist[0] == 0){
  165. _relist[0] = malloc(2*_relistsize*sizeof(Relist));
  166. _relist[1] = _relist[0] + _relistsize;
  167. _reliste[0] = _relist[0] + _relistsize - 1;
  168. _reliste[1] = _relist[1] + _relistsize - 1;
  169. if(_relist[0] == 0)
  170. regerror("_relist overflow");
  171. }
  172. rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
  173. if(rv >= 0)
  174. return rv;
  175. free(_relist[0]);
  176. _relist[0] = 0;
  177. _relistsize += LISTINCREMENT;
  178. }
  179. }