rregexec.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. #include <u.h>
  2. #include <libc.h>
  3. #include "regexp.h"
  4. #include "regcomp.h"
  5. /*
  6. * return 0 if no match
  7. * >0 if a match
  8. * <0 if we ran out of _relist space
  9. */
  10. static int
  11. rregexec1(Reprog *progp, /* program to run */
  12. Rune *bol, /* string to run machine on */
  13. Resub *mp, /* subexpression elements */
  14. int ms, /* number of elements at mp */
  15. Reljunk *j)
  16. {
  17. int flag=0;
  18. Reinst *inst;
  19. Relist *tlp;
  20. Rune *s;
  21. int i, checkstart;
  22. Rune r, *rp, *ep;
  23. Relist* tl; /* This list, next list */
  24. Relist* nl;
  25. Relist* tle; /* ends of this and next list */
  26. Relist* nle;
  27. int match;
  28. Rune *p;
  29. match = 0;
  30. checkstart = j->startchar;
  31. if(mp)
  32. for(i=0; i<ms; i++) {
  33. mp[i].rsp = 0;
  34. mp[i].rep = 0;
  35. }
  36. j->relist[0][0].inst = 0;
  37. j->relist[1][0].inst = 0;
  38. /* Execute machine once for each character, including terminal NUL */
  39. s = j->rstarts;
  40. do{
  41. /* fast check for first char */
  42. if(checkstart) {
  43. switch(j->starttype) {
  44. case RUNE:
  45. p = runestrchr(s, j->startchar);
  46. if(p == 0 || s == j->reol)
  47. return match;
  48. s = p;
  49. break;
  50. case BOL:
  51. if(s == bol)
  52. break;
  53. p = runestrchr(s, '\n');
  54. if(p == 0 || s == j->reol)
  55. return match;
  56. s = p+1;
  57. break;
  58. }
  59. }
  60. r = *s;
  61. /* switch run lists */
  62. tl = j->relist[flag];
  63. tle = j->reliste[flag];
  64. nl = j->relist[flag^=1];
  65. nle = j->reliste[flag];
  66. nl->inst = 0;
  67. /* Add first instruction to current list */
  68. _rrenewemptythread(tl, progp->startinst, ms, s);
  69. /* Execute machine until current list is empty */
  70. for(tlp=tl; tlp->inst; tlp++){
  71. for(inst=tlp->inst; ; inst = inst->next){
  72. switch(inst->type){
  73. case RUNE: /* regular character */
  74. if(inst->r == r)
  75. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  76. return -1;
  77. break;
  78. case LBRA:
  79. tlp->se.m[inst->subid].rsp = s;
  80. continue;
  81. case RBRA:
  82. tlp->se.m[inst->subid].rep = s;
  83. continue;
  84. case ANY:
  85. if(r != '\n')
  86. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  87. return -1;
  88. break;
  89. case ANYNL:
  90. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  91. return -1;
  92. break;
  93. case BOL:
  94. if(s == bol || *(s-1) == '\n')
  95. continue;
  96. break;
  97. case EOL:
  98. if(s == j->reol || r == 0 || r == '\n')
  99. continue;
  100. break;
  101. case CCLASS:
  102. ep = inst->cp->end;
  103. for(rp = inst->cp->spans; rp < ep; rp += 2)
  104. if(r >= rp[0] && r <= rp[1]){
  105. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  106. return -1;
  107. break;
  108. }
  109. break;
  110. case NCCLASS:
  111. ep = inst->cp->end;
  112. for(rp = inst->cp->spans; rp < ep; rp += 2)
  113. if(r >= rp[0] && r <= rp[1])
  114. break;
  115. if(rp == ep)
  116. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  117. return -1;
  118. break;
  119. case OR:
  120. /* evaluate right choice later */
  121. if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
  122. return -1;
  123. /* efficiency: advance and re-evaluate */
  124. continue;
  125. case END: /* Match! */
  126. match = 1;
  127. tlp->se.m[0].rep = s;
  128. if(mp != 0)
  129. _renewmatch(mp, ms, &tlp->se);
  130. break;
  131. }
  132. break;
  133. }
  134. }
  135. if(s == j->reol)
  136. break;
  137. checkstart = j->startchar && nl->inst==0;
  138. s++;
  139. }while(r);
  140. return match;
  141. }
  142. static int
  143. rregexec2(Reprog *progp, /* program to run */
  144. Rune *bol, /* string to run machine on */
  145. Resub *mp, /* subexpression elements */
  146. int ms, /* number of elements at mp */
  147. Reljunk *j
  148. )
  149. {
  150. Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
  151. /* mark space */
  152. j->relist[0] = relist0;
  153. j->relist[1] = relist1;
  154. j->reliste[0] = relist0 + nelem(relist0) - 2;
  155. j->reliste[1] = relist1 + nelem(relist1) - 2;
  156. return rregexec1(progp, bol, mp, ms, j);
  157. }
  158. extern int
  159. rregexec(Reprog *progp, /* program to run */
  160. Rune *bol, /* string to run machine on */
  161. Resub *mp, /* subexpression elements */
  162. int ms) /* number of elements at mp */
  163. {
  164. Reljunk j;
  165. Relist relist0[LISTSIZE], relist1[LISTSIZE];
  166. int rv;
  167. /*
  168. * use user-specified starting/ending location if specified
  169. */
  170. j.rstarts = bol;
  171. j.reol = 0;
  172. if(mp && ms>0){
  173. if(mp->sp)
  174. j.rstarts = mp->rsp;
  175. if(mp->ep)
  176. j.reol = mp->rep;
  177. }
  178. j.starttype = 0;
  179. j.startchar = 0;
  180. if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
  181. j.starttype = RUNE;
  182. j.startchar = progp->startinst->r;
  183. }
  184. if(progp->startinst->type == BOL)
  185. j.starttype = BOL;
  186. /* mark space */
  187. j.relist[0] = relist0;
  188. j.relist[1] = relist1;
  189. j.reliste[0] = relist0 + nelem(relist0) - 2;
  190. j.reliste[1] = relist1 + nelem(relist1) - 2;
  191. rv = rregexec1(progp, bol, mp, ms, &j);
  192. if(rv >= 0)
  193. return rv;
  194. rv = rregexec2(progp, bol, mp, ms, &j);
  195. if(rv >= 0)
  196. return rv;
  197. return -1;
  198. }