rregexec.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #include <u.h>
  2. #include <libc.h>
  3. #include "regexp.h"
  4. #include "regcomp.h"
  5. /*
  6. * return 0 if no match
  7. * >0 if a match
  8. * <0 if we ran out of _relist space
  9. */
  10. static int
  11. rregexec1(Reprog *progp, /* program to run */
  12. Rune *bol, /* string to run machine on */
  13. Resub *mp, /* subexpression elements */
  14. int ms, /* number of elements at mp */
  15. Reljunk *j)
  16. {
  17. int flag=0;
  18. Reinst *inst;
  19. Relist *tlp;
  20. Rune *s;
  21. int i, checkstart;
  22. Rune r, *rp, *ep;
  23. Relist* tl; /* This list, next list */
  24. Relist* nl;
  25. Relist* tle; /* ends of this and next list */
  26. Relist* nle;
  27. int match;
  28. match = 0;
  29. checkstart = j->startchar;
  30. if(mp)
  31. for(i=0; i<ms; i++) {
  32. mp[i].rsp = 0;
  33. mp[i].rep = 0;
  34. }
  35. j->relist[0][0].inst = 0;
  36. j->relist[1][0].inst = 0;
  37. /* Execute machine once for each character, including terminal NUL */
  38. s = j->rstarts;
  39. do{
  40. /* fast check for first char */
  41. if(checkstart) {
  42. switch(j->starttype) {
  43. case RUNE:
  44. while(*s != j->startchar) {
  45. if(*s == 0 || s == j->reol)
  46. return match;
  47. s++;
  48. }
  49. break;
  50. case BOL:
  51. if(s == bol)
  52. break;
  53. while(*s != '\n') {
  54. if(*s == 0 || s == j->reol)
  55. return match;
  56. s++;
  57. }
  58. break;
  59. }
  60. }
  61. r = *s;
  62. /* switch run lists */
  63. tl = j->relist[flag];
  64. tle = j->reliste[flag];
  65. nl = j->relist[flag^=1];
  66. nle = j->reliste[flag];
  67. nl->inst = 0;
  68. /* Add first instruction to current list */
  69. _rrenewemptythread(tl, progp->startinst, ms, s);
  70. /* Execute machine until current list is empty */
  71. for(tlp=tl; tlp->inst; tlp++){
  72. for(inst=tlp->inst; ; inst = inst->next){
  73. switch(inst->type){
  74. case RUNE: /* regular character */
  75. if(inst->r == r)
  76. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  77. return -1;
  78. break;
  79. case LBRA:
  80. tlp->se.m[inst->subid].rsp = s;
  81. continue;
  82. case RBRA:
  83. tlp->se.m[inst->subid].rep = s;
  84. continue;
  85. case ANY:
  86. if(r != '\n')
  87. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  88. return -1;
  89. break;
  90. case ANYNL:
  91. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  92. return -1;
  93. break;
  94. case BOL:
  95. if(s == bol || *(s-1) == '\n')
  96. continue;
  97. break;
  98. case EOL:
  99. if(s == j->reol || r == 0 || r == '\n')
  100. continue;
  101. break;
  102. case CCLASS:
  103. ep = inst->cp->end;
  104. for(rp = inst->cp->spans; rp < ep; rp += 2)
  105. if(r >= rp[0] && r <= rp[1]){
  106. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  107. return -1;
  108. break;
  109. }
  110. break;
  111. case NCCLASS:
  112. ep = inst->cp->end;
  113. for(rp = inst->cp->spans; rp < ep; rp += 2)
  114. if(r >= rp[0] && r <= rp[1])
  115. break;
  116. if(rp == ep)
  117. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  118. return -1;
  119. break;
  120. case OR:
  121. /* evaluate right choice later */
  122. if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
  123. return -1;
  124. /* efficiency: advance and re-evaluate */
  125. continue;
  126. case END: /* Match! */
  127. match = 1;
  128. tlp->se.m[0].rep = s;
  129. if(mp != 0)
  130. _renewmatch(mp, ms, &tlp->se);
  131. break;
  132. }
  133. break;
  134. }
  135. }
  136. if(s == j->reol)
  137. break;
  138. checkstart = j->startchar && nl->inst==0;
  139. s++;
  140. }while(r);
  141. return match;
  142. }
  143. static int
  144. rregexec2(Reprog *progp, /* program to run */
  145. Rune *bol, /* string to run machine on */
  146. Resub *mp, /* subexpression elements */
  147. int ms, /* number of elements at mp */
  148. Reljunk *j
  149. )
  150. {
  151. Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
  152. /* mark space */
  153. j->relist[0] = relist0;
  154. j->relist[1] = relist1;
  155. j->reliste[0] = relist0 + nelem(relist0) - 2;
  156. j->reliste[1] = relist1 + nelem(relist1) - 2;
  157. return rregexec1(progp, bol, mp, ms, j);
  158. }
  159. extern int
  160. rregexec(Reprog *progp, /* program to run */
  161. Rune *bol, /* string to run machine on */
  162. Resub *mp, /* subexpression elements */
  163. int ms) /* number of elements at mp */
  164. {
  165. Reljunk j;
  166. Relist relist0[LISTSIZE], relist1[LISTSIZE];
  167. int rv;
  168. /*
  169. * use user-specified starting/ending location if specified
  170. */
  171. j.rstarts = bol;
  172. j.reol = 0;
  173. if(mp && ms>0){
  174. if(mp->sp)
  175. j.rstarts = mp->rsp;
  176. if(mp->ep)
  177. j.reol = mp->rep;
  178. }
  179. j.starttype = 0;
  180. j.startchar = 0;
  181. if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
  182. j.starttype = RUNE;
  183. j.startchar = progp->startinst->r;
  184. }
  185. if(progp->startinst->type == BOL)
  186. j.starttype = BOL;
  187. /* mark space */
  188. j.relist[0] = relist0;
  189. j.relist[1] = relist1;
  190. j.reliste[0] = relist0 + nelem(relist0) - 2;
  191. j.reliste[1] = relist1 + nelem(relist1) - 2;
  192. rv = rregexec1(progp, bol, mp, ms, &j);
  193. if(rv >= 0)
  194. return rv;
  195. rv = rregexec2(progp, bol, mp, ms, &j);
  196. if(rv >= 0)
  197. return rv;
  198. return -1;
  199. }