rregexec.c 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include "regexp.h"
  12. #include "regcomp.h"
  13. /*
  14. * return 0 if no match
  15. * >0 if a match
  16. * <0 if we ran out of _relist space
  17. */
  18. static int
  19. rregexec1(Reprog *progp, /* program to run */
  20. Rune *bol, /* string to run machine on */
  21. Resub *mp, /* subexpression elements */
  22. int ms, /* number of elements at mp */
  23. Reljunk *j)
  24. {
  25. int flag=0;
  26. Reinst *inst;
  27. Relist *tlp;
  28. Rune *s;
  29. int i, checkstart;
  30. Rune r, *rp, *ep;
  31. Relist* tl; /* This list, next list */
  32. Relist* nl;
  33. Relist* tle; /* ends of this and next list */
  34. Relist* nle;
  35. int match;
  36. Rune *p;
  37. match = 0;
  38. checkstart = j->startchar;
  39. if(mp)
  40. for(i=0; i<ms; i++) {
  41. mp[i].rsp = 0;
  42. mp[i].rep = 0;
  43. }
  44. j->relist[0][0].inst = 0;
  45. j->relist[1][0].inst = 0;
  46. /* Execute machine once for each character, including terminal NUL */
  47. s = j->rstarts;
  48. do{
  49. /* fast check for first char */
  50. if(checkstart) {
  51. switch(j->starttype) {
  52. case RUNE:
  53. p = runestrchr(s, j->startchar);
  54. if(p == 0 || s == j->reol)
  55. return match;
  56. s = p;
  57. break;
  58. case BOL:
  59. if(s == bol)
  60. break;
  61. p = runestrchr(s, '\n');
  62. if(p == 0 || s == j->reol)
  63. return match;
  64. s = p+1;
  65. break;
  66. }
  67. }
  68. r = *s;
  69. /* switch run lists */
  70. tl = j->relist[flag];
  71. tle = j->reliste[flag];
  72. nl = j->relist[flag^=1];
  73. nle = j->reliste[flag];
  74. nl->inst = 0;
  75. /* Add first instruction to current list */
  76. _rrenewemptythread(tl, progp->startinst, ms, s);
  77. /* Execute machine until current list is empty */
  78. for(tlp=tl; tlp->inst; tlp++){
  79. for(inst=tlp->inst; ; inst = inst->next){
  80. switch(inst->type){
  81. case RUNE: /* regular character */
  82. if(inst->r == r)
  83. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  84. return -1;
  85. break;
  86. case LBRA:
  87. tlp->se.m[inst->subid].rsp = s;
  88. continue;
  89. case RBRA:
  90. tlp->se.m[inst->subid].rep = s;
  91. continue;
  92. case ANY:
  93. if(r != '\n')
  94. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  95. return -1;
  96. break;
  97. case ANYNL:
  98. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  99. return -1;
  100. break;
  101. case BOL:
  102. if(s == bol || *(s-1) == '\n')
  103. continue;
  104. break;
  105. case EOL:
  106. if(s == j->reol || r == 0 || r == '\n')
  107. continue;
  108. break;
  109. case CCLASS:
  110. ep = inst->cp->end;
  111. for(rp = inst->cp->spans; rp < ep; rp += 2)
  112. if(r >= rp[0] && r <= rp[1]){
  113. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  114. return -1;
  115. break;
  116. }
  117. break;
  118. case NCCLASS:
  119. ep = inst->cp->end;
  120. for(rp = inst->cp->spans; rp < ep; rp += 2)
  121. if(r >= rp[0] && r <= rp[1])
  122. break;
  123. if(rp == ep)
  124. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  125. return -1;
  126. break;
  127. case OR:
  128. /* evaluate right choice later */
  129. if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
  130. return -1;
  131. /* efficiency: advance and re-evaluate */
  132. continue;
  133. case END: /* Match! */
  134. match = 1;
  135. tlp->se.m[0].rep = s;
  136. if(mp != 0)
  137. _renewmatch(mp, ms, &tlp->se);
  138. break;
  139. }
  140. break;
  141. }
  142. }
  143. if(s == j->reol)
  144. break;
  145. checkstart = j->startchar && nl->inst==0;
  146. s++;
  147. }while(r);
  148. return match;
  149. }
  150. static int
  151. rregexec2(Reprog *progp, /* program to run */
  152. Rune *bol, /* string to run machine on */
  153. Resub *mp, /* subexpression elements */
  154. int ms, /* number of elements at mp */
  155. Reljunk *j
  156. )
  157. {
  158. Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
  159. /* mark space */
  160. j->relist[0] = relist0;
  161. j->relist[1] = relist1;
  162. j->reliste[0] = relist0 + nelem(relist0) - 2;
  163. j->reliste[1] = relist1 + nelem(relist1) - 2;
  164. return rregexec1(progp, bol, mp, ms, j);
  165. }
  166. extern int
  167. rregexec(Reprog *progp, /* program to run */
  168. Rune *bol, /* string to run machine on */
  169. Resub *mp, /* subexpression elements */
  170. int ms) /* number of elements at mp */
  171. {
  172. Reljunk j;
  173. Relist relist0[LISTSIZE], relist1[LISTSIZE];
  174. int rv;
  175. /*
  176. * use user-specified starting/ending location if specified
  177. */
  178. j.rstarts = bol;
  179. j.reol = 0;
  180. if(mp && ms>0){
  181. if(mp->sp)
  182. j.rstarts = mp->rsp;
  183. if(mp->ep)
  184. j.reol = mp->rep;
  185. }
  186. j.starttype = 0;
  187. j.startchar = 0;
  188. if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
  189. j.starttype = RUNE;
  190. j.startchar = progp->startinst->r;
  191. }
  192. if(progp->startinst->type == BOL)
  193. j.starttype = BOL;
  194. /* mark space */
  195. j.relist[0] = relist0;
  196. j.relist[1] = relist1;
  197. j.reliste[0] = relist0 + nelem(relist0) - 2;
  198. j.reliste[1] = relist1 + nelem(relist1) - 2;
  199. rv = rregexec1(progp, bol, mp, ms, &j);
  200. if(rv >= 0)
  201. return rv;
  202. rv = rregexec2(progp, bol, mp, ms, &j);
  203. if(rv >= 0)
  204. return rv;
  205. return -1;
  206. }