regexec.c 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include "regexp.h"
  12. #include "regcomp.h"
  13. /*
  14. * return 0 if no match
  15. * >0 if a match
  16. * <0 if we ran out of _relist space
  17. */
  18. static int
  19. regexec1(Reprog *progp, /* program to run */
  20. char *bol, /* string to run machine on */
  21. Resub *mp, /* subexpression elements */
  22. int ms, /* number of elements at mp */
  23. Reljunk *j
  24. )
  25. {
  26. int flag=0;
  27. Reinst *inst;
  28. Relist *tlp;
  29. char *s;
  30. int i, checkstart;
  31. Rune r, *rp, *ep;
  32. int n;
  33. Relist* tl; /* This list, next list */
  34. Relist* nl;
  35. Relist* tle; /* ends of this and next list */
  36. Relist* nle;
  37. int match;
  38. char *p;
  39. match = 0;
  40. checkstart = j->starttype;
  41. if(mp)
  42. for(i=0; i<ms; i++) {
  43. mp[i].sp = 0;
  44. mp[i].ep = 0;
  45. }
  46. j->relist[0][0].inst = 0;
  47. j->relist[1][0].inst = 0;
  48. /* Execute machine once for each character, including terminal NUL */
  49. s = j->starts;
  50. do{
  51. /* fast check for first char */
  52. if(checkstart) {
  53. switch(j->starttype) {
  54. case RUNE:
  55. p = utfrune(s, j->startchar);
  56. if(p == 0 || s == j->eol)
  57. return match;
  58. s = p;
  59. break;
  60. case BOL:
  61. if(s == bol)
  62. break;
  63. p = utfrune(s, '\n');
  64. if(p == 0 || s == j->eol)
  65. return match;
  66. s = p+1;
  67. break;
  68. }
  69. }
  70. r = *(uint8_t*)s;
  71. if(r < Runeself)
  72. n = 1;
  73. else
  74. n = chartorune(&r, s);
  75. /* switch run lists */
  76. tl = j->relist[flag];
  77. tle = j->reliste[flag];
  78. nl = j->relist[flag^=1];
  79. nle = j->reliste[flag];
  80. nl->inst = 0;
  81. /* Add first instruction to current list */
  82. if(match == 0)
  83. _renewemptythread(tl, progp->startinst, ms, s);
  84. /* Execute machine until current list is empty */
  85. for(tlp=tl; tlp->inst; tlp++){
  86. for(inst = tlp->inst; ; inst = inst->next){
  87. switch(inst->type){
  88. case RUNE: /* regular character */
  89. if(inst->r == r){
  90. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  91. return -1;
  92. }
  93. break;
  94. case LBRA:
  95. tlp->se.m[inst->subid].sp = s;
  96. continue;
  97. case RBRA:
  98. tlp->se.m[inst->subid].ep = s;
  99. continue;
  100. case ANY:
  101. if(r != '\n')
  102. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  103. return -1;
  104. break;
  105. case ANYNL:
  106. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  107. return -1;
  108. break;
  109. case BOL:
  110. if(s == bol || *(s-1) == '\n')
  111. continue;
  112. break;
  113. case EOL:
  114. if(s == j->eol || r == 0 || r == '\n')
  115. continue;
  116. break;
  117. case CCLASS:
  118. ep = inst->cp->end;
  119. for(rp = inst->cp->spans; rp < ep; rp += 2)
  120. if(r >= rp[0] && r <= rp[1]){
  121. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  122. return -1;
  123. break;
  124. }
  125. break;
  126. case NCCLASS:
  127. ep = inst->cp->end;
  128. for(rp = inst->cp->spans; rp < ep; rp += 2)
  129. if(r >= rp[0] && r <= rp[1])
  130. break;
  131. if(rp == ep)
  132. if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
  133. return -1;
  134. break;
  135. case OR:
  136. /* evaluate right choice later */
  137. if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
  138. return -1;
  139. /* efficiency: advance and re-evaluate */
  140. continue;
  141. case END: /* Match! */
  142. match = 1;
  143. tlp->se.m[0].ep = s;
  144. if(mp != 0)
  145. _renewmatch(mp, ms, &tlp->se);
  146. break;
  147. }
  148. break;
  149. }
  150. }
  151. if(s == j->eol)
  152. break;
  153. checkstart = j->starttype && nl->inst==0;
  154. s += n;
  155. }while(r);
  156. return match;
  157. }
  158. static int
  159. regexec2(Reprog *progp, /* program to run */
  160. char *bol, /* string to run machine on */
  161. Resub *mp, /* subexpression elements */
  162. int ms, /* number of elements at mp */
  163. Reljunk *j
  164. )
  165. {
  166. int rv;
  167. Relist *relist0, *relist1;
  168. /* mark space */
  169. relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
  170. if(relist0 == nil)
  171. return -1;
  172. relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
  173. if(relist1 == nil){
  174. free(relist1);
  175. return -1;
  176. }
  177. j->relist[0] = relist0;
  178. j->relist[1] = relist1;
  179. j->reliste[0] = relist0 + BIGLISTSIZE - 2;
  180. j->reliste[1] = relist1 + BIGLISTSIZE - 2;
  181. rv = regexec1(progp, bol, mp, ms, j);
  182. free(relist0);
  183. free(relist1);
  184. return rv;
  185. }
  186. extern int
  187. regexec(Reprog *progp, /* program to run */
  188. char *bol, /* string to run machine on */
  189. Resub *mp, /* subexpression elements */
  190. int ms) /* number of elements at mp */
  191. {
  192. Reljunk j;
  193. Relist relist0[LISTSIZE], relist1[LISTSIZE];
  194. int rv;
  195. /*
  196. * use user-specified starting/ending location if specified
  197. */
  198. j.starts = bol;
  199. j.eol = 0;
  200. if(mp && ms>0){
  201. if(mp->sp)
  202. j.starts = mp->sp;
  203. if(mp->ep)
  204. j.eol = mp->ep;
  205. }
  206. j.starttype = 0;
  207. j.startchar = 0;
  208. if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
  209. j.starttype = RUNE;
  210. j.startchar = progp->startinst->r;
  211. }
  212. if(progp->startinst->type == BOL)
  213. j.starttype = BOL;
  214. /* mark space */
  215. j.relist[0] = relist0;
  216. j.relist[1] = relist1;
  217. j.reliste[0] = relist0 + nelem(relist0) - 2;
  218. j.reliste[1] = relist1 + nelem(relist1) - 2;
  219. rv = regexec1(progp, bol, mp, ms, &j);
  220. if(rv >= 0)
  221. return rv;
  222. rv = regexec2(progp, bol, mp, ms, &j);
  223. if(rv >= 0)
  224. return rv;
  225. return -1;
  226. }