regexp.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. #define CBRA 2
  2. #define CCHR 4
  3. #define CDOT 8
  4. #define CCL 12
  5. #define CDOL 20
  6. #define CEOF 22
  7. #define CKET 24
  8. #define CBACK 36
  9. #define STAR 01
  10. #define RNGE 03
  11. #define NBRA 9
  12. #define PLACE(c) ep[c >> 3] |= bittab[c & 07]
  13. #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
  14. char *braslist[NBRA];
  15. char *braelist[NBRA];
  16. int nbra, ebra;
  17. char *loc1, *loc2, *locs;
  18. int sed;
  19. int circf;
  20. int low;
  21. int size;
  22. char bittab[] = {
  23. 1,
  24. 2,
  25. 4,
  26. 8,
  27. 16,
  28. 32,
  29. 64,
  30. 128
  31. };
  32. char *
  33. compile(instring, ep, endbuf, seof)
  34. register char *ep;
  35. char *instring, *endbuf;
  36. {
  37. INIT /* Dependent declarations and initializations */
  38. register c;
  39. register eof = seof;
  40. char *lastep = instring;
  41. int cclcnt;
  42. char bracket[NBRA], *bracketp;
  43. int closed;
  44. char neg;
  45. int lc;
  46. int i, cflg;
  47. lastep = 0;
  48. if((c = GETC()) == eof) {
  49. if(*ep == 0 && !sed)
  50. ERROR(41);
  51. RETURN(ep);
  52. }
  53. bracketp = bracket;
  54. circf = closed = nbra = ebra = 0;
  55. if (c == '^')
  56. circf++;
  57. else
  58. UNGETC(c);
  59. for (;;) {
  60. if (ep >= endbuf)
  61. ERROR(50);
  62. if((c = GETC()) != '*' && ((c != '\\') || (PEEKC() != '{')))
  63. lastep = ep;
  64. if (c == eof) {
  65. *ep++ = CEOF;
  66. RETURN(ep);
  67. }
  68. switch (c) {
  69. case '.':
  70. *ep++ = CDOT;
  71. continue;
  72. case '\n':
  73. ERROR(36);
  74. case '*':
  75. if (lastep==0 || *lastep==CBRA || *lastep==CKET)
  76. goto defchar;
  77. *lastep |= STAR;
  78. continue;
  79. case '$':
  80. if(PEEKC() != eof)
  81. goto defchar;
  82. *ep++ = CDOL;
  83. continue;
  84. case '[':
  85. if(&ep[17] >= endbuf)
  86. ERROR(50);
  87. *ep++ = CCL;
  88. lc = 0;
  89. for(i = 0; i < 16; i++)
  90. ep[i] = 0;
  91. neg = 0;
  92. if((c = GETC()) == '^') {
  93. neg = 1;
  94. c = GETC();
  95. }
  96. do {
  97. if(c == '\0' || c == '\n')
  98. ERROR(49);
  99. if(c == '-' && lc != 0) {
  100. if ((c = GETC()) == ']') {
  101. PLACE('-');
  102. break;
  103. }
  104. while(lc < c) {
  105. PLACE(lc);
  106. lc++;
  107. }
  108. }
  109. lc = c;
  110. PLACE(c);
  111. } while((c = GETC()) != ']');
  112. if(neg) {
  113. for(cclcnt = 0; cclcnt < 16; cclcnt++)
  114. ep[cclcnt] ^= -1;
  115. ep[0] &= 0376;
  116. }
  117. ep += 16;
  118. continue;
  119. case '\\':
  120. switch(c = GETC()) {
  121. case '(':
  122. if(nbra >= NBRA)
  123. ERROR(43);
  124. *bracketp++ = nbra;
  125. *ep++ = CBRA;
  126. *ep++ = nbra++;
  127. continue;
  128. case ')':
  129. if(bracketp <= bracket || ++ebra != nbra)
  130. ERROR(42);
  131. *ep++ = CKET;
  132. *ep++ = *--bracketp;
  133. closed++;
  134. continue;
  135. case '{':
  136. if(lastep == (char *) (0))
  137. goto defchar;
  138. *lastep |= RNGE;
  139. cflg = 0;
  140. nlim:
  141. c = GETC();
  142. i = 0;
  143. do {
  144. if ('0' <= c && c <= '9')
  145. i = 10 * i + c - '0';
  146. else
  147. ERROR(16);
  148. } while(((c = GETC()) != '\\') && (c != ','));
  149. if (i > 255)
  150. ERROR(11);
  151. *ep++ = i;
  152. if (c == ',') {
  153. if(cflg++)
  154. ERROR(44);
  155. if((c = GETC()) == '\\')
  156. *ep++ = 255;
  157. else {
  158. UNGETC(c);
  159. goto nlim; /* get 2'nd number */
  160. }
  161. }
  162. if(GETC() != '}')
  163. ERROR(45);
  164. if(!cflg) /* one number */
  165. *ep++ = i;
  166. else if((ep[-1] & 0377) < (ep[-2] & 0377))
  167. ERROR(46);
  168. continue;
  169. case '\n':
  170. ERROR(36);
  171. case 'n':
  172. c = '\n';
  173. goto defchar;
  174. default:
  175. if(c >= '1' && c <= '9') {
  176. if((c -= '1') >= closed)
  177. ERROR(25);
  178. *ep++ = CBACK;
  179. *ep++ = c;
  180. continue;
  181. }
  182. }
  183. /* Drop through to default to use \ to turn off special chars */
  184. defchar:
  185. default:
  186. lastep = ep;
  187. *ep++ = CCHR;
  188. *ep++ = c;
  189. }
  190. }
  191. }
  192. step(p1, p2)
  193. register char *p1, *p2;
  194. {
  195. register c;
  196. if (circf) {
  197. loc1 = p1;
  198. return(advance(p1, p2));
  199. }
  200. /* fast check for first character */
  201. if (*p2==CCHR) {
  202. c = p2[1];
  203. do {
  204. if (*p1 != c)
  205. continue;
  206. if (advance(p1, p2)) {
  207. loc1 = p1;
  208. return(1);
  209. }
  210. } while (*p1++);
  211. return(0);
  212. }
  213. /* regular algorithm */
  214. do {
  215. if (advance(p1, p2)) {
  216. loc1 = p1;
  217. return(1);
  218. }
  219. } while (*p1++);
  220. return(0);
  221. }
  222. advance(lp, ep)
  223. register char *lp, *ep;
  224. {
  225. register char *curlp;
  226. char c;
  227. char *bbeg;
  228. int ct;
  229. for (;;) switch (*ep++) {
  230. case CCHR:
  231. if (*ep++ == *lp++)
  232. continue;
  233. return(0);
  234. case CDOT:
  235. if (*lp++)
  236. continue;
  237. return(0);
  238. case CDOL:
  239. if (*lp==0)
  240. continue;
  241. return(0);
  242. case CEOF:
  243. loc2 = lp;
  244. return(1);
  245. case CCL:
  246. c = *lp++ & 0177;
  247. if(ISTHERE(c)) {
  248. ep += 16;
  249. continue;
  250. }
  251. return(0);
  252. case CBRA:
  253. braslist[*ep++] = lp;
  254. continue;
  255. case CKET:
  256. braelist[*ep++] = lp;
  257. continue;
  258. case CCHR|RNGE:
  259. c = *ep++;
  260. getrnge(ep);
  261. while(low--)
  262. if(*lp++ != c)
  263. return(0);
  264. curlp = lp;
  265. while(size--)
  266. if(*lp++ != c)
  267. break;
  268. if(size < 0)
  269. lp++;
  270. ep += 2;
  271. goto star;
  272. case CDOT|RNGE:
  273. getrnge(ep);
  274. while(low--)
  275. if(*lp++ == '\0')
  276. return(0);
  277. curlp = lp;
  278. while(size--)
  279. if(*lp++ == '\0')
  280. break;
  281. if(size < 0)
  282. lp++;
  283. ep += 2;
  284. goto star;
  285. case CCL|RNGE:
  286. getrnge(ep + 16);
  287. while(low--) {
  288. c = *lp++ & 0177;
  289. if(!ISTHERE(c))
  290. return(0);
  291. }
  292. curlp = lp;
  293. while(size--) {
  294. c = *lp++ & 0177;
  295. if(!ISTHERE(c))
  296. break;
  297. }
  298. if(size < 0)
  299. lp++;
  300. ep += 18; /* 16 + 2 */
  301. goto star;
  302. case CBACK:
  303. bbeg = braslist[*ep];
  304. ct = braelist[*ep++] - bbeg;
  305. if(ecmp(bbeg, lp, ct)) {
  306. lp += ct;
  307. continue;
  308. }
  309. return(0);
  310. case CBACK|STAR:
  311. bbeg = braslist[*ep];
  312. ct = braelist[*ep++] - bbeg;
  313. curlp = lp;
  314. while(ecmp(bbeg, lp, ct))
  315. lp += ct;
  316. while(lp >= curlp) {
  317. if(advance(lp, ep)) return(1);
  318. lp -= ct;
  319. }
  320. return(0);
  321. case CDOT|STAR:
  322. curlp = lp;
  323. while (*lp++);
  324. goto star;
  325. case CCHR|STAR:
  326. curlp = lp;
  327. while (*lp++ == *ep);
  328. ep++;
  329. goto star;
  330. case CCL|STAR:
  331. curlp = lp;
  332. do {
  333. c = *lp++ & 0177;
  334. } while(ISTHERE(c));
  335. ep += 16;
  336. goto star;
  337. star:
  338. do {
  339. if(--lp == locs)
  340. break;
  341. if (advance(lp, ep))
  342. return(1);
  343. } while (lp > curlp);
  344. return(0);
  345. }
  346. }
  347. getrnge(str)
  348. register char *str;
  349. {
  350. low = *str++ & 0377;
  351. size = *str == 255 ? 20000 : (*str &0377) - low;
  352. }
  353. ecmp(a, b, count)
  354. register char *a, *b;
  355. register count;
  356. {
  357. while(count--)
  358. if(*a++ != *b++) return(0);
  359. return(1);
  360. }