sed1.c 11 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719
  1. #include <stdlib.h>
  2. #include <sys/types.h>
  3. #include <fcntl.h>
  4. #include <unistd.h>
  5. #include <stdio.h>
  6. #include "sed.h"
  /*
   * Read: flush anything buffered on stdout, then read(2) up to n bytes from
   * descriptor f into buf.  The comma operator makes the value of the whole
   * expression the return value of read().
   * NOTE(review): the flush presumably keeps output ahead of a blocking
   * read on interactive input -- confirm.
   */
  #define Read(f, buf, n) (fflush(stdout), read(f, buf, n))
  8. void
  9. execute(uchar *file)
  10. {
  11. uchar *p1, *p2;
  12. union reptr *ipc;
  13. int c;
  14. long l;
  15. uchar *execp;
  16. if (file) {
  17. if ((f = open((char*)file, O_RDONLY)) < 0) {
  18. fprintf(stderr, "sed: Can't open %s\n", file);
  19. }
  20. } else
  21. f = 0;
  22. ebp = ibuf;
  23. cbp = ibuf;
  24. if(pending) {
  25. ipc = pending;
  26. pending = 0;
  27. goto yes;
  28. }
  29. for(;;) {
  30. if((execp = gline(linebuf)) == badp) {
  31. close(f);
  32. return;
  33. }
  34. spend = execp;
  35. for(ipc = ptrspace; ipc->r1.command; ) {
  36. p1 = ipc->r1.ad1;
  37. p2 = ipc->r1.ad2;
  38. if(p1) {
  39. if(ipc->r1.inar) {
  40. if(*p2 == CEND) {
  41. p1 = 0;
  42. } else if(*p2 == CLNUM) {
  43. l = p2[1]&0377
  44. | ((p2[2]&0377)<<8)
  45. | ((p2[3]&0377)<<16)
  46. | ((p2[4]&0377)<<24);
  47. if(lnum > l) {
  48. ipc->r1.inar = 0;
  49. if(ipc->r1.negfl)
  50. goto yes;
  51. ipc++;
  52. continue;
  53. }
  54. if(lnum == l) {
  55. ipc->r1.inar = 0;
  56. }
  57. } else if(match(p2, 0)) {
  58. ipc->r1.inar = 0;
  59. }
  60. } else if(*p1 == CEND) {
  61. if(!dolflag) {
  62. if(ipc->r1.negfl)
  63. goto yes;
  64. ipc++;
  65. continue;
  66. }
  67. } else if(*p1 == CLNUM) {
  68. l = p1[1]&0377
  69. | ((p1[2]&0377)<<8)
  70. | ((p1[3]&0377)<<16)
  71. | ((p1[4]&0377)<<24);
  72. if(lnum != l) {
  73. if(ipc->r1.negfl)
  74. goto yes;
  75. ipc++;
  76. continue;
  77. }
  78. if(p2)
  79. ipc->r1.inar = 1;
  80. } else if(match(p1, 0)) {
  81. if(p2)
  82. ipc->r1.inar = 1;
  83. } else {
  84. if(ipc->r1.negfl)
  85. goto yes;
  86. ipc++;
  87. continue;
  88. }
  89. }
  90. if(ipc->r1.negfl) {
  91. ipc++;
  92. continue;
  93. }
  94. yes:
  95. command(ipc);
  96. if(delflag)
  97. break;
  98. if(jflag) {
  99. jflag = 0;
  100. if((ipc = ipc->r2.lb1) == 0) {
  101. ipc = ptrspace;
  102. break;
  103. }
  104. } else
  105. ipc++;
  106. }
  107. if(!nflag && !delflag) {
  108. for(p1 = linebuf; p1 < spend; p1++)
  109. putc(*p1, stdout);
  110. putc('\n', stdout);
  111. }
  112. if(aptr > abuf) {
  113. arout();
  114. }
  115. delflag = 0;
  116. }
  117. }
  118. int
  119. match(uchar *expbuf, int gf)
  120. {
  121. uchar *p1, *p2;
  122. int c;
  123. if(gf) {
  124. if(*expbuf) return(0);
  125. p1 = linebuf;
  126. p2 = genbuf;
  127. while(*p1++ = *p2++);
  128. locs = p1 = loc2;
  129. } else {
  130. p1 = linebuf;
  131. locs = 0;
  132. }
  133. p2 = expbuf;
  134. if(*p2++) {
  135. loc1 = p1;
  136. if(*p2 == CCHR && p2[1] != *p1)
  137. return(0);
  138. return(advance(p1, p2));
  139. }
  140. /* fast check for first character */
  141. if(*p2 == CCHR) {
  142. c = p2[1];
  143. do {
  144. if(*p1 != c)
  145. continue;
  146. if(advance(p1, p2)) {
  147. loc1 = p1;
  148. return(1);
  149. }
  150. } while(*p1++);
  151. return(0);
  152. }
  153. do {
  154. if(advance(p1, p2)) {
  155. loc1 = p1;
  156. return(1);
  157. }
  158. } while(*p1++);
  159. return(0);
  160. }
  161. int
  162. advance(uchar *alp, uchar *aep)
  163. {
  164. uchar *lp, *ep, *curlp;
  165. uchar c;
  166. uchar *bbeg;
  167. int ct;
  168. /*fprintf(stderr, "*lp = %c, %o\n*ep = %c, %o\n", *lp, *lp, *ep, *ep); /*DEBUG*/
  169. lp = alp;
  170. ep = aep;
  171. for (;;) switch (*ep++) {
  172. case CCHR:
  173. if (*ep++ == *lp++)
  174. continue;
  175. return(0);
  176. case CDOT:
  177. if (*lp++)
  178. continue;
  179. return(0);
  180. case CNL:
  181. case CDOL:
  182. if (*lp == 0)
  183. continue;
  184. return(0);
  185. case CEOF:
  186. loc2 = lp;
  187. return(1);
  188. case CCL:
  189. c = *lp++;
  190. if(ep[c>>3] & bittab[c & 07]) {
  191. ep += 32;
  192. continue;
  193. }
  194. return(0);
  195. case CBRA:
  196. braslist[*ep++] = lp;
  197. continue;
  198. case CKET:
  199. braelist[*ep++] = lp;
  200. continue;
  201. case CBACK:
  202. bbeg = braslist[*ep];
  203. ct = braelist[*ep++] - bbeg;
  204. if(ecmp(bbeg, lp, ct)) {
  205. lp += ct;
  206. continue;
  207. }
  208. return(0);
  209. case CBACK|STAR:
  210. bbeg = braslist[*ep];
  211. ct = braelist[*ep++] - bbeg;
  212. curlp = lp;
  213. while(ecmp(bbeg, lp, ct))
  214. lp += ct;
  215. while(lp >= curlp) {
  216. if(advance(lp, ep)) return(1);
  217. lp -= ct;
  218. }
  219. return(0);
  220. case CDOT|STAR:
  221. curlp = lp;
  222. while (*lp++);
  223. goto star;
  224. case CCHR|STAR:
  225. curlp = lp;
  226. while (*lp++ == *ep);
  227. ep++;
  228. goto star;
  229. case CCL|STAR:
  230. curlp = lp;
  231. do {
  232. c = *lp++;
  233. } while(ep[c>>3] & bittab[c & 07]);
  234. ep += 32;
  235. goto star;
  236. star:
  237. if(--lp == curlp) {
  238. continue;
  239. }
  240. if(*ep == CCHR) {
  241. c = ep[1];
  242. do {
  243. if(*lp != c)
  244. continue;
  245. if(advance(lp, ep))
  246. return(1);
  247. } while(lp-- > curlp);
  248. return(0);
  249. }
  250. if(*ep == CBACK) {
  251. c = *(braslist[ep[1]]);
  252. do {
  253. if(*lp != c)
  254. continue;
  255. if(advance(lp, ep))
  256. return(1);
  257. } while(lp-- > curlp);
  258. return(0);
  259. }
  260. do {
  261. if(lp == locs) break;
  262. if (advance(lp, ep))
  263. return(1);
  264. } while (lp-- > curlp);
  265. return(0);
  266. default:
  267. fprintf(stderr, "sed: RE botch, %o\n", *--ep);
  268. exit(1);
  269. }
  270. }
  271. int
  272. substitute(union reptr *ipc)
  273. {
  274. uchar *oloc2;
  275. if(match(ipc->r1.re1, 0)) {
  276. sflag = 1;
  277. if(!ipc->r1.gfl) {
  278. dosub(ipc->r1.rhs);
  279. return(1);
  280. }
  281. oloc2 = NULL;
  282. do {
  283. if(oloc2 == loc2) {
  284. loc2++;
  285. continue;
  286. } else {
  287. dosub(ipc->r1.rhs);
  288. if(*loc2 == 0)
  289. break;
  290. oloc2 = loc2;
  291. }
  292. } while(match(ipc->r1.re1, 1));
  293. return(1);
  294. }
  295. return(0);
  296. }
  297. void
  298. dosub(uchar *rhsbuf)
  299. {
  300. uchar *lp, *sp, *rp;
  301. int c;
  302. lp = linebuf;
  303. sp = genbuf;
  304. rp = rhsbuf;
  305. while (lp < loc1)
  306. *sp++ = *lp++;
  307. while(c = *rp++) {
  308. if (c == '\\') {
  309. c = *rp++;
  310. if (c >= '1' && c < NBRA+'1') {
  311. sp = place(sp, braslist[c-'1'], braelist[c-'1']);
  312. continue;
  313. }
  314. } else if(c == '&') {
  315. sp = place(sp, loc1, loc2);
  316. continue;
  317. }
  318. *sp++ = c;
  319. if (sp >= &genbuf[LBSIZE])
  320. fprintf(stderr, "sed: Output line too long.\n");
  321. }
  322. lp = loc2;
  323. loc2 = sp - genbuf + linebuf;
  324. while (*sp++ = *lp++)
  325. if (sp >= &genbuf[LBSIZE]) {
  326. fprintf(stderr, "sed: Output line too long.\n");
  327. }
  328. lp = linebuf;
  329. sp = genbuf;
  330. while (*lp++ = *sp++);
  331. spend = lp-1;
  332. }
  333. uchar *
  334. place(uchar *asp, uchar *al1, uchar *al2)
  335. {
  336. uchar *sp, *l1, *l2;
  337. sp = asp;
  338. l1 = al1;
  339. l2 = al2;
  340. while (l1 < l2) {
  341. *sp++ = *l1++;
  342. if (sp >= &genbuf[LBSIZE])
  343. fprintf(stderr, "sed: Output line too long.\n");
  344. }
  345. return(sp);
  346. }
  347. void
  348. command(union reptr *ipc)
  349. {
  350. int i;
  351. uchar *p1, *p2;
  352. uchar *execp;
  353. switch(ipc->r1.command) {
  354. case ACOM:
  355. *aptr++ = ipc;
  356. if(aptr >= &abuf[ABUFSIZE]) {
  357. fprintf(stderr, "sed: Too many appends after line %ld\n",
  358. lnum);
  359. }
  360. *aptr = 0;
  361. break;
  362. case CCOM:
  363. delflag = 1;
  364. if(!ipc->r1.inar || dolflag) {
  365. for(p1 = ipc->r1.re1; *p1; )
  366. putc(*p1++, stdout);
  367. putc('\n', stdout);
  368. }
  369. break;
  370. case DCOM:
  371. delflag++;
  372. break;
  373. case CDCOM:
  374. p1 = p2 = linebuf;
  375. while(*p1 != '\n') {
  376. if(*p1++ == 0) {
  377. delflag++;
  378. return;
  379. }
  380. }
  381. p1++;
  382. while(*p2++ = *p1++);
  383. spend = p2-1;
  384. jflag++;
  385. break;
  386. case EQCOM:
  387. fprintf(stdout, "%ld\n", lnum);
  388. break;
  389. case GCOM:
  390. p1 = linebuf;
  391. p2 = holdsp;
  392. while(*p1++ = *p2++);
  393. spend = p1-1;
  394. break;
  395. case CGCOM:
  396. *spend++ = '\n';
  397. p1 = spend;
  398. p2 = holdsp;
  399. while(*p1++ = *p2++)
  400. if(p1 >= lbend)
  401. break;
  402. spend = p1-1;
  403. break;
  404. case HCOM:
  405. p1 = holdsp;
  406. p2 = linebuf;
  407. while(*p1++ = *p2++);
  408. hspend = p1-1;
  409. break;
  410. case CHCOM:
  411. *hspend++ = '\n';
  412. p1 = hspend;
  413. p2 = linebuf;
  414. while(*p1++ = *p2++)
  415. if(p1 >= hend)
  416. break;
  417. hspend = p1-1;
  418. break;
  419. case ICOM:
  420. for(p1 = ipc->r1.re1; *p1; )
  421. putc(*p1++, stdout);
  422. putc('\n', stdout);
  423. break;
  424. case BCOM:
  425. jflag = 1;
  426. break;
  427. case LCOM:
  428. p1 = linebuf;
  429. p2 = genbuf;
  430. while(*p1) {
  431. p2 = lformat(*p1++ & 0377, p2);
  432. if(p2>lcomend && *p1) {
  433. *p2 = 0;
  434. fprintf(stdout, "%s\\\n", genbuf);
  435. p2 = genbuf;
  436. }
  437. }
  438. if(p2>genbuf && (p1[-1]==' '||p1[-1]=='\n'))
  439. p2 = lformat('\n', p2);
  440. *p2 = 0;
  441. fprintf(stdout, "%s\n", genbuf);
  442. break;
  443. case NCOM:
  444. if(!nflag) {
  445. for(p1 = linebuf; p1 < spend; p1++)
  446. putc(*p1, stdout);
  447. putc('\n', stdout);
  448. }
  449. if(aptr > abuf)
  450. arout();
  451. if((execp = gline(linebuf)) == badp) {
  452. pending = ipc;
  453. delflag = 1;
  454. break;
  455. }
  456. spend = execp;
  457. break;
  458. case CNCOM:
  459. if(aptr > abuf)
  460. arout();
  461. *spend++ = '\n';
  462. if((execp = gline(spend)) == badp) {
  463. pending = ipc;
  464. delflag = 1;
  465. break;
  466. }
  467. spend = execp;
  468. break;
  469. case PCOM:
  470. for(p1 = linebuf; p1 < spend; p1++)
  471. putc(*p1, stdout);
  472. putc('\n', stdout);
  473. break;
  474. case CPCOM:
  475. cpcom:
  476. for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
  477. putc(*p1++, stdout);
  478. putc('\n', stdout);
  479. break;
  480. case QCOM:
  481. if(!nflag) {
  482. for(p1 = linebuf; p1 < spend; p1++)
  483. putc(*p1, stdout);
  484. putc('\n', stdout);
  485. }
  486. if(aptr > abuf) arout();
  487. fclose(stdout);
  488. lseek(f,(long)(cbp-ebp),2);
  489. exit(0);
  490. case RCOM:
  491. *aptr++ = ipc;
  492. if(aptr >= &abuf[ABUFSIZE])
  493. fprintf(stderr, "sed: Too many reads after line%ld\n",
  494. lnum);
  495. *aptr = 0;
  496. break;
  497. case SCOM:
  498. i = substitute(ipc);
  499. if(ipc->r1.pfl && i)
  500. if(ipc->r1.pfl == 1) {
  501. for(p1 = linebuf; p1 < spend; p1++)
  502. putc(*p1, stdout);
  503. putc('\n', stdout);
  504. }
  505. else
  506. goto cpcom;
  507. if(i && ipc->r1.fcode)
  508. goto wcom;
  509. break;
  510. case TCOM:
  511. if(sflag == 0) break;
  512. sflag = 0;
  513. jflag = 1;
  514. break;
  515. wcom:
  516. case WCOM:
  517. fprintf(ipc->r1.fcode, "%s\n", linebuf);
  518. fflush(ipc->r1.fcode);
  519. break;
  520. case XCOM:
  521. p1 = linebuf;
  522. p2 = genbuf;
  523. while(*p2++ = *p1++);
  524. p1 = holdsp;
  525. p2 = linebuf;
  526. while(*p2++ = *p1++);
  527. spend = p2 - 1;
  528. p1 = genbuf;
  529. p2 = holdsp;
  530. while(*p2++ = *p1++);
  531. hspend = p2 - 1;
  532. break;
  533. case YCOM:
  534. p1 = linebuf;
  535. p2 = ipc->r1.re1;
  536. while(*p1 = p2[*p1]) p1++;
  537. break;
  538. }
  539. }
  540. uchar *
  541. gline(uchar *addr)
  542. {
  543. uchar *p1, *p2;
  544. int c;
  545. sflag = 0;
  546. p1 = addr;
  547. p2 = cbp;
  548. for (;;) {
  549. if (p2 >= ebp) {
  550. if ((c = Read(f, ibuf, 512)) <= 0) {
  551. return(badp);
  552. }
  553. p2 = ibuf;
  554. ebp = ibuf+c;
  555. }
  556. if ((c = *p2++) == '\n') {
  557. if(p2 >= ebp) {
  558. if((c = Read(f, ibuf, 512)) <= 0) {
  559. close(f);
  560. if(eargc == 0)
  561. dolflag = 1;
  562. }
  563. p2 = ibuf;
  564. ebp = ibuf + c;
  565. }
  566. break;
  567. }
  568. if(c)
  569. if(p1 < lbend)
  570. *p1++ = c;
  571. }
  572. lnum++;
  573. *p1 = 0;
  574. cbp = p2;
  575. return(p1);
  576. }
  577. int
  578. ecmp(uchar *a, uchar *b, int count)
  579. {
  580. while(count--)
  581. if(*a++ != *b++) return(0);
  582. return(1);
  583. }
  584. void
  585. arout(void)
  586. {
  587. uchar *p1;
  588. FILE *fi;
  589. uchar c;
  590. int t;
  591. aptr = abuf - 1;
  592. while(*++aptr) {
  593. if((*aptr)->r1.command == ACOM) {
  594. for(p1 = (*aptr)->r1.re1; *p1; )
  595. putc(*p1++, stdout);
  596. putc('\n', stdout);
  597. } else {
  598. if((fi = fopen((char*)((*aptr)->r1.re1), "r")) == NULL)
  599. continue;
  600. while((t = getc(fi)) != EOF) {
  601. c = t;
  602. putc(c, stdout);
  603. }
  604. fclose(fi);
  605. }
  606. }
  607. aptr = abuf;
  608. *aptr = 0;
  609. }
  610. uchar *
  611. lformat(int c, uchar *p)
  612. {
  613. int trans =
  614. c=='\b'? 'b':
  615. c=='\t'? 't':
  616. c=='\n'? 'n':
  617. c=='\v'? 'v':
  618. c=='\f'? 'f':
  619. c=='\r'? 'r':
  620. c=='\\'? '\\':
  621. 0;
  622. if(trans) {
  623. *p++ = '\\';
  624. *p++ = trans;
  625. } else if(c<040 || c>=0177) {
  626. *p++ = '\\';
  627. *p++ = ((c>>6)&07) + '0';
  628. *p++ = ((c>>3)&07) + '0';
  629. *p++ = (c&07) + '0';
  630. } else
  631. *p++ = c;
  632. return p;
  633. }