sed.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438
  1. /*
  2. * sed -- stream editor
  3. *
  4. *
  5. */
  6. #include <u.h>
  7. #include <libc.h>
  8. #include <bio.h>
  9. #include <regexp.h>
  /* Compile-time limits for the fixed-size tables and buffers declared below. */
  10. enum {
  11. DEPTH = 20, /* max nesting depth of {} */
  12. MAXCMDS = 512, /* max sed commands */
  13. ADDSIZE = 10000, /* size of add & read buffer */
  14. MAXADDS = 20, /* max pending adds and reads */
  15. LBSIZE = 8192, /* input line size */
  16. LABSIZE = 50, /* max number of labels (entries in ltab) */
  17. MAXSUB = 10, /* max number of sub reg exp */
  18. MAXFILES = 120, /* max output files */
  19. };
  20. /* An address is a line #, a R.E., "$", a reference to the last
  21. * R.E., or nothing.
  22. */
  23. typedef struct {
  24. enum {
  25. A_NONE, /* no address given */
  26. A_DOL, /* "$": the last input line */
  27. A_LINE, /* a line number, stored in line */
  28. A_RE, /* a regular expression, stored in rp */
  29. A_LAST, /* empty R.E. "//": stands for the last compiled R.E. */
  30. }type;
  31. union {
  32. long line; /* Line # */
  33. Reprog *rp; /* Compiled R.E. */
  34. };
  35. } Addr;
  /* One compiled sed command; the script is an array of these ended by command == 0. */
  36. typedef struct SEDCOM {
  37. Addr ad1; /* optional start address */
  38. Addr ad2; /* optional end address */
  39. union {
  40. Reprog *re1; /* compiled R.E. */
  41. Rune *text; /* added text or file name */
  42. struct SEDCOM *lb1; /* destination command of branch */
  43. };
  44. Rune *rhs; /* Right-hand side of substitution */
  45. Biobuf* fcode; /* File ID for read and write */
  46. char command; /* command code -see below */
  47. char gfl; /* 'Global' flag for substitutions */
  48. char pfl; /* 'print' flag for substitutions: 1 for 'p', 2 for 'P' */
  49. char active; /* 1 => data between start and end; 2 once past the first line of the range */
  50. char negfl; /* negation flag */
  51. } SedCom;
  52. /* Command Codes for field SedCom.command */
  /* The script letter noted beside a code is the one fcomp() maps to it. */
  53. #define ACOM 01 /* a */
  54. #define BCOM 020 /* b, and { */
  55. #define CCOM 02 /* c */
  56. #define CDCOM 025 /* D */
  57. #define CNCOM 022 /* N */
  58. #define COCOM 017
  59. #define CPCOM 023 /* P */
  60. #define DCOM 03 /* d */
  61. #define ECOM 015
  62. #define EQCOM 013 /* = */
  63. #define FCOM 016
  64. #define GCOM 027 /* g */
  65. #define CGCOM 030 /* G */
  66. #define HCOM 031 /* h */
  67. #define CHCOM 032 /* H */
  68. #define ICOM 04 /* i */
  69. #define LCOM 05 /* l */
  70. #define NCOM 012 /* n */
  71. #define PCOM 010 /* p */
  72. #define QCOM 011 /* q */
  73. #define RCOM 06 /* r */
  74. #define SCOM 07 /* s */
  75. #define TCOM 021 /* t */
  76. #define WCOM 014 /* w */
  77. #define CWCOM 024
  78. #define YCOM 026 /* y */
  79. #define XCOM 033 /* x */
  80. typedef struct label { /* Label symbol table */
  81. Rune asc[9]; /* Label name */
  82. SedCom *chain; /* branches seen before the label was defined, linked through lb1 */
  83. SedCom *address; /* Command associated with label */
  84. } Label;
  /* Node of a singly linked list of file names (see fhead and ftail). */
  85. typedef struct FILE_CACHE { /* Data file control block */
  86. struct FILE_CACHE *next; /* Forward Link */
  87. char *name; /* Name of file */
  88. } FileCache;
  /* Global state shared by the script compiler and the executor. */
  89. SedCom pspace[MAXCMDS]; /* Command storage */
  90. SedCom *pend = pspace+MAXCMDS; /* End of command storage */
  91. SedCom *rep = pspace; /* Current fill point */
  92. Reprog *lastre = 0; /* Last regular expression */
  93. Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
  94. Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
  95. Rune *addend = addspace+ADDSIZE;
  96. SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
  97. SedCom **aptr = abuf;
  98. struct { /* Sed program input control block */
  99. enum PTYPE /* Either on command line or in file */
  100. { P_ARG,
  101. P_FILE
  102. } type;
  103. union PCTL { /* Pointer to data */
  104. Biobuf *bp;
  105. char *curr;
  106. };
  107. } prog;
  108. Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
  109. FileCache *fhead = 0; /* Head of File Cache Chain */
  110. FileCache *ftail = 0; /* Tail of File Cache Chain */
  111. Rune *loc1; /* Start of pattern match */
  112. Rune *loc2; /* End of pattern match */
  113. Rune seof; /* Pattern delimiter char */
  114. Rune linebuf[LBSIZE+1]; /* Input data buffer */
  115. Rune *lbend = linebuf+LBSIZE; /* End of buffer */
  116. Rune *spend = linebuf; /* End of input data */
  117. Rune *cp; /* Current scan point in linebuf */
  118. Rune holdsp[LBSIZE+1]; /* Hold buffer */
  119. Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
  120. Rune *hspend = holdsp; /* End of hold data */
  121. int nflag; /* Command line flags */
  122. int gflag;
  123. int dolflag; /* Set when at true EOF */
  124. int sflag; /* Set when substitution done */
  125. int jflag; /* Set when jump required */
  126. int delflag; /* Delete current line when set */
  127. long lnum = 0; /* Input line count */
  128. char fname[MAXFILES][40]; /* File name cache; each name is limited to 40 bytes */
  129. Biobuf *fcode[MAXFILES]; /* File ID cache */
  130. int nfiles = 0; /* Cache fill point */
  131. Biobuf fout; /* Output stream */
  132. Biobuf stdin; /* Default input */
  133. Biobuf* f = 0; /* Input data */
  134. Label ltab[LABSIZE]; /* Label name symbol table */
  135. Label *labend = ltab+LABSIZE; /* End of label table */
  136. Label *lab = ltab+1; /* Current Fill point; ltab[0] is kept for branches with no label */
  137. int depth = 0; /* {} stack pointer */
  138. Rune bad; /* Dummy err ptr reference */
  139. Rune *badp = &bad;
  /* Error message formats; the %S argument is a Rune string (callers pass linebuf). */
  140. char CGMES[] = "Command garbled: %S";
  141. char TMMES[] = "Too much text: %S";
  142. char LTL[] = "Label too long: %S";
  143. char AD0MES[] = "No addresses allowed: %S";
  144. char AD1MES[] = "Only one address allowed: %S";
  /* Forward declarations for the functions defined in this file. */
  145. void address(Addr *);
  146. void arout(void);
  147. int cmp(char *, char *);
  148. int rcmp(Rune *, Rune *);
  149. void command(SedCom *);
  150. Reprog *compile(void);
  151. Rune *compsub(Rune *, Rune *);
  152. void dechain(void);
  153. void dosub(Rune *);
  154. int ecmp(Rune *, Rune *, int);
  155. void enroll(char *);
  156. void errexit(void);
  157. int executable(SedCom *);
  158. void execute(void);
  159. void fcomp(void);
  160. long getrune(void);
  161. Rune *gline(Rune *);
  162. int match(Reprog *, Rune *);
  163. void newfile(enum PTYPE, char *);
  164. int opendata(void);
  165. Biobuf *open_file(char *);
  166. Rune *place(Rune *, Rune *, Rune *);
  167. void quit(char *, char *);
  168. int rline(Rune *, Rune *);
  169. Label *search(Label *);
  170. int substitute(SedCom *);
  171. char *text(char *);
  172. Rune *stext(Rune *, Rune *);
  173. int ycomp(SedCom *);
  174. char * trans(int c);
  175. void putline(Biobuf *bp, Rune *buf, int n);
  176. void
  177. main(int argc, char **argv)
  178. {
  179. int compfl;
  180. lnum = 0;
  181. Binit(&fout, 1, OWRITE);
  182. fcode[nfiles++] = &fout;
  183. compfl = 0;
  184. if(argc == 1)
  185. exits(0);
  186. ARGBEGIN{
  187. case 'n':
  188. nflag++;
  189. continue;
  190. case 'f':
  191. if(argc <= 1)
  192. quit("no pattern-file", 0);
  193. newfile(P_FILE, ARGF());
  194. fcomp();
  195. compfl = 1;
  196. continue;
  197. case 'e':
  198. if (argc <= 1)
  199. quit("missing pattern", 0);
  200. newfile(P_ARG, ARGF());
  201. fcomp();
  202. compfl = 1;
  203. continue;
  204. case 'g':
  205. gflag++;
  206. continue;
  207. default:
  208. fprint(2, "sed: Unknown flag: %c\n", ARGC());
  209. continue;
  210. } ARGEND
  211. if(compfl == 0) {
  212. if (--argc < 0)
  213. quit("missing pattern", 0);
  214. newfile(P_ARG, *argv++);
  215. fcomp();
  216. }
  217. if(depth)
  218. quit("Too many {'s", 0);
  219. ltab[0].address = rep;
  220. dechain();
  221. if(argc <= 0)
  222. enroll(0); /* Add stdin to cache */
  223. else while(--argc >= 0) {
  224. enroll(*argv++);
  225. }
  226. execute();
  227. exits(0);
  228. }
  229. void
  230. fcomp(void)
  231. {
  232. Rune *tp;
  233. SedCom *pt, *pt1;
  234. int i;
  235. Label *lpt;
  236. static Rune *p = addspace;
  237. static SedCom **cmpend[DEPTH]; /* stack of {} operations */
  238. while (rline(linebuf, lbend) >= 0) {
  239. cp = linebuf;
  240. comploop:
  241. while(*cp == ' ' || *cp == '\t')
  242. cp++;
  243. if(*cp == '\0' || *cp == '#')
  244. continue;
  245. if(*cp == ';') {
  246. cp++;
  247. goto comploop;
  248. }
  249. address(&rep->ad1);
  250. if (rep->ad1.type != A_NONE) {
  251. if (rep->ad1.type == A_LAST) {
  252. if (!lastre)
  253. quit("First RE may not be null", 0);
  254. rep->ad1.type = A_RE;
  255. rep->ad1.rp = lastre;
  256. }
  257. if(*cp == ',' || *cp == ';') {
  258. cp++;
  259. address(&rep->ad2);
  260. if (rep->ad2.type == A_LAST) {
  261. rep->ad2.type = A_RE;
  262. rep->ad2.rp = lastre;
  263. }
  264. } else
  265. rep->ad2.type = A_NONE;
  266. }
  267. while(*cp == ' ' || *cp == '\t')
  268. cp++;
  269. swit:
  270. switch(*cp++) {
  271. default:
  272. quit("Unrecognized command: %S", (char *)linebuf);
  273. case '!':
  274. rep->negfl = 1;
  275. goto swit;
  276. case '{':
  277. rep->command = BCOM;
  278. rep->negfl = !(rep->negfl);
  279. cmpend[depth++] = &rep->lb1;
  280. if(++rep >= pend)
  281. quit("Too many commands: %S", (char *) linebuf);
  282. if(*cp == '\0') continue;
  283. goto comploop;
  284. case '}':
  285. if(rep->ad1.type != A_NONE)
  286. quit(AD0MES, (char *) linebuf);
  287. if(--depth < 0)
  288. quit("Too many }'s", 0);
  289. *cmpend[depth] = rep;
  290. if(*cp == 0) continue;
  291. goto comploop;
  292. case '=':
  293. rep->command = EQCOM;
  294. if(rep->ad2.type != A_NONE)
  295. quit(AD1MES, (char *) linebuf);
  296. break;
  297. case ':':
  298. if(rep->ad1.type != A_NONE)
  299. quit(AD0MES, (char *) linebuf);
  300. while(*cp == ' ')
  301. cp++;
  302. tp = lab->asc;
  303. while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
  304. *tp++ = *cp++;
  305. if(tp >= &(lab->asc[8]))
  306. quit(LTL, (char *) linebuf);
  307. }
  308. *tp = '\0';
  309. if(lpt = search(lab)) {
  310. if(lpt->address)
  311. quit("Duplicate labels: %S", (char *) linebuf);
  312. } else {
  313. lab->chain = 0;
  314. lpt = lab;
  315. if(++lab >= labend)
  316. quit("Too many labels: %S", (char *) linebuf);
  317. }
  318. lpt->address = rep;
  319. if (*cp == '#')
  320. continue;
  321. rep--; /* reuse this slot */
  322. break;
  323. case 'a':
  324. rep->command = ACOM;
  325. if(rep->ad2.type != A_NONE)
  326. quit(AD1MES, (char *) linebuf);
  327. if(*cp == '\\') cp++;
  328. if(*cp++ != '\n')
  329. quit(CGMES, (char *) linebuf);
  330. rep->text = p;
  331. p = stext(p, addend);
  332. break;
  333. case 'c':
  334. rep->command = CCOM;
  335. if(*cp == '\\') cp++;
  336. if(*cp++ != '\n')
  337. quit(CGMES, (char *) linebuf);
  338. rep->text = p;
  339. p = stext(p, addend);
  340. break;
  341. case 'i':
  342. rep->command = ICOM;
  343. if(rep->ad2.type != A_NONE)
  344. quit(AD1MES, (char *) linebuf);
  345. if(*cp == '\\') cp++;
  346. if(*cp++ != '\n')
  347. quit(CGMES, (char *) linebuf);
  348. rep->text = p;
  349. p = stext(p, addend);
  350. break;
  351. case 'g':
  352. rep->command = GCOM;
  353. break;
  354. case 'G':
  355. rep->command = CGCOM;
  356. break;
  357. case 'h':
  358. rep->command = HCOM;
  359. break;
  360. case 'H':
  361. rep->command = CHCOM;
  362. break;
  363. case 't':
  364. rep->command = TCOM;
  365. goto jtcommon;
  366. case 'b':
  367. rep->command = BCOM;
  368. jtcommon:
  369. while(*cp == ' ')cp++;
  370. if(*cp == '\0') {
  371. if(pt = ltab[0].chain) {
  372. while(pt1 = pt->lb1)
  373. pt = pt1;
  374. pt->lb1 = rep;
  375. } else
  376. ltab[0].chain = rep;
  377. break;
  378. }
  379. tp = lab->asc;
  380. while((*tp++ = *cp++))
  381. if(tp >= &(lab->asc[8]))
  382. quit(LTL, (char *) linebuf);
  383. cp--;
  384. tp[-1] = '\0';
  385. if(lpt = search(lab)) {
  386. if(lpt->address) {
  387. rep->lb1 = lpt->address;
  388. } else {
  389. pt = lpt->chain;
  390. while(pt1 = pt->lb1)
  391. pt = pt1;
  392. pt->lb1 = rep;
  393. }
  394. } else {
  395. lab->chain = rep;
  396. lab->address = 0;
  397. if(++lab >= labend)
  398. quit("Too many labels: %S",
  399. (char *) linebuf);
  400. }
  401. break;
  402. case 'n':
  403. rep->command = NCOM;
  404. break;
  405. case 'N':
  406. rep->command = CNCOM;
  407. break;
  408. case 'p':
  409. rep->command = PCOM;
  410. break;
  411. case 'P':
  412. rep->command = CPCOM;
  413. break;
  414. case 'r':
  415. rep->command = RCOM;
  416. if(rep->ad2.type != A_NONE)
  417. quit(AD1MES, (char *) linebuf);
  418. if(*cp++ != ' ')
  419. quit(CGMES, (char *) linebuf);
  420. rep->text = p;
  421. p = stext(p, addend);
  422. break;
  423. case 'd':
  424. rep->command = DCOM;
  425. break;
  426. case 'D':
  427. rep->command = CDCOM;
  428. rep->lb1 = pspace;
  429. break;
  430. case 'q':
  431. rep->command = QCOM;
  432. if(rep->ad2.type != A_NONE)
  433. quit(AD1MES, (char *) linebuf);
  434. break;
  435. case 'l':
  436. rep->command = LCOM;
  437. break;
  438. case 's':
  439. rep->command = SCOM;
  440. seof = *cp++;
  441. if ((rep->re1 = compile()) == 0) {
  442. if(!lastre)
  443. quit("First RE may not be null.", 0);
  444. rep->re1 = lastre;
  445. }
  446. rep->rhs = p;
  447. if((p = compsub(p, addend)) == 0)
  448. quit(CGMES, (char *) linebuf);
  449. if(*cp == 'g') {
  450. cp++;
  451. rep->gfl++;
  452. } else if(gflag)
  453. rep->gfl++;
  454. if(*cp == 'p') {
  455. cp++;
  456. rep->pfl = 1;
  457. }
  458. if(*cp == 'P') {
  459. cp++;
  460. rep->pfl = 2;
  461. }
  462. if(*cp == 'w') {
  463. cp++;
  464. if(*cp++ != ' ')
  465. quit(CGMES, (char *) linebuf);
  466. text(fname[nfiles]);
  467. for(i = nfiles - 1; i >= 0; i--)
  468. if(cmp(fname[nfiles],fname[i]) == 0) {
  469. rep->fcode = fcode[i];
  470. goto done;
  471. }
  472. if(nfiles >= MAXFILES)
  473. quit("Too many files in w commands 1", 0);
  474. rep->fcode = open_file(fname[nfiles]);
  475. }
  476. break;
  477. case 'w':
  478. rep->command = WCOM;
  479. if(*cp++ != ' ')
  480. quit(CGMES, (char *) linebuf);
  481. text(fname[nfiles]);
  482. for(i = nfiles - 1; i >= 0; i--)
  483. if(cmp(fname[nfiles], fname[i]) == 0) {
  484. rep->fcode = fcode[i];
  485. goto done;
  486. }
  487. if(nfiles >= MAXFILES){
  488. fprint(2, "sed: Too many files in w commands 2 \n");
  489. fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
  490. errexit();
  491. }
  492. rep->fcode = open_file(fname[nfiles]);
  493. break;
  494. case 'x':
  495. rep->command = XCOM;
  496. break;
  497. case 'y':
  498. rep->command = YCOM;
  499. seof = *cp++;
  500. if (ycomp(rep) == 0)
  501. quit(CGMES, (char *) linebuf);
  502. break;
  503. }
  504. done:
  505. if(++rep >= pend)
  506. quit("Too many commands, last: %S", (char *) linebuf);
  507. if(*cp++ != '\0') {
  508. if(cp[-1] == ';')
  509. goto comploop;
  510. quit(CGMES, (char *) linebuf);
  511. }
  512. }
  513. }
  514. Biobuf *
  515. open_file(char *name)
  516. {
  517. Biobuf *bp;
  518. int fd;
  519. if ((bp = malloc(sizeof(Biobuf))) == 0)
  520. quit("Out of memory", 0);
  521. if ((fd = open(name, OWRITE)) < 0 &&
  522. (fd = create(name, OWRITE, 0666)) < 0)
  523. quit("Cannot create %s", name);
  524. Binit(bp, fd, OWRITE);
  525. Bseek(bp, 0, 2);
  526. fcode[nfiles++] = bp;
  527. return bp;
  528. }
  529. Rune *
  530. compsub(Rune *rhs, Rune *end)
  531. {
  532. Rune r;
  533. while ((r = *cp++) != '\0') {
  534. if(r == '\\') {
  535. if (rhs < end)
  536. *rhs++ = 0xFFFF;
  537. else
  538. return 0;
  539. r = *cp++;
  540. if(r == 'n')
  541. r = '\n';
  542. } else {
  543. if(r == seof) {
  544. if (rhs < end)
  545. *rhs++ = '\0';
  546. else
  547. return 0;
  548. return rhs;
  549. }
  550. }
  551. if (rhs < end)
  552. *rhs++ = r;
  553. else
  554. return 0;
  555. }
  556. return 0;
  557. }
  558. Reprog *
  559. compile(void)
  560. {
  561. Rune c;
  562. char *ep;
  563. char expbuf[512];
  564. if((c = *cp++) == seof) /* '//' */
  565. return 0;
  566. ep = expbuf;
  567. do {
  568. if (c == 0 || c == '\n')
  569. quit(TMMES, (char *) linebuf);
  570. if (c == '\\') {
  571. if (ep >= expbuf+sizeof(expbuf))
  572. quit(TMMES, (char *) linebuf);
  573. ep += runetochar(ep, &c);
  574. if ((c = *cp++) == 'n')
  575. c = '\n';
  576. }
  577. if (ep >= expbuf+sizeof(expbuf))
  578. quit(TMMES, (char *) linebuf);
  579. ep += runetochar(ep, &c);
  580. } while ((c = *cp++) != seof);
  581. *ep = 0;
  582. return lastre = regcomp(expbuf);
  583. }
  584. void
  585. regerror(char *s)
  586. {
  587. USED(s);
  588. quit(CGMES, (char *) linebuf);
  589. }
  590. void
  591. newfile(enum PTYPE type, char *name)
  592. {
  593. if (type == P_ARG)
  594. prog.curr = name;
  595. else if ((prog.bp = Bopen(name, OREAD)) == 0)
  596. quit("Cannot open pattern-file: %s\n", name);
  597. prog.type = type;
  598. }
  599. int
  600. rline(Rune *buf, Rune *end)
  601. {
  602. long c;
  603. Rune r;
  604. while ((c = getrune()) >= 0) {
  605. r = c;
  606. if (r == '\\') {
  607. if (buf <= end)
  608. *buf++ = r;
  609. if ((c = getrune()) < 0)
  610. break;
  611. r = c;
  612. } else if (r == '\n') {
  613. *buf = '\0';
  614. return(1);
  615. }
  616. if (buf <= end)
  617. *buf++ = r;
  618. }
  619. *buf = '\0';
  620. return(-1);
  621. }
  622. long
  623. getrune(void)
  624. {
  625. char *p;
  626. long c;
  627. Rune r;
  628. if (prog.type == P_ARG) {
  629. if ((p = prog.curr) != 0) {
  630. if (*p) {
  631. prog.curr += chartorune(&r, p);
  632. c = r;
  633. } else {
  634. c = '\n'; /* fake an end-of-line */
  635. prog.curr = 0;
  636. }
  637. } else
  638. c = -1;
  639. } else if ((c = Bgetrune(prog.bp)) < 0)
  640. Bterm(prog.bp);
  641. return c;
  642. }
  643. void
  644. address(Addr *ap)
  645. {
  646. int c;
  647. long lno;
  648. if((c = *cp++) == '$')
  649. ap->type = A_DOL;
  650. else if(c == '/') {
  651. seof = c;
  652. if (ap->rp = compile())
  653. ap->type = A_RE;
  654. else
  655. ap->type = A_LAST;
  656. }
  657. else if (c >= '0' && c <= '9') {
  658. lno = c-'0';
  659. while ((c = *cp) >= '0' && c <= '9')
  660. lno = lno*10 + *cp++-'0';
  661. if(!lno)
  662. quit("line number 0 is illegal",0);
  663. ap->type = A_LINE;
  664. ap->line = lno;
  665. }
  666. else {
  667. cp--;
  668. ap->type = A_NONE;
  669. }
  670. }
  /*
   * Compare two NUL-terminated byte strings.
   * Returns 0 when they are equal and 1 otherwise (no ordering is implied).
   *
   * The return type is now spelled out, matching the prototype, and the
   * comparison is delegated to strcmp.
   */
  int
  cmp(char *a, char *b)   /* compare characters */
  {
      return strcmp(a, b) != 0;
  }
  679. rcmp(Rune *a, Rune *b) /* compare runes */
  680. {
  681. while(*a == *b++)
  682. if (*a == '\0')
  683. return(0);
  684. else a++;
  685. return(1);
  686. }
  687. char *
  688. text(char *p) /* extract character string */
  689. {
  690. Rune r;
  691. while(*cp == '\t' || *cp == ' ')
  692. cp++;
  693. while (*cp) {
  694. if ((r = *cp++) == '\\')
  695. if ((r = *cp++) == 0)
  696. break;;
  697. if (r == '\n')
  698. while (*cp == '\t' || *cp == ' ')
  699. cp++;
  700. p += runetochar(p, &r);
  701. }
  702. *p++ = '\0';
  703. return p;
  704. }
  705. Rune *
  706. stext(Rune *p, Rune *end) /* extract rune string */
  707. {
  708. while(*cp == '\t' || *cp == ' ')
  709. cp++;
  710. while (*cp) {
  711. if (*cp == '\\')
  712. if (*++cp == 0)
  713. break;
  714. if (p >= end-1)
  715. quit(TMMES, (char *) linebuf);
  716. if ((*p++ = *cp++) == '\n')
  717. while(*cp == '\t' || *cp == ' ')
  718. cp++;
  719. }
  720. *p++ = 0;
  721. return p;
  722. }
  723. Label *
  724. search (Label *ptr)
  725. {
  726. Label *rp;
  727. for (rp = ltab; rp < ptr; rp++)
  728. if(rcmp(rp->asc, ptr->asc) == 0)
  729. return(rp);
  730. return(0);
  731. }
  732. void
  733. dechain(void)
  734. {
  735. Label *lptr;
  736. SedCom *rptr, *trptr;
  737. for(lptr = ltab; lptr < lab; lptr++) {
  738. if(lptr->address == 0)
  739. quit("Undefined label: %S", (char *) lptr->asc);
  740. if(lptr->chain) {
  741. rptr = lptr->chain;
  742. while(trptr = rptr->lb1) {
  743. rptr->lb1 = lptr->address;
  744. rptr = trptr;
  745. }
  746. rptr->lb1 = lptr->address;
  747. }
  748. }
  749. }
  750. int
  751. ycomp(SedCom *r)
  752. {
  753. int i;
  754. Rune *rp;
  755. Rune c, *tsp, highc;
  756. Rune *sp;
  757. highc = 0;
  758. for(tsp = cp; *tsp != seof; tsp++) {
  759. if(*tsp == '\\')
  760. tsp++;
  761. if(*tsp == '\n' || *tsp == '\0')
  762. return(0);
  763. if (*tsp > highc) highc = *tsp;
  764. }
  765. tsp++;
  766. if ((rp = r->text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
  767. quit("Out of memory", 0);
  768. *rp++ = highc; /* save upper bound */
  769. for (i = 0; i <= highc; i++)
  770. rp[i] = i;
  771. sp = cp;
  772. while((c = *sp++) != seof) {
  773. if(c == '\\' && *sp == 'n') {
  774. sp++;
  775. c = '\n';
  776. }
  777. if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
  778. rp[c] = '\n';
  779. tsp++;
  780. }
  781. if(rp[c] == seof || rp[c] == '\0') {
  782. free(r->re1);
  783. r->re1 = 0;
  784. return(0);
  785. }
  786. }
  787. if(*tsp != seof) {
  788. free(r->re1);
  789. r->re1 = 0;
  790. return(0);
  791. }
  792. cp = tsp+1;
  793. return(1);
  794. }
  795. void
  796. execute(void)
  797. {
  798. SedCom *ipc;
  799. while (spend = gline(linebuf)){
  800. for(ipc = pspace; ipc->command; ) {
  801. if (!executable(ipc)) {
  802. ipc++;
  803. continue;
  804. }
  805. command(ipc);
  806. if(delflag)
  807. break;
  808. if(jflag) {
  809. jflag = 0;
  810. if((ipc = ipc->lb1) == 0)
  811. break;
  812. } else
  813. ipc++;
  814. }
  815. if(!nflag && !delflag)
  816. putline(&fout, linebuf, spend-linebuf);
  817. if(aptr > abuf) {
  818. arout();
  819. }
  820. delflag = 0;
  821. }
  822. }
  823. /* determine if a statement should be applied to an input line */
  824. int
  825. executable(SedCom *ipc)
  826. {
  827. if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
  828. if (ipc->active == 1) /* Second line */
  829. ipc->active = 2;
  830. switch(ipc->ad2.type) {
  831. case A_NONE: /* No second addr; use first */
  832. ipc->active = 0;
  833. break;
  834. case A_DOL: /* Accept everything */
  835. return !ipc->negfl;
  836. case A_LINE: /* Line at end of range? */
  837. if (lnum <= ipc->ad2.line) {
  838. if (ipc->ad2.line == lnum)
  839. ipc->active = 0;
  840. return !ipc->negfl;
  841. }
  842. ipc->active = 0; /* out of range */
  843. return ipc->negfl;
  844. case A_RE: /* Check for matching R.E. */
  845. if (match(ipc->ad2.rp, linebuf))
  846. ipc->active = 0;
  847. return !ipc->negfl;
  848. default: /* internal error */
  849. quit("Internal error", 0);
  850. }
  851. }
  852. switch (ipc->ad1.type) { /* Check first address */
  853. case A_NONE: /* Everything matches */
  854. return !ipc->negfl;
  855. case A_DOL: /* Only last line */
  856. if (dolflag)
  857. return !ipc->negfl;
  858. break;
  859. case A_LINE: /* Check line number */
  860. if (ipc->ad1.line == lnum) {
  861. ipc->active = 1; /* In range */
  862. return !ipc->negfl;
  863. }
  864. break;
  865. case A_RE: /* Check R.E. */
  866. if (match(ipc->ad1.rp, linebuf)) {
  867. ipc->active = 1; /* In range */
  868. return !ipc->negfl;
  869. }
  870. break;
  871. default:
  872. quit("Internal error", 0);
  873. }
  874. return ipc->negfl;
  875. }
  876. match(Reprog *pattern, Rune *buf)
  877. {
  878. if (!pattern)
  879. return 0;
  880. subexp[0].rsp = buf;
  881. subexp[0].ep = 0;
  882. if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
  883. loc1 = subexp[0].rsp;
  884. loc2 = subexp[0].rep;
  885. return 1;
  886. }
  887. loc1 = loc2 = 0;
  888. return 0;
  889. }
  890. substitute(SedCom *ipc)
  891. {
  892. int len;
  893. if(!match(ipc->re1, linebuf))
  894. return 0;
  895. /*
  896. * we have at least one match. some patterns, e.g. '$' or '^', can
  897. * produce zero-length matches, so during a global substitute we
  898. * must bump to the character after a zero-length match to keep from looping.
  899. */
  900. sflag = 1;
  901. if(ipc->gfl == 0) /* single substitution */
  902. dosub(ipc->rhs);
  903. else
  904. do{ /* global substitution */
  905. len = loc2-loc1; /* length of match */
  906. dosub(ipc->rhs); /* dosub moves loc2 */
  907. if(*loc2 == 0) /* end of string */
  908. break;
  909. if(len == 0) /* zero-length R.E. match */
  910. loc2++; /* bump over zero-length match */
  911. if(*loc2 == 0) /* end of string */
  912. break;
  913. } while(match(ipc->re1, loc2));
  914. return 1;
  915. }
  916. void
  917. dosub(Rune *rhsbuf)
  918. {
  919. Rune *lp, *sp;
  920. Rune *rp;
  921. int c, n;
  922. lp = linebuf;
  923. sp = genbuf;
  924. rp = rhsbuf;
  925. while (lp < loc1)
  926. *sp++ = *lp++;
  927. while(c = *rp++) {
  928. if (c == '&') {
  929. sp = place(sp, loc1, loc2);
  930. continue;
  931. }
  932. if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
  933. n = c-'0';
  934. if (subexp[n].rsp && subexp[n].rep) {
  935. sp = place(sp, subexp[n].rsp, subexp[n].rep);
  936. continue;
  937. }
  938. else {
  939. fprint(2, "sed: Invalid back reference \\%d\n",n);
  940. errexit();
  941. }
  942. }
  943. *sp++ = c;
  944. if (sp >= &genbuf[LBSIZE])
  945. fprint(2, "sed: Output line too long.\n");
  946. }
  947. lp = loc2;
  948. loc2 = sp - genbuf + linebuf;
  949. while (*sp++ = *lp++)
  950. if (sp >= &genbuf[LBSIZE])
  951. fprint(2, "sed: Output line too long.\n");
  952. lp = linebuf;
  953. sp = genbuf;
  954. while (*lp++ = *sp++)
  955. ;
  956. spend = lp-1;
  957. }
  958. Rune *
  959. place(Rune *sp, Rune *l1, Rune *l2)
  960. {
  961. while (l1 < l2) {
  962. *sp++ = *l1++;
  963. if (sp >= &genbuf[LBSIZE])
  964. fprint(2, "sed: Output line too long.\n");
  965. }
  966. return(sp);
  967. }
  /*
   * Return the printable escape for character c: a short backslash
   * sequence for backspace, newline, carriage return, tab and backslash,
   * and \x followed by four lower-case hex digits (the low 16 bits of c)
   * for anything else.  The hex form is built in a static buffer that the
   * next call overwrites.
   */
  char *
  trans(int c)
  {
      static char hexdigit[] = "0123456789abcdef";
      static char esc[] = "\\x0000";
      int k;

      if (c == '\b')
          return "\\b";
      if (c == '\n')
          return "\\n";
      if (c == '\r')
          return "\\r";
      if (c == '\t')
          return "\\t";
      if (c == '\\')
          return "\\\\";

      /* fill the four digits from the least significant nibble upwards */
      for (k = 0; k < 4; k++)
          esc[5 - k] = hexdigit[(c >> (4*k)) & 0xF];
      return esc;
  }
  991. void
  992. command(SedCom *ipc)
  993. {
  994. int i, c;
  995. Rune *p1, *p2;
  996. char *ucp;
  997. Rune *rp;
  998. Rune *execp;
  999. switch(ipc->command) {
  1000. case ACOM:
  1001. *aptr++ = ipc;
  1002. if(aptr >= abuf+MAXADDS) {
  1003. quit("sed: Too many appends after line %ld\n",
  1004. (char *) lnum);
  1005. }
  1006. *aptr = 0;
  1007. break;
  1008. case CCOM:
  1009. delflag = 1;
  1010. if(ipc->active == 1) {
  1011. for(rp = ipc->text; *rp; rp++)
  1012. Bputrune(&fout, *rp);
  1013. Bputc(&fout, '\n');
  1014. }
  1015. break;
  1016. case DCOM:
  1017. delflag++;
  1018. break;
  1019. case CDCOM:
  1020. p1 = p2 = linebuf;
  1021. while(*p1 != '\n') {
  1022. if(*p1++ == 0) {
  1023. delflag++;
  1024. return;
  1025. }
  1026. }
  1027. p1++;
  1028. while(*p2++ = *p1++)
  1029. ;
  1030. spend = p2-1;
  1031. jflag++;
  1032. break;
  1033. case EQCOM:
  1034. Bprint(&fout, "%ld\n", lnum);
  1035. break;
  1036. case GCOM:
  1037. p1 = linebuf;
  1038. p2 = holdsp;
  1039. while(*p1++ = *p2++)
  1040. ;
  1041. spend = p1-1;
  1042. break;
  1043. case CGCOM:
  1044. *spend++ = '\n';
  1045. p1 = spend;
  1046. p2 = holdsp;
  1047. while(*p1++ = *p2++)
  1048. if(p1 >= lbend)
  1049. break;
  1050. spend = p1-1;
  1051. break;
  1052. case HCOM:
  1053. p1 = holdsp;
  1054. p2 = linebuf;
  1055. while(*p1++ = *p2++);
  1056. hspend = p1-1;
  1057. break;
  1058. case CHCOM:
  1059. *hspend++ = '\n';
  1060. p1 = hspend;
  1061. p2 = linebuf;
  1062. while(*p1++ = *p2++)
  1063. if(p1 >= hend)
  1064. break;
  1065. hspend = p1-1;
  1066. break;
  1067. case ICOM:
  1068. for(rp = ipc->text; *rp; rp++)
  1069. Bputrune(&fout, *rp);
  1070. Bputc(&fout, '\n');
  1071. break;
  1072. case BCOM:
  1073. jflag = 1;
  1074. break;
  1075. case LCOM:
  1076. c = 0;
  1077. for (i = 0, rp = linebuf; *rp; rp++) {
  1078. c = *rp;
  1079. if(c >= 0x20 && c < 0x7F && c != '\\') {
  1080. Bputc(&fout, c);
  1081. if(i++ > 71) {
  1082. Bprint(&fout, "\\\n");
  1083. i = 0;
  1084. }
  1085. } else {
  1086. for (ucp = trans(*rp); *ucp; ucp++){
  1087. c = *ucp;
  1088. Bputc(&fout, c);
  1089. if(i++ > 71) {
  1090. Bprint(&fout, "\\\n");
  1091. i = 0;
  1092. }
  1093. }
  1094. }
  1095. }
  1096. if(c == ' ')
  1097. Bprint(&fout, "\\n");
  1098. Bputc(&fout, '\n');
  1099. break;
  1100. case NCOM:
  1101. if(!nflag)
  1102. putline(&fout, linebuf, spend-linebuf);
  1103. if(aptr > abuf)
  1104. arout();
  1105. if((execp = gline(linebuf)) == 0) {
  1106. delflag = 1;
  1107. break;
  1108. }
  1109. spend = execp;
  1110. break;
  1111. case CNCOM:
  1112. if(aptr > abuf)
  1113. arout();
  1114. *spend++ = '\n';
  1115. if((execp = gline(spend)) == 0) {
  1116. delflag = 1;
  1117. break;
  1118. }
  1119. spend = execp;
  1120. break;
  1121. case PCOM:
  1122. putline(&fout, linebuf, spend-linebuf);
  1123. break;
  1124. case CPCOM:
  1125. cpcom:
  1126. for(rp = linebuf; *rp && *rp != '\n'; rp++)
  1127. Bputc(&fout, *rp);
  1128. Bputc(&fout, '\n');
  1129. break;
  1130. case QCOM:
  1131. if(!nflag)
  1132. putline(&fout, linebuf, spend-linebuf);
  1133. if(aptr > abuf)
  1134. arout();
  1135. exits(0);
  1136. case RCOM:
  1137. *aptr++ = ipc;
  1138. if(aptr >= &abuf[MAXADDS])
  1139. quit("sed: Too many reads after line %ld\n",
  1140. (char *) lnum);
  1141. *aptr = 0;
  1142. break;
  1143. case SCOM:
  1144. i = substitute(ipc);
  1145. if(i && ipc->pfl)
  1146. if(ipc->pfl == 1)
  1147. putline(&fout, linebuf, spend-linebuf);
  1148. else
  1149. goto cpcom;
  1150. if(i && ipc->fcode)
  1151. goto wcom;
  1152. break;
  1153. case TCOM:
  1154. if(sflag == 0) break;
  1155. sflag = 0;
  1156. jflag = 1;
  1157. break;
  1158. wcom:
  1159. case WCOM:
  1160. putline(ipc->fcode,linebuf, spend-linebuf);
  1161. break;
  1162. case XCOM:
  1163. p1 = linebuf;
  1164. p2 = genbuf;
  1165. while(*p2++ = *p1++);
  1166. p1 = holdsp;
  1167. p2 = linebuf;
  1168. while(*p2++ = *p1++);
  1169. spend = p2 - 1;
  1170. p1 = genbuf;
  1171. p2 = holdsp;
  1172. while(*p2++ = *p1++);
  1173. hspend = p2 - 1;
  1174. break;
  1175. case YCOM:
  1176. p1 = linebuf;
  1177. p2 = ipc->text;
  1178. for (i = *p2++; *p1; p1++){
  1179. if (*p1 <= i) *p1 = p2[*p1];
  1180. }
  1181. break;
  1182. }
  1183. }
  1184. void
  1185. putline(Biobuf *bp, Rune *buf, int n)
  1186. {
  1187. while (n--)
  1188. Bputrune(bp, *buf++);
  1189. Bputc(bp, '\n');
  1190. }
  1191. ecmp(Rune *a, Rune *b, int count)
  1192. {
  1193. while(count--)
  1194. if(*a++ != *b++) return(0);
  1195. return(1);
  1196. }
  1197. void
  1198. arout(void)
  1199. {
  1200. Rune *p1;
  1201. Biobuf *fi;
  1202. int c;
  1203. char *s;
  1204. char buf[128];
  1205. for (aptr = abuf; *aptr; aptr++) {
  1206. if((*aptr)->command == ACOM) {
  1207. for(p1 = (*aptr)->text; *p1; p1++ )
  1208. Bputrune(&fout, *p1);
  1209. Bputc(&fout, '\n');
  1210. } else {
  1211. for(s = buf, p1= (*aptr)->text; *p1; p1++)
  1212. s += runetochar(s, p1);
  1213. *s = '\0';
  1214. if((fi = Bopen(buf, OREAD)) == 0)
  1215. continue;
  1216. while((c = Bgetc(fi)) >= 0)
  1217. Bputc(&fout, c);
  1218. Bterm(fi);
  1219. }
  1220. }
  1221. aptr = abuf;
  1222. *aptr = 0;
  1223. }
  /*
   * Terminate the program by calling exits() with the status string "error".
   */
  void
  errexit(void)
  {
      exits("error");
  }
  /*
   * Report a fatal error and leave through errexit().
   *
   * Writes "sed: ", then msg, then a newline to file descriptor 2.
   * msg is used as an fprint format string with arg as its only argument,
   * so callers supply neither the prefix nor the trailing newline.
   */
  void
  quit (char *msg, char *arg)
  {
      int errfd;

      errfd = 2;
      fprint(errfd, "sed: ");
      fprint(errfd, msg, arg);
      fprint(errfd, "\n");
      errexit();
  }
  1237. Rune *
  1238. gline(Rune *addr)
  1239. {
  1240. long c;
  1241. Rune *p;
  1242. static long peekc = 0;
  1243. if (f == 0 && opendata() < 0)
  1244. return 0;
  1245. sflag = 0;
  1246. lnum++;
  1247. /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
  1248. do {
  1249. p = addr;
  1250. for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
  1251. if (c == '\n') {
  1252. if ((peekc = Bgetrune(f)) < 0) {
  1253. if (fhead == 0)
  1254. dolflag = 1;
  1255. }
  1256. *p = '\0';
  1257. return p;
  1258. }
  1259. if (c && p < lbend)
  1260. *p++ = c;
  1261. }
  1262. /* return partial final line, adding implicit newline */
  1263. if(p != addr) {
  1264. *p = '\0';
  1265. peekc = -1;
  1266. if (fhead == 0)
  1267. dolflag = 1;
  1268. return p;
  1269. }
  1270. peekc = 0;
  1271. Bterm(f);
  1272. } while (opendata() > 0); /* Switch to next stream */
  1273. f = 0;
  1274. return 0;
  1275. }
  1276. /* Data file input section - the intent is to transparently
  1277. * catenate all data input streams.
  1278. */
  1279. void
  1280. enroll(char *filename) /* Add a file to the input file cache */
  1281. {
  1282. FileCache *fp;
  1283. if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
  1284. quit("Out of memory", 0);
  1285. if (ftail == 0)
  1286. fhead = fp;
  1287. else
  1288. ftail->next = fp;
  1289. ftail = fp;
  1290. fp->next = 0;
  1291. fp->name = filename; /* 0 => stdin */
  1292. }
  1293. int
  1294. opendata(void)
  1295. {
  1296. if (fhead == 0)
  1297. return -1;
  1298. if (fhead->name) {
  1299. if ((f = Bopen(fhead->name, OREAD)) == 0)
  1300. quit("Can't open %s", fhead->name);
  1301. } else {
  1302. Binit(&stdin, 0, OREAD);
  1303. f = &stdin;
  1304. }
  1305. fhead = fhead->next;
  1306. return 1;
  1307. }