sed.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455
  1. /*
  2. * sed -- stream editor
  3. */
  4. #include <u.h>
  5. #include <libc.h>
  6. #include <bio.h>
  7. #include <regexp.h>
  8. enum {
  9. DEPTH = 20, /* max nesting depth of {} */
  10. MAXCMDS = 512, /* max sed commands */
  11. ADDSIZE = 10000, /* size of add & read buffer */
  12. MAXADDS = 20, /* max pending adds and reads */
  13. LBSIZE = 8192, /* input line size */
  14. LABSIZE = 50, /* max number of labels */
  15. MAXSUB = 10, /* max number of sub reg exp */
  16. MAXFILES = 120, /* max output files */
  17. };
  18. /*
  19. * An address is a line #, a R.E., "$", a reference to the last
  20. * R.E., or nothing.
  21. */
  22. typedef struct {
  23. enum {
  24. A_NONE,
  25. A_DOL,
  26. A_LINE,
  27. A_RE,
  28. A_LAST,
  29. }type;
  30. union {
  31. long line; /* Line # */
  32. Reprog *rp; /* Compiled R.E. */
  33. };
  34. } Addr;
  35. typedef struct SEDCOM {
  36. Addr ad1; /* optional start address */
  37. Addr ad2; /* optional end address */
  38. union {
  39. Reprog *re1; /* compiled R.E. */
  40. Rune *text; /* added text or file name */
  41. struct SEDCOM *lb1; /* destination command of branch */
  42. };
  43. Rune *rhs; /* Right-hand side of substitution */
  44. Biobuf* fcode; /* File ID for read and write */
  45. char command; /* command code -see below */
  46. char gfl; /* 'Global' flag for substitutions */
  47. char pfl; /* 'print' flag for substitutions */
  48. char active; /* 1 => data between start and end */
  49. char negfl; /* negation flag */
  50. } SedCom;
  51. /* Command Codes for field SedCom.command */
  52. #define ACOM 01
  53. #define BCOM 020
  54. #define CCOM 02
  55. #define CDCOM 025
  56. #define CNCOM 022
  57. #define COCOM 017
  58. #define CPCOM 023
  59. #define DCOM 03
  60. #define ECOM 015
  61. #define EQCOM 013
  62. #define FCOM 016
  63. #define GCOM 027
  64. #define CGCOM 030
  65. #define HCOM 031
  66. #define CHCOM 032
  67. #define ICOM 04
  68. #define LCOM 05
  69. #define NCOM 012
  70. #define PCOM 010
  71. #define QCOM 011
  72. #define RCOM 06
  73. #define SCOM 07
  74. #define TCOM 021
  75. #define WCOM 014
  76. #define CWCOM 024
  77. #define YCOM 026
  78. #define XCOM 033
  79. typedef struct label { /* Label symbol table */
  80. Rune uninm[9]; /* Label name */
  81. SedCom *chain;
  82. SedCom *address; /* Command associated with label */
  83. } Label;
  84. typedef struct FILE_CACHE { /* Data file control block */
  85. struct FILE_CACHE *next; /* Forward Link */
  86. char *name; /* Name of file */
  87. } FileCache;
  88. SedCom pspace[MAXCMDS]; /* Command storage */
  89. SedCom *pend = pspace+MAXCMDS; /* End of command storage */
  90. SedCom *rep = pspace; /* Current fill point */
  91. Reprog *lastre = 0; /* Last regular expression */
  92. Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
  93. Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
  94. Rune *addend = addspace+ADDSIZE;
  95. SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
  96. SedCom **aptr = abuf;
  97. struct { /* Sed program input control block */
  98. enum PTYPE { /* Either on command line or in file */
  99. P_ARG,
  100. P_FILE,
  101. } type;
  102. union PCTL { /* Pointer to data */
  103. Biobuf *bp;
  104. char *curr;
  105. };
  106. } prog;
  107. Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
  108. FileCache *fhead = 0; /* Head of File Cache Chain */
  109. FileCache *ftail = 0; /* Tail of File Cache Chain */
  110. Rune *loc1; /* Start of pattern match */
  111. Rune *loc2; /* End of pattern match */
  112. Rune seof; /* Pattern delimiter char */
  113. Rune linebuf[LBSIZE+1]; /* Input data buffer */
  114. Rune *lbend = linebuf+LBSIZE; /* End of buffer */
  115. Rune *spend = linebuf; /* End of input data */
  116. Rune *cp; /* Current scan point in linebuf */
  117. Rune holdsp[LBSIZE+1]; /* Hold buffer */
  118. Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
  119. Rune *hspend = holdsp; /* End of hold data */
  120. int nflag; /* Command line flags */
  121. int gflag;
  122. int dolflag; /* Set when at true EOF */
  123. int sflag; /* Set when substitution done */
  124. int jflag; /* Set when jump required */
  125. int delflag; /* Delete current line when set */
  126. long lnum = 0; /* Input line count */
  127. char fname[MAXFILES][40]; /* File name cache */
  128. Biobuf *fcode[MAXFILES]; /* File ID cache */
  129. int nfiles = 0; /* Cache fill point */
  130. Biobuf fout; /* Output stream */
  131. Biobuf stdin; /* Default input */
  132. Biobuf* f = 0; /* Input data */
  133. Label ltab[LABSIZE]; /* Label name symbol table */
  134. Label *labend = ltab+LABSIZE; /* End of label table */
  135. Label *lab = ltab+1; /* Current Fill point */
  136. int depth = 0; /* {} stack pointer */
  137. Rune bad; /* Dummy err ptr reference */
  138. Rune *badp = &bad;
  139. char CGMES[] = "%S command garbled: %S";
  140. char TMMES[] = "Too much text: %S";
  141. char LTL[] = "Label too long: %S";
  142. char AD0MES[] = "No addresses allowed: %S";
  143. char AD1MES[] = "Only one address allowed: %S";
  144. void address(Addr *);
  145. void arout(void);
  146. int cmp(char *, char *);
  147. int rcmp(Rune *, Rune *);
  148. void command(SedCom *);
  149. Reprog *compile(void);
  150. Rune *compsub(Rune *, Rune *);
  151. void dechain(void);
  152. void dosub(Rune *);
  153. int ecmp(Rune *, Rune *, int);
  154. void enroll(char *);
  155. void errexit(void);
  156. int executable(SedCom *);
  157. void execute(void);
  158. void fcomp(void);
  159. long getrune(void);
  160. Rune *gline(Rune *);
  161. int match(Reprog *, Rune *);
  162. void newfile(enum PTYPE, char *);
  163. int opendata(void);
  164. Biobuf *open_file(char *);
  165. Rune *place(Rune *, Rune *, Rune *);
  166. void quit(char *, ...);
  167. int rline(Rune *, Rune *);
  168. Label *search(Label *);
  169. int substitute(SedCom *);
  170. char *text(char *);
  171. Rune *stext(Rune *, Rune *);
  172. int ycomp(SedCom *);
  173. char * trans(int c);
  174. void putline(Biobuf *bp, Rune *buf, int n);
  175. void
  176. main(int argc, char **argv)
  177. {
  178. int compfl;
  179. lnum = 0;
  180. Binit(&fout, 1, OWRITE);
  181. fcode[nfiles++] = &fout;
  182. compfl = 0;
  183. if(argc == 1)
  184. exits(0);
  185. ARGBEGIN{
  186. case 'e':
  187. if (argc <= 1)
  188. quit("missing pattern");
  189. newfile(P_ARG, ARGF());
  190. fcomp();
  191. compfl = 1;
  192. continue;
  193. case 'f':
  194. if(argc <= 1)
  195. quit("no pattern-file");
  196. newfile(P_FILE, ARGF());
  197. fcomp();
  198. compfl = 1;
  199. continue;
  200. case 'g':
  201. gflag++;
  202. continue;
  203. case 'n':
  204. nflag++;
  205. continue;
  206. default:
  207. fprint(2, "sed: Unknown flag: %c\n", ARGC());
  208. continue;
  209. } ARGEND
  210. if(compfl == 0) {
  211. if (--argc < 0)
  212. quit("missing pattern");
  213. newfile(P_ARG, *argv++);
  214. fcomp();
  215. }
  216. if(depth)
  217. quit("Too many {'s");
  218. ltab[0].address = rep;
  219. dechain();
  220. if(argc <= 0)
  221. enroll(0); /* Add stdin to cache */
  222. else
  223. while(--argc >= 0)
  224. enroll(*argv++);
  225. execute();
  226. exits(0);
  227. }
  228. void
  229. fcomp(void)
  230. {
  231. int i;
  232. Label *lpt;
  233. Rune *tp;
  234. SedCom *pt, *pt1;
  235. static Rune *p = addspace;
  236. static SedCom **cmpend[DEPTH]; /* stack of {} operations */
  237. while (rline(linebuf, lbend) >= 0) {
  238. cp = linebuf;
  239. comploop:
  240. while(*cp == L' ' || *cp == L'\t')
  241. cp++;
  242. if(*cp == L'\0' || *cp == L'#')
  243. continue;
  244. if(*cp == L';') {
  245. cp++;
  246. goto comploop;
  247. }
  248. address(&rep->ad1);
  249. if (rep->ad1.type != A_NONE) {
  250. if (rep->ad1.type == A_LAST) {
  251. if (!lastre)
  252. quit("First RE may not be null");
  253. rep->ad1.type = A_RE;
  254. rep->ad1.rp = lastre;
  255. }
  256. if(*cp == L',' || *cp == L';') {
  257. cp++;
  258. address(&rep->ad2);
  259. if (rep->ad2.type == A_LAST) {
  260. rep->ad2.type = A_RE;
  261. rep->ad2.rp = lastre;
  262. }
  263. } else
  264. rep->ad2.type = A_NONE;
  265. }
  266. while(*cp == L' ' || *cp == L'\t')
  267. cp++;
  268. swit:
  269. switch(*cp++) {
  270. default:
  271. quit("Unrecognized command: %S", linebuf);
  272. case '!':
  273. rep->negfl = 1;
  274. goto swit;
  275. case '{':
  276. rep->command = BCOM;
  277. rep->negfl = !rep->negfl;
  278. cmpend[depth++] = &rep->lb1;
  279. if(++rep >= pend)
  280. quit("Too many commands: %S", linebuf);
  281. if(*cp == '\0')
  282. continue;
  283. goto comploop;
  284. case '}':
  285. if(rep->ad1.type != A_NONE)
  286. quit(AD0MES, linebuf);
  287. if(--depth < 0)
  288. quit("Too many }'s");
  289. *cmpend[depth] = rep;
  290. if(*cp == 0)
  291. continue;
  292. goto comploop;
  293. case '=':
  294. rep->command = EQCOM;
  295. if(rep->ad2.type != A_NONE)
  296. quit(AD1MES, linebuf);
  297. break;
  298. case ':':
  299. if(rep->ad1.type != A_NONE)
  300. quit(AD0MES, linebuf);
  301. while(*cp == L' ')
  302. cp++;
  303. tp = lab->uninm;
  304. while (*cp && *cp != L';' && *cp != L' ' &&
  305. *cp != L'\t' && *cp != L'#') {
  306. *tp++ = *cp++;
  307. if(tp >= &lab->uninm[8])
  308. quit(LTL, linebuf);
  309. }
  310. *tp = L'\0';
  311. if (*lab->uninm == L'\0') /* no label? */
  312. quit(CGMES, L":", linebuf);
  313. if(lpt = search(lab)) {
  314. if(lpt->address)
  315. quit("Duplicate labels: %S", linebuf);
  316. } else {
  317. lab->chain = 0;
  318. lpt = lab;
  319. if(++lab >= labend)
  320. quit("Too many labels: %S", linebuf);
  321. }
  322. lpt->address = rep;
  323. if (*cp == L'#')
  324. continue;
  325. rep--; /* reuse this slot */
  326. break;
  327. case 'a':
  328. rep->command = ACOM;
  329. if(rep->ad2.type != A_NONE)
  330. quit(AD1MES, linebuf);
  331. if(*cp == L'\\')
  332. cp++;
  333. if(*cp++ != L'\n')
  334. quit(CGMES, L"a", linebuf);
  335. rep->text = p;
  336. p = stext(p, addend);
  337. break;
  338. case 'c':
  339. rep->command = CCOM;
  340. if(*cp == L'\\')
  341. cp++;
  342. if(*cp++ != L'\n')
  343. quit(CGMES, L"c", linebuf);
  344. rep->text = p;
  345. p = stext(p, addend);
  346. break;
  347. case 'i':
  348. rep->command = ICOM;
  349. if(rep->ad2.type != A_NONE)
  350. quit(AD1MES, linebuf);
  351. if(*cp == L'\\')
  352. cp++;
  353. if(*cp++ != L'\n')
  354. quit(CGMES, L"i", linebuf);
  355. rep->text = p;
  356. p = stext(p, addend);
  357. break;
  358. case 'g':
  359. rep->command = GCOM;
  360. break;
  361. case 'G':
  362. rep->command = CGCOM;
  363. break;
  364. case 'h':
  365. rep->command = HCOM;
  366. break;
  367. case 'H':
  368. rep->command = CHCOM;
  369. break;
  370. case 't':
  371. rep->command = TCOM;
  372. goto jtcommon;
  373. case 'b':
  374. rep->command = BCOM;
  375. jtcommon:
  376. while(*cp == L' ')
  377. cp++;
  378. if(*cp == L'\0' || *cp == L';') {
  379. /* no label; jump to end */
  380. if(pt = ltab[0].chain) {
  381. while((pt1 = pt->lb1) != nil)
  382. pt = pt1;
  383. pt->lb1 = rep;
  384. } else
  385. ltab[0].chain = rep;
  386. break;
  387. }
  388. /* copy label into lab->uninm */
  389. tp = lab->uninm;
  390. while((*tp = *cp++) != L'\0' && *tp != L';')
  391. if(++tp >= &lab->uninm[8])
  392. quit(LTL, linebuf);
  393. cp--;
  394. *tp = L'\0';
  395. if (*lab->uninm == L'\0')
  396. /* shouldn't get here */
  397. quit(CGMES, L"b or t", linebuf);
  398. if((lpt = search(lab)) != nil) {
  399. if(lpt->address)
  400. rep->lb1 = lpt->address;
  401. else {
  402. for(pt = lpt->chain; pt != nil &&
  403. (pt1 = pt->lb1) != nil; pt = pt1)
  404. ;
  405. if (pt)
  406. pt->lb1 = rep;
  407. }
  408. } else { /* add new label */
  409. lab->chain = rep;
  410. lab->address = 0;
  411. if(++lab >= labend)
  412. quit("Too many labels: %S", linebuf);
  413. }
  414. break;
  415. case 'n':
  416. rep->command = NCOM;
  417. break;
  418. case 'N':
  419. rep->command = CNCOM;
  420. break;
  421. case 'p':
  422. rep->command = PCOM;
  423. break;
  424. case 'P':
  425. rep->command = CPCOM;
  426. break;
  427. case 'r':
  428. rep->command = RCOM;
  429. if(rep->ad2.type != A_NONE)
  430. quit(AD1MES, linebuf);
  431. if(*cp++ != L' ')
  432. quit(CGMES, L"r", linebuf);
  433. rep->text = p;
  434. p = stext(p, addend);
  435. break;
  436. case 'd':
  437. rep->command = DCOM;
  438. break;
  439. case 'D':
  440. rep->command = CDCOM;
  441. rep->lb1 = pspace;
  442. break;
  443. case 'q':
  444. rep->command = QCOM;
  445. if(rep->ad2.type != A_NONE)
  446. quit(AD1MES, linebuf);
  447. break;
  448. case 'l':
  449. rep->command = LCOM;
  450. break;
  451. case 's':
  452. rep->command = SCOM;
  453. seof = *cp++;
  454. if ((rep->re1 = compile()) == 0) {
  455. if(!lastre)
  456. quit("First RE may not be null.");
  457. rep->re1 = lastre;
  458. }
  459. rep->rhs = p;
  460. if((p = compsub(p, addend)) == 0)
  461. quit(CGMES, L"s", linebuf);
  462. if(*cp == L'g') {
  463. cp++;
  464. rep->gfl++;
  465. } else if(gflag)
  466. rep->gfl++;
  467. if(*cp == L'p') {
  468. cp++;
  469. rep->pfl = 1;
  470. }
  471. if(*cp == L'P') {
  472. cp++;
  473. rep->pfl = 2;
  474. }
  475. if(*cp == L'w') {
  476. cp++;
  477. if(*cp++ != L' ')
  478. quit(CGMES, L"s", linebuf);
  479. text(fname[nfiles]);
  480. for(i = nfiles - 1; i >= 0; i--)
  481. if(cmp(fname[nfiles], fname[i]) == 0) {
  482. rep->fcode = fcode[i];
  483. goto done;
  484. }
  485. if(nfiles >= MAXFILES)
  486. quit("Too many files in w commands 1");
  487. rep->fcode = open_file(fname[nfiles]);
  488. }
  489. break;
  490. case 'w':
  491. rep->command = WCOM;
  492. if(*cp++ != L' ')
  493. quit(CGMES, L"w", linebuf);
  494. text(fname[nfiles]);
  495. for(i = nfiles - 1; i >= 0; i--)
  496. if(cmp(fname[nfiles], fname[i]) == 0) {
  497. rep->fcode = fcode[i];
  498. goto done;
  499. }
  500. if(nfiles >= MAXFILES){
  501. fprint(2, "sed: Too many files in w commands 2 \n");
  502. fprint(2, "nfiles = %d; MAXF = %d\n",
  503. nfiles, MAXFILES);
  504. errexit();
  505. }
  506. rep->fcode = open_file(fname[nfiles]);
  507. break;
  508. case 'x':
  509. rep->command = XCOM;
  510. break;
  511. case 'y':
  512. rep->command = YCOM;
  513. seof = *cp++;
  514. if (ycomp(rep) == 0)
  515. quit(CGMES, L"y", linebuf);
  516. break;
  517. }
  518. done:
  519. if(++rep >= pend)
  520. quit("Too many commands, last: %S", linebuf);
  521. if(*cp++ != L'\0') {
  522. if(cp[-1] == L';')
  523. goto comploop;
  524. quit(CGMES, cp - 1, linebuf);
  525. }
  526. }
  527. }
  528. Biobuf *
  529. open_file(char *name)
  530. {
  531. int fd;
  532. Biobuf *bp;
  533. if ((bp = malloc(sizeof(Biobuf))) == 0)
  534. quit("Out of memory");
  535. if ((fd = open(name, OWRITE)) < 0 &&
  536. (fd = create(name, OWRITE, 0666)) < 0)
  537. quit("Cannot create %s", name);
  538. Binit(bp, fd, OWRITE);
  539. Bseek(bp, 0, 2);
  540. fcode[nfiles++] = bp;
  541. return bp;
  542. }
  543. Rune *
  544. compsub(Rune *rhs, Rune *end)
  545. {
  546. Rune r;
  547. while ((r = *cp++) != '\0') {
  548. if(r == '\\') {
  549. if (rhs < end)
  550. *rhs++ = 0xFFFF;
  551. else
  552. return 0;
  553. r = *cp++;
  554. if(r == 'n')
  555. r = '\n';
  556. } else {
  557. if(r == seof) {
  558. if (rhs < end)
  559. *rhs++ = '\0';
  560. else
  561. return 0;
  562. return rhs;
  563. }
  564. }
  565. if (rhs < end)
  566. *rhs++ = r;
  567. else
  568. return 0;
  569. }
  570. return 0;
  571. }
  572. Reprog *
  573. compile(void)
  574. {
  575. Rune c;
  576. char *ep;
  577. char expbuf[512];
  578. if((c = *cp++) == seof) /* L'//' */
  579. return 0;
  580. ep = expbuf;
  581. do {
  582. if (c == L'\0' || c == L'\n')
  583. quit(TMMES, linebuf);
  584. if (c == L'\\') {
  585. if (ep >= expbuf+sizeof(expbuf))
  586. quit(TMMES, linebuf);
  587. ep += runetochar(ep, &c);
  588. if ((c = *cp++) == L'n')
  589. c = L'\n';
  590. }
  591. if (ep >= expbuf + sizeof(expbuf))
  592. quit(TMMES, linebuf);
  593. ep += runetochar(ep, &c);
  594. } while ((c = *cp++) != seof);
  595. *ep = 0;
  596. return lastre = regcomp(expbuf);
  597. }
  598. void
  599. regerror(char *s)
  600. {
  601. USED(s);
  602. quit(CGMES, L"r.e.-using", linebuf);
  603. }
  604. void
  605. newfile(enum PTYPE type, char *name)
  606. {
  607. if (type == P_ARG)
  608. prog.curr = name;
  609. else if ((prog.bp = Bopen(name, OREAD)) == 0)
  610. quit("Cannot open pattern-file: %s\n", name);
  611. prog.type = type;
  612. }
  613. int
  614. rline(Rune *buf, Rune *end)
  615. {
  616. long c;
  617. Rune r;
  618. while ((c = getrune()) >= 0) {
  619. r = c;
  620. if (r == '\\') {
  621. if (buf <= end)
  622. *buf++ = r;
  623. if ((c = getrune()) < 0)
  624. break;
  625. r = c;
  626. } else if (r == '\n') {
  627. *buf = '\0';
  628. return 1;
  629. }
  630. if (buf <= end)
  631. *buf++ = r;
  632. }
  633. *buf = '\0';
  634. return -1;
  635. }
  636. long
  637. getrune(void)
  638. {
  639. long c;
  640. Rune r;
  641. char *p;
  642. if (prog.type == P_ARG) {
  643. if ((p = prog.curr) != 0) {
  644. if (*p) {
  645. prog.curr += chartorune(&r, p);
  646. c = r;
  647. } else {
  648. c = '\n'; /* fake an end-of-line */
  649. prog.curr = 0;
  650. }
  651. } else
  652. c = -1;
  653. } else if ((c = Bgetrune(prog.bp)) < 0)
  654. Bterm(prog.bp);
  655. return c;
  656. }
  657. void
  658. address(Addr *ap)
  659. {
  660. int c;
  661. long lno;
  662. if((c = *cp++) == '$')
  663. ap->type = A_DOL;
  664. else if(c == '/') {
  665. seof = c;
  666. if (ap->rp = compile())
  667. ap->type = A_RE;
  668. else
  669. ap->type = A_LAST;
  670. }
  671. else if (c >= '0' && c <= '9') {
  672. lno = c - '0';
  673. while ((c = *cp) >= '0' && c <= '9')
  674. lno = lno*10 + *cp++ - '0';
  675. if(!lno)
  676. quit("line number 0 is illegal",0);
  677. ap->type = A_LINE;
  678. ap->line = lno;
  679. }
  680. else {
  681. cp--;
  682. ap->type = A_NONE;
  683. }
  684. }
  685. cmp(char *a, char *b) /* compare characters */
  686. {
  687. while(*a == *b++)
  688. if (*a == '\0')
  689. return 0;
  690. else
  691. a++;
  692. return 1;
  693. }
  694. rcmp(Rune *a, Rune *b) /* compare runes */
  695. {
  696. while(*a == *b++)
  697. if (*a == '\0')
  698. return 0;
  699. else
  700. a++;
  701. return 1;
  702. }
  703. char *
  704. text(char *p) /* extract character string */
  705. {
  706. Rune r;
  707. while(*cp == ' ' || *cp == '\t')
  708. cp++;
  709. while (*cp) {
  710. if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
  711. break;
  712. if (r == '\n')
  713. while (*cp == ' ' || *cp == '\t')
  714. cp++;
  715. p += runetochar(p, &r);
  716. }
  717. *p++ = '\0';
  718. return p;
  719. }
  720. Rune *
  721. stext(Rune *p, Rune *end) /* extract rune string */
  722. {
  723. while(*cp == L' ' || *cp == L'\t')
  724. cp++;
  725. while (*cp) {
  726. if (*cp == L'\\' && *++cp == L'\0')
  727. break;
  728. if (p >= end-1)
  729. quit(TMMES, linebuf);
  730. if ((*p++ = *cp++) == L'\n')
  731. while(*cp == L' ' || *cp == L'\t')
  732. cp++;
  733. }
  734. *p++ = 0;
  735. return p;
  736. }
  737. Label *
  738. search(Label *ptr)
  739. {
  740. Label *rp;
  741. for (rp = ltab; rp < ptr; rp++)
  742. if(rcmp(rp->uninm, ptr->uninm) == 0)
  743. return(rp);
  744. return(0);
  745. }
  746. void
  747. dechain(void)
  748. {
  749. Label *lptr;
  750. SedCom *rptr, *trptr;
  751. for(lptr = ltab; lptr < lab; lptr++) {
  752. if(lptr->address == 0)
  753. quit("Undefined label: %S", lptr->uninm);
  754. if(lptr->chain) {
  755. rptr = lptr->chain;
  756. while((trptr = rptr->lb1) != nil) {
  757. rptr->lb1 = lptr->address;
  758. rptr = trptr;
  759. }
  760. rptr->lb1 = lptr->address;
  761. }
  762. }
  763. }
  764. int
  765. ycomp(SedCom *r)
  766. {
  767. int i;
  768. Rune *rp, *sp, *tsp;
  769. Rune c, highc;
  770. highc = 0;
  771. for(tsp = cp; *tsp != seof; tsp++) {
  772. if(*tsp == L'\\')
  773. tsp++;
  774. if(*tsp == L'\n' || *tsp == L'\0')
  775. return 0;
  776. if (*tsp > highc)
  777. highc = *tsp;
  778. }
  779. tsp++;
  780. if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
  781. quit("Out of memory");
  782. *rp++ = highc; /* save upper bound */
  783. for (i = 0; i <= highc; i++)
  784. rp[i] = i;
  785. sp = cp;
  786. while((c = *sp++) != seof) {
  787. if(c == L'\\' && *sp == L'n') {
  788. sp++;
  789. c = L'\n';
  790. }
  791. if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
  792. rp[c] = L'\n';
  793. tsp++;
  794. }
  795. if(rp[c] == seof || rp[c] == L'\0') {
  796. free(r->re1);
  797. r->re1 = nil;
  798. return 0;
  799. }
  800. }
  801. if(*tsp != seof) {
  802. free(r->re1);
  803. r->re1 = nil;
  804. return 0;
  805. }
  806. cp = tsp+1;
  807. return 1;
  808. }
  809. void
  810. execute(void)
  811. {
  812. SedCom *ipc;
  813. while (spend = gline(linebuf)){
  814. for(ipc = pspace; ipc->command; ) {
  815. if (!executable(ipc)) {
  816. ipc++;
  817. continue;
  818. }
  819. command(ipc);
  820. if(delflag)
  821. break;
  822. if(jflag) {
  823. jflag = 0;
  824. if((ipc = ipc->lb1) == 0)
  825. break;
  826. } else
  827. ipc++;
  828. }
  829. if(!nflag && !delflag)
  830. putline(&fout, linebuf, spend - linebuf);
  831. if(aptr > abuf)
  832. arout();
  833. delflag = 0;
  834. }
  835. }
  836. /* determine if a statement should be applied to an input line */
  837. int
  838. executable(SedCom *ipc)
  839. {
  840. if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
  841. if (ipc->active == 1) /* Second line */
  842. ipc->active = 2;
  843. switch(ipc->ad2.type) {
  844. case A_NONE: /* No second addr; use first */
  845. ipc->active = 0;
  846. break;
  847. case A_DOL: /* Accept everything */
  848. return !ipc->negfl;
  849. case A_LINE: /* Line at end of range? */
  850. if (lnum <= ipc->ad2.line) {
  851. if (ipc->ad2.line == lnum)
  852. ipc->active = 0;
  853. return !ipc->negfl;
  854. }
  855. ipc->active = 0; /* out of range */
  856. return ipc->negfl;
  857. case A_RE: /* Check for matching R.E. */
  858. if (match(ipc->ad2.rp, linebuf))
  859. ipc->active = 0;
  860. return !ipc->negfl;
  861. default:
  862. quit("Internal error");
  863. }
  864. }
  865. switch (ipc->ad1.type) { /* Check first address */
  866. case A_NONE: /* Everything matches */
  867. return !ipc->negfl;
  868. case A_DOL: /* Only last line */
  869. if (dolflag)
  870. return !ipc->negfl;
  871. break;
  872. case A_LINE: /* Check line number */
  873. if (ipc->ad1.line == lnum) {
  874. ipc->active = 1; /* In range */
  875. return !ipc->negfl;
  876. }
  877. break;
  878. case A_RE: /* Check R.E. */
  879. if (match(ipc->ad1.rp, linebuf)) {
  880. ipc->active = 1; /* In range */
  881. return !ipc->negfl;
  882. }
  883. break;
  884. default:
  885. quit("Internal error");
  886. }
  887. return ipc->negfl;
  888. }
  889. int
  890. match(Reprog *pattern, Rune *buf)
  891. {
  892. if (!pattern)
  893. return 0;
  894. subexp[0].rsp = buf;
  895. subexp[0].ep = 0;
  896. if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
  897. loc1 = subexp[0].rsp;
  898. loc2 = subexp[0].rep;
  899. return 1;
  900. }
  901. loc1 = loc2 = 0;
  902. return 0;
  903. }
  904. int
  905. substitute(SedCom *ipc)
  906. {
  907. int len;
  908. if(!match(ipc->re1, linebuf))
  909. return 0;
  910. /*
  911. * we have at least one match. some patterns, e.g. '$' or '^', can
  912. * produce 0-length matches, so during a global substitute we must
  913. * bump to the character after a 0-length match to keep from looping.
  914. */
  915. sflag = 1;
  916. if(ipc->gfl == 0) /* single substitution */
  917. dosub(ipc->rhs);
  918. else
  919. do{ /* global substitution */
  920. len = loc2 - loc1; /* length of match */
  921. dosub(ipc->rhs); /* dosub moves loc2 */
  922. if(*loc2 == 0) /* end of string */
  923. break;
  924. if(len == 0) /* zero-length R.E. match */
  925. loc2++; /* bump over 0-length match */
  926. if(*loc2 == 0) /* end of string */
  927. break;
  928. } while(match(ipc->re1, loc2));
  929. return 1;
  930. }
  931. void
  932. dosub(Rune *rhsbuf)
  933. {
  934. int c, n;
  935. Rune *lp, *sp, *rp;
  936. lp = linebuf;
  937. sp = genbuf;
  938. rp = rhsbuf;
  939. while (lp < loc1)
  940. *sp++ = *lp++;
  941. while(c = *rp++) {
  942. if (c == '&') {
  943. sp = place(sp, loc1, loc2);
  944. continue;
  945. }
  946. if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') {
  947. n = c-'0';
  948. if (subexp[n].rsp && subexp[n].rep) {
  949. sp = place(sp, subexp[n].rsp, subexp[n].rep);
  950. continue;
  951. }
  952. else {
  953. fprint(2, "sed: Invalid back reference \\%d\n",n);
  954. errexit();
  955. }
  956. }
  957. *sp++ = c;
  958. if (sp >= &genbuf[LBSIZE])
  959. fprint(2, "sed: Output line too long.\n");
  960. }
  961. lp = loc2;
  962. loc2 = sp - genbuf + linebuf;
  963. while (*sp++ = *lp++)
  964. if (sp >= &genbuf[LBSIZE])
  965. fprint(2, "sed: Output line too long.\n");
  966. lp = linebuf;
  967. sp = genbuf;
  968. while (*lp++ = *sp++)
  969. ;
  970. spend = lp - 1;
  971. }
  972. Rune *
  973. place(Rune *sp, Rune *l1, Rune *l2)
  974. {
  975. while (l1 < l2) {
  976. *sp++ = *l1++;
  977. if (sp >= &genbuf[LBSIZE])
  978. fprint(2, "sed: Output line too long.\n");
  979. }
  980. return sp;
  981. }
  982. char *
  983. trans(int c)
  984. {
  985. static char buf[] = "\\x0000";
  986. static char hex[] = "0123456789abcdef";
  987. switch(c) {
  988. case '\b':
  989. return "\\b";
  990. case '\n':
  991. return "\\n";
  992. case '\r':
  993. return "\\r";
  994. case '\t':
  995. return "\\t";
  996. case '\\':
  997. return "\\\\";
  998. }
  999. buf[2] = hex[(c>>12)&0xF];
  1000. buf[3] = hex[(c>>8)&0xF];
  1001. buf[4] = hex[(c>>4)&0xF];
  1002. buf[5] = hex[c&0xF];
  1003. return buf;
  1004. }
  1005. void
  1006. command(SedCom *ipc)
  1007. {
  1008. int i, c;
  1009. char *ucp;
  1010. Rune *execp, *p1, *p2, *rp;
  1011. switch(ipc->command) {
  1012. case ACOM:
  1013. *aptr++ = ipc;
  1014. if(aptr >= abuf+MAXADDS)
  1015. quit("sed: Too many appends after line %ld\n",
  1016. (char *)lnum);
  1017. *aptr = 0;
  1018. break;
  1019. case CCOM:
  1020. delflag = 1;
  1021. if(ipc->active == 1) {
  1022. for(rp = ipc->text; *rp; rp++)
  1023. Bputrune(&fout, *rp);
  1024. Bputc(&fout, '\n');
  1025. }
  1026. break;
  1027. case DCOM:
  1028. delflag++;
  1029. break;
  1030. case CDCOM:
  1031. p1 = p2 = linebuf;
  1032. while(*p1 != '\n') {
  1033. if(*p1++ == 0) {
  1034. delflag++;
  1035. return;
  1036. }
  1037. }
  1038. p1++;
  1039. while(*p2++ = *p1++)
  1040. ;
  1041. spend = p2 - 1;
  1042. jflag++;
  1043. break;
  1044. case EQCOM:
  1045. Bprint(&fout, "%ld\n", lnum);
  1046. break;
  1047. case GCOM:
  1048. p1 = linebuf;
  1049. p2 = holdsp;
  1050. while(*p1++ = *p2++)
  1051. ;
  1052. spend = p1 - 1;
  1053. break;
  1054. case CGCOM:
  1055. *spend++ = '\n';
  1056. p1 = spend;
  1057. p2 = holdsp;
  1058. while(*p1++ = *p2++)
  1059. if(p1 >= lbend)
  1060. break;
  1061. spend = p1 - 1;
  1062. break;
  1063. case HCOM:
  1064. p1 = holdsp;
  1065. p2 = linebuf;
  1066. while(*p1++ = *p2++);
  1067. hspend = p1 - 1;
  1068. break;
  1069. case CHCOM:
  1070. *hspend++ = '\n';
  1071. p1 = hspend;
  1072. p2 = linebuf;
  1073. while(*p1++ = *p2++)
  1074. if(p1 >= hend)
  1075. break;
  1076. hspend = p1 - 1;
  1077. break;
  1078. case ICOM:
  1079. for(rp = ipc->text; *rp; rp++)
  1080. Bputrune(&fout, *rp);
  1081. Bputc(&fout, '\n');
  1082. break;
  1083. case BCOM:
  1084. jflag = 1;
  1085. break;
  1086. case LCOM:
  1087. c = 0;
  1088. for (i = 0, rp = linebuf; *rp; rp++) {
  1089. c = *rp;
  1090. if(c >= 0x20 && c < 0x7F && c != '\\') {
  1091. Bputc(&fout, c);
  1092. if(i++ > 71) {
  1093. Bprint(&fout, "\\\n");
  1094. i = 0;
  1095. }
  1096. } else {
  1097. for (ucp = trans(*rp); *ucp; ucp++){
  1098. c = *ucp;
  1099. Bputc(&fout, c);
  1100. if(i++ > 71) {
  1101. Bprint(&fout, "\\\n");
  1102. i = 0;
  1103. }
  1104. }
  1105. }
  1106. }
  1107. if(c == ' ')
  1108. Bprint(&fout, "\\n");
  1109. Bputc(&fout, '\n');
  1110. break;
  1111. case NCOM:
  1112. if(!nflag)
  1113. putline(&fout, linebuf, spend-linebuf);
  1114. if(aptr > abuf)
  1115. arout();
  1116. if((execp = gline(linebuf)) == 0) {
  1117. delflag = 1;
  1118. break;
  1119. }
  1120. spend = execp;
  1121. break;
  1122. case CNCOM:
  1123. if(aptr > abuf)
  1124. arout();
  1125. *spend++ = '\n';
  1126. if((execp = gline(spend)) == 0) {
  1127. delflag = 1;
  1128. break;
  1129. }
  1130. spend = execp;
  1131. break;
  1132. case PCOM:
  1133. putline(&fout, linebuf, spend-linebuf);
  1134. break;
  1135. case CPCOM:
  1136. cpcom:
  1137. for(rp = linebuf; *rp && *rp != '\n'; rp++)
  1138. Bputc(&fout, *rp);
  1139. Bputc(&fout, '\n');
  1140. break;
  1141. case QCOM:
  1142. if(!nflag)
  1143. putline(&fout, linebuf, spend-linebuf);
  1144. if(aptr > abuf)
  1145. arout();
  1146. exits(0);
  1147. case RCOM:
  1148. *aptr++ = ipc;
  1149. if(aptr >= &abuf[MAXADDS])
  1150. quit("sed: Too many reads after line %ld\n",
  1151. (char *)lnum);
  1152. *aptr = 0;
  1153. break;
  1154. case SCOM:
  1155. i = substitute(ipc);
  1156. if(i && ipc->pfl)
  1157. if(ipc->pfl == 1)
  1158. putline(&fout, linebuf, spend-linebuf);
  1159. else
  1160. goto cpcom;
  1161. if(i && ipc->fcode)
  1162. goto wcom;
  1163. break;
  1164. case TCOM:
  1165. if(sflag) {
  1166. sflag = 0;
  1167. jflag = 1;
  1168. }
  1169. break;
  1170. case WCOM:
  1171. wcom:
  1172. putline(ipc->fcode,linebuf, spend - linebuf);
  1173. break;
  1174. case XCOM:
  1175. p1 = linebuf;
  1176. p2 = genbuf;
  1177. while(*p2++ = *p1++)
  1178. ;
  1179. p1 = holdsp;
  1180. p2 = linebuf;
  1181. while(*p2++ = *p1++)
  1182. ;
  1183. spend = p2 - 1;
  1184. p1 = genbuf;
  1185. p2 = holdsp;
  1186. while(*p2++ = *p1++)
  1187. ;
  1188. hspend = p2 - 1;
  1189. break;
  1190. case YCOM:
  1191. p1 = linebuf;
  1192. p2 = ipc->text;
  1193. for (i = *p2++; *p1; p1++)
  1194. if (*p1 <= i)
  1195. *p1 = p2[*p1];
  1196. break;
  1197. }
  1198. }
  1199. void
  1200. putline(Biobuf *bp, Rune *buf, int n)
  1201. {
  1202. while (n--)
  1203. Bputrune(bp, *buf++);
  1204. Bputc(bp, '\n');
  1205. }
  1206. ecmp(Rune *a, Rune *b, int count)
  1207. {
  1208. while(count--)
  1209. if(*a++ != *b++)
  1210. return 0;
  1211. return 1;
  1212. }
  1213. void
  1214. arout(void)
  1215. {
  1216. int c;
  1217. char *s;
  1218. char buf[128];
  1219. Rune *p1;
  1220. Biobuf *fi;
  1221. for (aptr = abuf; *aptr; aptr++) {
  1222. if((*aptr)->command == ACOM) {
  1223. for(p1 = (*aptr)->text; *p1; p1++ )
  1224. Bputrune(&fout, *p1);
  1225. Bputc(&fout, '\n');
  1226. } else {
  1227. for(s = buf, p1 = (*aptr)->text; *p1; p1++)
  1228. s += runetochar(s, p1);
  1229. *s = '\0';
  1230. if((fi = Bopen(buf, OREAD)) == 0)
  1231. continue;
  1232. while((c = Bgetc(fi)) >= 0)
  1233. Bputc(&fout, c);
  1234. Bterm(fi);
  1235. }
  1236. }
  1237. aptr = abuf;
  1238. *aptr = 0;
  1239. }
  /*
   * Terminate the program with the exit status string "error".
   */
  void
  errexit(void)
  {
      char *status = "error";

      exits(status);
  }
  /*
   * Report a fatal error and exit through errexit().
   *
   * The message is "sed: ", then fmt expanded with the variadic
   * arguments, then a newline.  It is assembled in a 256-byte buffer and
   * written to file descriptor 2, so callers pass neither the prefix nor
   * the trailing newline.
   */
  void
  quit(char *fmt, ...)
  {
      char msg[256];
      char *end = msg + sizeof msg;
      char *cur;
      va_list ap;

      cur = seprint(msg, end, "sed: ");
      va_start(ap, fmt);
      cur = vseprint(cur, end, fmt, ap);
      va_end(ap);
      cur = seprint(cur, end, "\n");
      write(2, msg, cur - msg);
      errexit();
  }
  1260. Rune *
  1261. gline(Rune *addr)
  1262. {
  1263. long c;
  1264. Rune *p;
  1265. static long peekc = 0;
  1266. if (f == 0 && opendata() < 0)
  1267. return 0;
  1268. sflag = 0;
  1269. lnum++;
  1270. /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
  1271. do {
  1272. p = addr;
  1273. for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
  1274. if (c == '\n') {
  1275. if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
  1276. dolflag = 1;
  1277. *p = '\0';
  1278. return p;
  1279. }
  1280. if (c && p < lbend)
  1281. *p++ = c;
  1282. }
  1283. /* return partial final line, adding implicit newline */
  1284. if(p != addr) {
  1285. *p = '\0';
  1286. peekc = -1;
  1287. if (fhead == 0)
  1288. dolflag = 1;
  1289. return p;
  1290. }
  1291. peekc = 0;
  1292. Bterm(f);
  1293. } while (opendata() > 0); /* Switch to next stream */
  1294. f = 0;
  1295. return 0;
  1296. }
  1297. /*
  1298. * Data file input section - the intent is to transparently
  1299. * catenate all data input streams.
  1300. */
  1301. void
  1302. enroll(char *filename) /* Add a file to the input file cache */
  1303. {
  1304. FileCache *fp;
  1305. if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
  1306. quit("Out of memory");
  1307. if (ftail == nil)
  1308. fhead = fp;
  1309. else
  1310. ftail->next = fp;
  1311. ftail = fp;
  1312. fp->next = nil;
  1313. fp->name = filename; /* 0 => stdin */
  1314. }
  1315. int
  1316. opendata(void)
  1317. {
  1318. if (fhead == nil)
  1319. return -1;
  1320. if (fhead->name) {
  1321. if ((f = Bopen(fhead->name, OREAD)) == nil)
  1322. quit("Can't open %s", fhead->name);
  1323. } else {
  1324. Binit(&stdin, 0, OREAD);
  1325. f = &stdin;
  1326. }
  1327. fhead = fhead->next;
  1328. return 1;
  1329. }