sed.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * sed -- stream editor
  11. */
  12. #include <u.h>
  13. #include <libc.h>
  14. #include <bio.h>
  15. #include <regexp.h>
  16. enum {
  17. DEPTH = 20, /* max nesting depth of {} */
  18. MAXCMDS = 512, /* max sed commands */
  19. ADDSIZE = 10000, /* size of add & read buffer */
  20. MAXADDS = 20, /* max pending adds and reads */
  21. LBSIZE = 8192, /* input line size */
  22. LABSIZE = 50, /* max number of labels */
  23. MAXSUB = 10, /* max number of sub reg exp */
  24. MAXFILES = 120, /* max output files */
  25. };
  26. /*
  27. * An address is a line #, a R.E., "$", a reference to the last
  28. * R.E., or nothing.
  29. */
  30. typedef struct {
  31. enum {
  32. A_NONE,
  33. A_DOL,
  34. A_LINE,
  35. A_RE,
  36. A_LAST,
  37. }type;
  38. union {
  39. int32_t line; /* Line # */
  40. Reprog *rp; /* Compiled R.E. */
  41. };
  42. } Addr;
  43. typedef struct SEDCOM {
  44. Addr ad1; /* optional start address */
  45. Addr ad2; /* optional end address */
  46. union {
  47. Reprog *re1; /* compiled R.E. */
  48. Rune *text; /* added text or file name */
  49. struct SEDCOM *lb1; /* destination command of branch */
  50. };
  51. Rune *rhs; /* Right-hand side of substitution */
  52. Biobuf* fcode; /* File ID for read and write */
  53. char command; /* command code -see below */
  54. char gfl; /* 'Global' flag for substitutions */
  55. char pfl; /* 'print' flag for substitutions */
  56. char active; /* 1 => data between start and end */
  57. char negfl; /* negation flag */
  58. } SedCom;
  59. /* Command Codes for field SedCom.command */
  60. #define ACOM 01
  61. #define BCOM 020
  62. #define CCOM 02
  63. #define CDCOM 025
  64. #define CNCOM 022
  65. #define COCOM 017
  66. #define CPCOM 023
  67. #define DCOM 03
  68. #define ECOM 015
  69. #define EQCOM 013
  70. #define FCOM 016
  71. #define GCOM 027
  72. #define CGCOM 030
  73. #define HCOM 031
  74. #define CHCOM 032
  75. #define ICOM 04
  76. #define LCOM 05
  77. #define NCOM 012
  78. #define PCOM 010
  79. #define QCOM 011
  80. #define RCOM 06
  81. #define SCOM 07
  82. #define TCOM 021
  83. #define WCOM 014
  84. #define CWCOM 024
  85. #define YCOM 026
  86. #define XCOM 033
  87. typedef struct label { /* Label symbol table */
  88. Rune uninm[9]; /* Label name */
  89. SedCom *chain;
  90. SedCom *address; /* Command associated with label */
  91. } Label;
  92. typedef struct FILE_CACHE { /* Data file control block */
  93. struct FILE_CACHE *next; /* Forward Link */
  94. char *name; /* Name of file */
  95. } FileCache;
  96. SedCom pspace[MAXCMDS]; /* Command storage */
  97. SedCom *pend = pspace+MAXCMDS; /* End of command storage */
  98. SedCom *rep = pspace; /* Current fill point */
  99. Reprog *lastre = 0; /* Last regular expression */
  100. Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
  101. Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
  102. Rune *addend = addspace+ADDSIZE;
  103. SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
  104. SedCom **aptr = abuf;
  105. struct { /* Sed program input control block */
  106. enum PTYPE { /* Either on command line or in file */
  107. P_ARG,
  108. P_FILE,
  109. } type;
  110. union PCTL { /* Pointer to data */
  111. Biobuf *bp;
  112. char *curr;
  113. };
  114. } prog;
  115. Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
  116. FileCache *fhead = 0; /* Head of File Cache Chain */
  117. FileCache *ftail = 0; /* Tail of File Cache Chain */
  118. Rune *loc1; /* Start of pattern match */
  119. Rune *loc2; /* End of pattern match */
  120. Rune seof; /* Pattern delimiter char */
  121. Rune linebuf[LBSIZE+1]; /* Input data buffer */
  122. Rune *lbend = linebuf+LBSIZE; /* End of buffer */
  123. Rune *spend = linebuf; /* End of input data */
  124. Rune *cp; /* Current scan point in linebuf */
  125. Rune holdsp[LBSIZE+1]; /* Hold buffer */
  126. Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
  127. Rune *hspend = holdsp; /* End of hold data */
  128. int nflag; /* Command line flags */
  129. int gflag;
  130. int dolflag; /* Set when at true EOF */
  131. int sflag; /* Set when substitution done */
  132. int jflag; /* Set when jump required */
  133. int delflag; /* Delete current line when set */
  134. int32_t lnum = 0; /* Input line count */
  135. char fname[MAXFILES][40]; /* File name cache */
  136. Biobuf *fcode[MAXFILES]; /* File ID cache */
  137. int nfiles = 0; /* Cache fill point */
  138. Biobuf fout; /* Output stream */
  139. Biobuf stdin; /* Default input */
  140. Biobuf* f = 0; /* Input data */
  141. Label ltab[LABSIZE]; /* Label name symbol table */
  142. Label *labend = ltab+LABSIZE; /* End of label table */
  143. Label *lab = ltab+1; /* Current Fill point */
  144. int depth = 0; /* {} stack pointer */
  145. Rune bad; /* Dummy err ptr reference */
  146. Rune *badp = &bad;
  147. char CGMES[] = "%S command garbled: %S";
  148. char TMMES[] = "Too much text: %S";
  149. char LTL[] = "Label too long: %S";
  150. char AD0MES[] = "No addresses allowed: %S";
  151. char AD1MES[] = "Only one address allowed: %S";
  152. void address(Addr *);
  153. void arout(void);
  154. int cmp(char *, char *);
  155. int rcmp(Rune *, Rune *);
  156. void command(SedCom *);
  157. Reprog *compile(void);
  158. Rune *compsub(Rune *, Rune *);
  159. void dechain(void);
  160. void dosub(Rune *);
  161. int ecmp(Rune *, Rune *, int);
  162. void enroll(char *);
  163. void errexit(void);
  164. int executable(SedCom *);
  165. void execute(void);
  166. void fcomp(void);
  167. int32_t getrune(void);
  168. Rune *gline(Rune *);
  169. int match(Reprog *, Rune *);
  170. void newfile(enum PTYPE, char *);
  171. int opendata(void);
  172. Biobuf *open_file(char *);
  173. Rune *place(Rune *, Rune *, Rune *);
  174. void quit(char *, ...);
  175. int rline(Rune *, Rune *);
  176. Label *search(Label *);
  177. int substitute(SedCom *);
  178. char *text(char *);
  179. Rune *stext(Rune *, Rune *);
  180. int ycomp(SedCom *);
  181. char * trans(int c);
  182. void putline(Biobuf *bp, Rune *buf, int n);
  183. void
  184. main(int argc, char **argv)
  185. {
  186. int compfl;
  187. lnum = 0;
  188. Binit(&fout, 1, OWRITE);
  189. fcode[nfiles++] = &fout;
  190. compfl = 0;
  191. if(argc == 1)
  192. exits(0);
  193. ARGBEGIN{
  194. case 'e':
  195. if (argc <= 1)
  196. quit("missing pattern");
  197. newfile(P_ARG, ARGF());
  198. fcomp();
  199. compfl = 1;
  200. continue;
  201. case 'f':
  202. if(argc <= 1)
  203. quit("no pattern-file");
  204. newfile(P_FILE, ARGF());
  205. fcomp();
  206. compfl = 1;
  207. continue;
  208. case 'g':
  209. gflag++;
  210. continue;
  211. case 'n':
  212. nflag++;
  213. continue;
  214. default:
  215. fprint(2, "sed: Unknown flag: %c\n", ARGC());
  216. continue;
  217. } ARGEND
  218. if(compfl == 0) {
  219. if (--argc < 0)
  220. quit("missing pattern");
  221. newfile(P_ARG, *argv++);
  222. fcomp();
  223. }
  224. if(depth)
  225. quit("Too many {'s");
  226. ltab[0].address = rep;
  227. dechain();
  228. if(argc <= 0)
  229. enroll(0); /* Add stdin to cache */
  230. else
  231. while(--argc >= 0)
  232. enroll(*argv++);
  233. execute();
  234. exits(0);
  235. }
  236. void
  237. fcomp(void)
  238. {
  239. int i;
  240. Label *lpt;
  241. Rune *tp;
  242. SedCom *pt, *pt1;
  243. static Rune *p = addspace;
  244. static SedCom **cmpend[DEPTH]; /* stack of {} operations */
  245. while (rline(linebuf, lbend) >= 0) {
  246. cp = linebuf;
  247. comploop:
  248. while(*cp == L' ' || *cp == L'\t')
  249. cp++;
  250. if(*cp == L'\0' || *cp == L'#')
  251. continue;
  252. if(*cp == L';') {
  253. cp++;
  254. goto comploop;
  255. }
  256. address(&rep->ad1);
  257. if (rep->ad1.type != A_NONE) {
  258. if (rep->ad1.type == A_LAST) {
  259. if (!lastre)
  260. quit("First RE may not be null");
  261. rep->ad1.type = A_RE;
  262. rep->ad1.rp = lastre;
  263. }
  264. if(*cp == L',' || *cp == L';') {
  265. cp++;
  266. address(&rep->ad2);
  267. if (rep->ad2.type == A_LAST) {
  268. rep->ad2.type = A_RE;
  269. rep->ad2.rp = lastre;
  270. }
  271. } else
  272. rep->ad2.type = A_NONE;
  273. }
  274. while(*cp == L' ' || *cp == L'\t')
  275. cp++;
  276. swit:
  277. switch(*cp++) {
  278. default:
  279. quit("Unrecognized command: %S", linebuf);
  280. case '!':
  281. rep->negfl = 1;
  282. goto swit;
  283. case '{':
  284. rep->command = BCOM;
  285. rep->negfl = !rep->negfl;
  286. cmpend[depth++] = &rep->lb1;
  287. if(++rep >= pend)
  288. quit("Too many commands: %S", linebuf);
  289. if(*cp == '\0')
  290. continue;
  291. goto comploop;
  292. case '}':
  293. if(rep->ad1.type != A_NONE)
  294. quit(AD0MES, linebuf);
  295. if(--depth < 0)
  296. quit("Too many }'s");
  297. *cmpend[depth] = rep;
  298. if(*cp == 0)
  299. continue;
  300. goto comploop;
  301. case '=':
  302. rep->command = EQCOM;
  303. if(rep->ad2.type != A_NONE)
  304. quit(AD1MES, linebuf);
  305. break;
  306. case ':':
  307. if(rep->ad1.type != A_NONE)
  308. quit(AD0MES, linebuf);
  309. while(*cp == L' ')
  310. cp++;
  311. tp = lab->uninm;
  312. while (*cp && *cp != L';' && *cp != L' ' &&
  313. *cp != L'\t' && *cp != L'#') {
  314. *tp++ = *cp++;
  315. if(tp >= &lab->uninm[8])
  316. quit(LTL, linebuf);
  317. }
  318. *tp = L'\0';
  319. if (*lab->uninm == L'\0') /* no label? */
  320. quit(CGMES, L":", linebuf);
  321. if(lpt = search(lab)) {
  322. if(lpt->address)
  323. quit("Duplicate labels: %S", linebuf);
  324. } else {
  325. lab->chain = 0;
  326. lpt = lab;
  327. if(++lab >= labend)
  328. quit("Too many labels: %S", linebuf);
  329. }
  330. lpt->address = rep;
  331. if (*cp == L'#')
  332. continue;
  333. rep--; /* reuse this slot */
  334. break;
  335. case 'a':
  336. rep->command = ACOM;
  337. if(rep->ad2.type != A_NONE)
  338. quit(AD1MES, linebuf);
  339. if(*cp == L'\\')
  340. cp++;
  341. if(*cp++ != L'\n')
  342. quit(CGMES, L"a", linebuf);
  343. rep->text = p;
  344. p = stext(p, addend);
  345. break;
  346. case 'c':
  347. rep->command = CCOM;
  348. if(*cp == L'\\')
  349. cp++;
  350. if(*cp++ != L'\n')
  351. quit(CGMES, L"c", linebuf);
  352. rep->text = p;
  353. p = stext(p, addend);
  354. break;
  355. case 'i':
  356. rep->command = ICOM;
  357. if(rep->ad2.type != A_NONE)
  358. quit(AD1MES, linebuf);
  359. if(*cp == L'\\')
  360. cp++;
  361. if(*cp++ != L'\n')
  362. quit(CGMES, L"i", linebuf);
  363. rep->text = p;
  364. p = stext(p, addend);
  365. break;
  366. case 'g':
  367. rep->command = GCOM;
  368. break;
  369. case 'G':
  370. rep->command = CGCOM;
  371. break;
  372. case 'h':
  373. rep->command = HCOM;
  374. break;
  375. case 'H':
  376. rep->command = CHCOM;
  377. break;
  378. case 't':
  379. rep->command = TCOM;
  380. goto jtcommon;
  381. case 'b':
  382. rep->command = BCOM;
  383. jtcommon:
  384. while(*cp == L' ')
  385. cp++;
  386. if(*cp == L'\0' || *cp == L';') {
  387. /* no label; jump to end */
  388. if(pt = ltab[0].chain) {
  389. while((pt1 = pt->lb1) != nil)
  390. pt = pt1;
  391. pt->lb1 = rep;
  392. } else
  393. ltab[0].chain = rep;
  394. break;
  395. }
  396. /* copy label into lab->uninm */
  397. tp = lab->uninm;
  398. while((*tp = *cp++) != L'\0' && *tp != L';')
  399. if(++tp >= &lab->uninm[8])
  400. quit(LTL, linebuf);
  401. cp--;
  402. *tp = L'\0';
  403. if (*lab->uninm == L'\0')
  404. /* shouldn't get here */
  405. quit(CGMES, L"b or t", linebuf);
  406. if((lpt = search(lab)) != nil) {
  407. if(lpt->address)
  408. rep->lb1 = lpt->address;
  409. else {
  410. for(pt = lpt->chain; pt != nil &&
  411. (pt1 = pt->lb1) != nil; pt = pt1)
  412. ;
  413. if (pt)
  414. pt->lb1 = rep;
  415. }
  416. } else { /* add new label */
  417. lab->chain = rep;
  418. lab->address = 0;
  419. if(++lab >= labend)
  420. quit("Too many labels: %S", linebuf);
  421. }
  422. break;
  423. case 'n':
  424. rep->command = NCOM;
  425. break;
  426. case 'N':
  427. rep->command = CNCOM;
  428. break;
  429. case 'p':
  430. rep->command = PCOM;
  431. break;
  432. case 'P':
  433. rep->command = CPCOM;
  434. break;
  435. case 'r':
  436. rep->command = RCOM;
  437. if(rep->ad2.type != A_NONE)
  438. quit(AD1MES, linebuf);
  439. if(*cp++ != L' ')
  440. quit(CGMES, L"r", linebuf);
  441. rep->text = p;
  442. p = stext(p, addend);
  443. break;
  444. case 'd':
  445. rep->command = DCOM;
  446. break;
  447. case 'D':
  448. rep->command = CDCOM;
  449. rep->lb1 = pspace;
  450. break;
  451. case 'q':
  452. rep->command = QCOM;
  453. if(rep->ad2.type != A_NONE)
  454. quit(AD1MES, linebuf);
  455. break;
  456. case 'l':
  457. rep->command = LCOM;
  458. break;
  459. case 's':
  460. rep->command = SCOM;
  461. seof = *cp++;
  462. if ((rep->re1 = compile()) == 0) {
  463. if(!lastre)
  464. quit("First RE may not be null.");
  465. rep->re1 = lastre;
  466. }
  467. rep->rhs = p;
  468. if((p = compsub(p, addend)) == 0)
  469. quit(CGMES, L"s", linebuf);
  470. if(*cp == L'g') {
  471. cp++;
  472. rep->gfl++;
  473. } else if(gflag)
  474. rep->gfl++;
  475. if(*cp == L'p') {
  476. cp++;
  477. rep->pfl = 1;
  478. }
  479. if(*cp == L'P') {
  480. cp++;
  481. rep->pfl = 2;
  482. }
  483. if(*cp == L'w') {
  484. cp++;
  485. if(*cp++ != L' ')
  486. quit(CGMES, L"s", linebuf);
  487. text(fname[nfiles]);
  488. for(i = nfiles - 1; i >= 0; i--)
  489. if(cmp(fname[nfiles], fname[i]) == 0) {
  490. rep->fcode = fcode[i];
  491. goto done;
  492. }
  493. if(nfiles >= MAXFILES)
  494. quit("Too many files in w commands 1");
  495. rep->fcode = open_file(fname[nfiles]);
  496. }
  497. break;
  498. case 'w':
  499. rep->command = WCOM;
  500. if(*cp++ != L' ')
  501. quit(CGMES, L"w", linebuf);
  502. text(fname[nfiles]);
  503. for(i = nfiles - 1; i >= 0; i--)
  504. if(cmp(fname[nfiles], fname[i]) == 0) {
  505. rep->fcode = fcode[i];
  506. goto done;
  507. }
  508. if(nfiles >= MAXFILES){
  509. fprint(2, "sed: Too many files in w commands 2 \n");
  510. fprint(2, "nfiles = %d; MAXF = %d\n",
  511. nfiles, MAXFILES);
  512. errexit();
  513. }
  514. rep->fcode = open_file(fname[nfiles]);
  515. break;
  516. case 'x':
  517. rep->command = XCOM;
  518. break;
  519. case 'y':
  520. rep->command = YCOM;
  521. seof = *cp++;
  522. if (ycomp(rep) == 0)
  523. quit(CGMES, L"y", linebuf);
  524. break;
  525. }
  526. done:
  527. if(++rep >= pend)
  528. quit("Too many commands, last: %S", linebuf);
  529. if(*cp++ != L'\0') {
  530. if(cp[-1] == L';')
  531. goto comploop;
  532. quit(CGMES, cp - 1, linebuf);
  533. }
  534. }
  535. }
  536. Biobuf *
  537. open_file(char *name)
  538. {
  539. int fd;
  540. Biobuf *bp;
  541. if ((bp = malloc(sizeof(Biobuf))) == 0)
  542. quit("Out of memory");
  543. if ((fd = open(name, OWRITE)) < 0 &&
  544. (fd = create(name, OWRITE, 0666)) < 0)
  545. quit("Cannot create %s", name);
  546. Binit(bp, fd, OWRITE);
  547. Bseek(bp, 0, 2);
  548. fcode[nfiles++] = bp;
  549. return bp;
  550. }
  551. Rune *
  552. compsub(Rune *rhs, Rune *end)
  553. {
  554. Rune r;
  555. while ((r = *cp++) != '\0') {
  556. if(r == '\\') {
  557. if (rhs < end)
  558. *rhs++ = Runemax;
  559. else
  560. return 0;
  561. r = *cp++;
  562. if(r == 'n')
  563. r = '\n';
  564. } else {
  565. if(r == seof) {
  566. if (rhs < end)
  567. *rhs++ = '\0';
  568. else
  569. return 0;
  570. return rhs;
  571. }
  572. }
  573. if (rhs < end)
  574. *rhs++ = r;
  575. else
  576. return 0;
  577. }
  578. return 0;
  579. }
  580. Reprog *
  581. compile(void)
  582. {
  583. Rune c;
  584. char *ep;
  585. char expbuf[512];
  586. if((c = *cp++) == seof) /* L'//' */
  587. return 0;
  588. ep = expbuf;
  589. do {
  590. if (c == L'\0' || c == L'\n')
  591. quit(TMMES, linebuf);
  592. if (c == L'\\') {
  593. if (ep >= expbuf+sizeof(expbuf))
  594. quit(TMMES, linebuf);
  595. ep += runetochar(ep, &c);
  596. if ((c = *cp++) == L'n')
  597. c = L'\n';
  598. }
  599. if (ep >= expbuf + sizeof(expbuf))
  600. quit(TMMES, linebuf);
  601. ep += runetochar(ep, &c);
  602. } while ((c = *cp++) != seof);
  603. *ep = 0;
  604. return lastre = regcomp(expbuf);
  605. }
  606. void
  607. regerror(char *s)
  608. {
  609. USED(s);
  610. quit(CGMES, L"r.e.-using", linebuf);
  611. }
  612. void
  613. newfile(enum PTYPE type, char *name)
  614. {
  615. if (type == P_ARG)
  616. prog.curr = name;
  617. else if ((prog.bp = Bopen(name, OREAD)) == 0)
  618. quit("Cannot open pattern-file: %s\n", name);
  619. prog.type = type;
  620. }
  621. int
  622. rline(Rune *buf, Rune *end)
  623. {
  624. int32_t c;
  625. Rune r;
  626. while ((c = getrune()) >= 0) {
  627. r = c;
  628. if (r == '\\') {
  629. if (buf <= end)
  630. *buf++ = r;
  631. if ((c = getrune()) < 0)
  632. break;
  633. r = c;
  634. } else if (r == '\n') {
  635. *buf = '\0';
  636. return 1;
  637. }
  638. if (buf <= end)
  639. *buf++ = r;
  640. }
  641. *buf = '\0';
  642. return -1;
  643. }
  644. int32_t
  645. getrune(void)
  646. {
  647. int32_t c;
  648. Rune r;
  649. char *p;
  650. if (prog.type == P_ARG) {
  651. if ((p = prog.curr) != 0) {
  652. if (*p) {
  653. prog.curr += chartorune(&r, p);
  654. c = r;
  655. } else {
  656. c = '\n'; /* fake an end-of-line */
  657. prog.curr = 0;
  658. }
  659. } else
  660. c = -1;
  661. } else if ((c = Bgetrune(prog.bp)) < 0)
  662. Bterm(prog.bp);
  663. return c;
  664. }
  665. void
  666. address(Addr *ap)
  667. {
  668. int c;
  669. int32_t lno;
  670. if((c = *cp++) == '$')
  671. ap->type = A_DOL;
  672. else if(c == '/') {
  673. seof = c;
  674. if (ap->rp = compile())
  675. ap->type = A_RE;
  676. else
  677. ap->type = A_LAST;
  678. }
  679. else if (c >= '0' && c <= '9') {
  680. lno = c - '0';
  681. while ((c = *cp) >= '0' && c <= '9')
  682. lno = lno*10 + *cp++ - '0';
  683. if(!lno)
  684. quit("line number 0 is illegal",0);
  685. ap->type = A_LINE;
  686. ap->line = lno;
  687. }
  688. else {
  689. cp--;
  690. ap->type = A_NONE;
  691. }
  692. }
  693. cmp(char *a, char *b) /* compare characters */
  694. {
  695. while(*a == *b++)
  696. if (*a == '\0')
  697. return 0;
  698. else
  699. a++;
  700. return 1;
  701. }
  702. rcmp(Rune *a, Rune *b) /* compare runes */
  703. {
  704. while(*a == *b++)
  705. if (*a == '\0')
  706. return 0;
  707. else
  708. a++;
  709. return 1;
  710. }
  711. char *
  712. text(char *p) /* extract character string */
  713. {
  714. Rune r;
  715. while(*cp == ' ' || *cp == '\t')
  716. cp++;
  717. while (*cp) {
  718. if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
  719. break;
  720. if (r == '\n')
  721. while (*cp == ' ' || *cp == '\t')
  722. cp++;
  723. p += runetochar(p, &r);
  724. }
  725. *p++ = '\0';
  726. return p;
  727. }
  728. Rune *
  729. stext(Rune *p, Rune *end) /* extract rune string */
  730. {
  731. while(*cp == L' ' || *cp == L'\t')
  732. cp++;
  733. while (*cp) {
  734. if (*cp == L'\\' && *++cp == L'\0')
  735. break;
  736. if (p >= end-1)
  737. quit(TMMES, linebuf);
  738. if ((*p++ = *cp++) == L'\n')
  739. while(*cp == L' ' || *cp == L'\t')
  740. cp++;
  741. }
  742. *p++ = 0;
  743. return p;
  744. }
  745. Label *
  746. search(Label *ptr)
  747. {
  748. Label *rp;
  749. for (rp = ltab; rp < ptr; rp++)
  750. if(rcmp(rp->uninm, ptr->uninm) == 0)
  751. return(rp);
  752. return(0);
  753. }
  754. void
  755. dechain(void)
  756. {
  757. Label *lptr;
  758. SedCom *rptr, *trptr;
  759. for(lptr = ltab; lptr < lab; lptr++) {
  760. if(lptr->address == 0)
  761. quit("Undefined label: %S", lptr->uninm);
  762. if(lptr->chain) {
  763. rptr = lptr->chain;
  764. while((trptr = rptr->lb1) != nil) {
  765. rptr->lb1 = lptr->address;
  766. rptr = trptr;
  767. }
  768. rptr->lb1 = lptr->address;
  769. }
  770. }
  771. }
  772. int
  773. ycomp(SedCom *r)
  774. {
  775. int i;
  776. Rune *rp, *sp, *tsp;
  777. Rune c, highc;
  778. highc = 0;
  779. for(tsp = cp; *tsp != seof; tsp++) {
  780. if(*tsp == L'\\')
  781. tsp++;
  782. if(*tsp == L'\n' || *tsp == L'\0')
  783. return 0;
  784. if (*tsp > highc)
  785. highc = *tsp;
  786. }
  787. tsp++;
  788. if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
  789. quit("Out of memory");
  790. *rp++ = highc; /* save upper bound */
  791. for (i = 0; i <= highc; i++)
  792. rp[i] = i;
  793. sp = cp;
  794. while((c = *sp++) != seof) {
  795. if(c == L'\\' && *sp == L'n') {
  796. sp++;
  797. c = L'\n';
  798. }
  799. if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
  800. rp[c] = L'\n';
  801. tsp++;
  802. }
  803. if(rp[c] == seof || rp[c] == L'\0') {
  804. free(r->re1);
  805. r->re1 = nil;
  806. return 0;
  807. }
  808. }
  809. if(*tsp != seof) {
  810. free(r->re1);
  811. r->re1 = nil;
  812. return 0;
  813. }
  814. cp = tsp+1;
  815. return 1;
  816. }
  817. void
  818. execute(void)
  819. {
  820. SedCom *ipc;
  821. while (spend = gline(linebuf)){
  822. for(ipc = pspace; ipc->command; ) {
  823. if (!executable(ipc)) {
  824. ipc++;
  825. continue;
  826. }
  827. command(ipc);
  828. if(delflag)
  829. break;
  830. if(jflag) {
  831. jflag = 0;
  832. if((ipc = ipc->lb1) == 0)
  833. break;
  834. } else
  835. ipc++;
  836. }
  837. if(!nflag && !delflag)
  838. putline(&fout, linebuf, spend - linebuf);
  839. if(aptr > abuf)
  840. arout();
  841. delflag = 0;
  842. }
  843. }
  844. /* determine if a statement should be applied to an input line */
  845. int
  846. executable(SedCom *ipc)
  847. {
  848. if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
  849. if (ipc->active == 1) /* Second line */
  850. ipc->active = 2;
  851. switch(ipc->ad2.type) {
  852. case A_NONE: /* No second addr; use first */
  853. ipc->active = 0;
  854. break;
  855. case A_DOL: /* Accept everything */
  856. return !ipc->negfl;
  857. case A_LINE: /* Line at end of range? */
  858. if (lnum <= ipc->ad2.line) {
  859. if (ipc->ad2.line == lnum)
  860. ipc->active = 0;
  861. return !ipc->negfl;
  862. }
  863. ipc->active = 0; /* out of range */
  864. return ipc->negfl;
  865. case A_RE: /* Check for matching R.E. */
  866. if (match(ipc->ad2.rp, linebuf))
  867. ipc->active = 0;
  868. return !ipc->negfl;
  869. default:
  870. quit("Internal error");
  871. }
  872. }
  873. switch (ipc->ad1.type) { /* Check first address */
  874. case A_NONE: /* Everything matches */
  875. return !ipc->negfl;
  876. case A_DOL: /* Only last line */
  877. if (dolflag)
  878. return !ipc->negfl;
  879. break;
  880. case A_LINE: /* Check line number */
  881. if (ipc->ad1.line == lnum) {
  882. ipc->active = 1; /* In range */
  883. return !ipc->negfl;
  884. }
  885. break;
  886. case A_RE: /* Check R.E. */
  887. if (match(ipc->ad1.rp, linebuf)) {
  888. ipc->active = 1; /* In range */
  889. return !ipc->negfl;
  890. }
  891. break;
  892. default:
  893. quit("Internal error");
  894. }
  895. return ipc->negfl;
  896. }
  897. int
  898. match(Reprog *pattern, Rune *buf)
  899. {
  900. if (!pattern)
  901. return 0;
  902. subexp[0].rsp = buf;
  903. subexp[0].ep = 0;
  904. if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
  905. loc1 = subexp[0].rsp;
  906. loc2 = subexp[0].rep;
  907. return 1;
  908. }
  909. loc1 = loc2 = 0;
  910. return 0;
  911. }
  912. int
  913. substitute(SedCom *ipc)
  914. {
  915. int len;
  916. if(!match(ipc->re1, linebuf))
  917. return 0;
  918. /*
  919. * we have at least one match. some patterns, e.g. '$' or '^', can
  920. * produce 0-length matches, so during a global substitute we must
  921. * bump to the character after a 0-length match to keep from looping.
  922. */
  923. sflag = 1;
  924. if(ipc->gfl == 0) /* single substitution */
  925. dosub(ipc->rhs);
  926. else
  927. do{ /* global substitution */
  928. len = loc2 - loc1; /* length of match */
  929. dosub(ipc->rhs); /* dosub moves loc2 */
  930. if(*loc2 == 0) /* end of string */
  931. break;
  932. if(len == 0) /* zero-length R.E. match */
  933. loc2++; /* bump over 0-length match */
  934. if(*loc2 == 0) /* end of string */
  935. break;
  936. } while(match(ipc->re1, loc2));
  937. return 1;
  938. }
  939. void
  940. dosub(Rune *rhsbuf)
  941. {
  942. int c, n;
  943. Rune *lp, *sp, *rp;
  944. lp = linebuf;
  945. sp = genbuf;
  946. rp = rhsbuf;
  947. while (lp < loc1)
  948. *sp++ = *lp++;
  949. while(c = *rp++) {
  950. if (c == '&') {
  951. sp = place(sp, loc1, loc2);
  952. continue;
  953. }
  954. if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
  955. n = c-'0';
  956. if (subexp[n].rsp && subexp[n].rep) {
  957. sp = place(sp, subexp[n].rsp, subexp[n].rep);
  958. continue;
  959. }
  960. else {
  961. fprint(2, "sed: Invalid back reference \\%d\n",n);
  962. errexit();
  963. }
  964. }
  965. *sp++ = c;
  966. if (sp >= &genbuf[LBSIZE])
  967. fprint(2, "sed: Output line too long.\n");
  968. }
  969. lp = loc2;
  970. loc2 = sp - genbuf + linebuf;
  971. while (*sp++ = *lp++)
  972. if (sp >= &genbuf[LBSIZE])
  973. fprint(2, "sed: Output line too long.\n");
  974. lp = linebuf;
  975. sp = genbuf;
  976. while (*lp++ = *sp++)
  977. ;
  978. spend = lp - 1;
  979. }
  980. Rune *
  981. place(Rune *sp, Rune *l1, Rune *l2)
  982. {
  983. while (l1 < l2) {
  984. *sp++ = *l1++;
  985. if (sp >= &genbuf[LBSIZE])
  986. fprint(2, "sed: Output line too long.\n");
  987. }
  988. return sp;
  989. }
  990. char *
  991. trans(int c)
  992. {
  993. static char buf[] = "\\x0000";
  994. static char hex[] = "0123456789abcdef";
  995. switch(c) {
  996. case '\b':
  997. return "\\b";
  998. case '\n':
  999. return "\\n";
  1000. case '\r':
  1001. return "\\r";
  1002. case '\t':
  1003. return "\\t";
  1004. case '\\':
  1005. return "\\\\";
  1006. }
  1007. buf[2] = hex[(c>>12)&0xF];
  1008. buf[3] = hex[(c>>8)&0xF];
  1009. buf[4] = hex[(c>>4)&0xF];
  1010. buf[5] = hex[c&0xF];
  1011. return buf;
  1012. }
  1013. void
  1014. command(SedCom *ipc)
  1015. {
  1016. int i, c;
  1017. char *ucp;
  1018. Rune *execp, *p1, *p2, *rp;
  1019. switch(ipc->command) {
  1020. case ACOM:
  1021. *aptr++ = ipc;
  1022. if(aptr >= abuf+MAXADDS)
  1023. quit("sed: Too many appends after line %ld\n",
  1024. (char *)lnum);
  1025. *aptr = 0;
  1026. break;
  1027. case CCOM:
  1028. delflag = 1;
  1029. if(ipc->active == 1) {
  1030. for(rp = ipc->text; *rp; rp++)
  1031. Bputrune(&fout, *rp);
  1032. Bputc(&fout, '\n');
  1033. }
  1034. break;
  1035. case DCOM:
  1036. delflag++;
  1037. break;
  1038. case CDCOM:
  1039. p1 = p2 = linebuf;
  1040. while(*p1 != '\n') {
  1041. if(*p1++ == 0) {
  1042. delflag++;
  1043. return;
  1044. }
  1045. }
  1046. p1++;
  1047. while(*p2++ = *p1++)
  1048. ;
  1049. spend = p2 - 1;
  1050. jflag++;
  1051. break;
  1052. case EQCOM:
  1053. Bprint(&fout, "%ld\n", lnum);
  1054. break;
  1055. case GCOM:
  1056. p1 = linebuf;
  1057. p2 = holdsp;
  1058. while(*p1++ = *p2++)
  1059. ;
  1060. spend = p1 - 1;
  1061. break;
  1062. case CGCOM:
  1063. *spend++ = '\n';
  1064. p1 = spend;
  1065. p2 = holdsp;
  1066. while(*p1++ = *p2++)
  1067. if(p1 >= lbend)
  1068. break;
  1069. spend = p1 - 1;
  1070. break;
  1071. case HCOM:
  1072. p1 = holdsp;
  1073. p2 = linebuf;
  1074. while(*p1++ = *p2++);
  1075. hspend = p1 - 1;
  1076. break;
  1077. case CHCOM:
  1078. *hspend++ = '\n';
  1079. p1 = hspend;
  1080. p2 = linebuf;
  1081. while(*p1++ = *p2++)
  1082. if(p1 >= hend)
  1083. break;
  1084. hspend = p1 - 1;
  1085. break;
  1086. case ICOM:
  1087. for(rp = ipc->text; *rp; rp++)
  1088. Bputrune(&fout, *rp);
  1089. Bputc(&fout, '\n');
  1090. break;
  1091. case BCOM:
  1092. jflag = 1;
  1093. break;
  1094. case LCOM:
  1095. c = 0;
  1096. for (i = 0, rp = linebuf; *rp; rp++) {
  1097. c = *rp;
  1098. if(c >= 0x20 && c < 0x7F && c != '\\') {
  1099. Bputc(&fout, c);
  1100. if(i++ > 71) {
  1101. Bprint(&fout, "\\\n");
  1102. i = 0;
  1103. }
  1104. } else {
  1105. for (ucp = trans(*rp); *ucp; ucp++){
  1106. c = *ucp;
  1107. Bputc(&fout, c);
  1108. if(i++ > 71) {
  1109. Bprint(&fout, "\\\n");
  1110. i = 0;
  1111. }
  1112. }
  1113. }
  1114. }
  1115. if(c == ' ')
  1116. Bprint(&fout, "\\n");
  1117. Bputc(&fout, '\n');
  1118. break;
  1119. case NCOM:
  1120. if(!nflag)
  1121. putline(&fout, linebuf, spend-linebuf);
  1122. if(aptr > abuf)
  1123. arout();
  1124. if((execp = gline(linebuf)) == 0) {
  1125. delflag = 1;
  1126. break;
  1127. }
  1128. spend = execp;
  1129. break;
  1130. case CNCOM:
  1131. if(aptr > abuf)
  1132. arout();
  1133. *spend++ = '\n';
  1134. if((execp = gline(spend)) == 0) {
  1135. delflag = 1;
  1136. break;
  1137. }
  1138. spend = execp;
  1139. break;
  1140. case PCOM:
  1141. putline(&fout, linebuf, spend-linebuf);
  1142. break;
  1143. case CPCOM:
  1144. cpcom:
  1145. for(rp = linebuf; *rp && *rp != '\n'; rp++)
  1146. Bputc(&fout, *rp);
  1147. Bputc(&fout, '\n');
  1148. break;
  1149. case QCOM:
  1150. if(!nflag)
  1151. putline(&fout, linebuf, spend-linebuf);
  1152. if(aptr > abuf)
  1153. arout();
  1154. exits(0);
  1155. case RCOM:
  1156. *aptr++ = ipc;
  1157. if(aptr >= &abuf[MAXADDS])
  1158. quit("sed: Too many reads after line %ld\n",
  1159. (char *)lnum);
  1160. *aptr = 0;
  1161. break;
  1162. case SCOM:
  1163. i = substitute(ipc);
  1164. if(i && ipc->pfl)
  1165. if(ipc->pfl == 1)
  1166. putline(&fout, linebuf, spend-linebuf);
  1167. else
  1168. goto cpcom;
  1169. if(i && ipc->fcode)
  1170. goto wcom;
  1171. break;
  1172. case TCOM:
  1173. if(sflag) {
  1174. sflag = 0;
  1175. jflag = 1;
  1176. }
  1177. break;
  1178. case WCOM:
  1179. wcom:
  1180. putline(ipc->fcode,linebuf, spend - linebuf);
  1181. break;
  1182. case XCOM:
  1183. p1 = linebuf;
  1184. p2 = genbuf;
  1185. while(*p2++ = *p1++)
  1186. ;
  1187. p1 = holdsp;
  1188. p2 = linebuf;
  1189. while(*p2++ = *p1++)
  1190. ;
  1191. spend = p2 - 1;
  1192. p1 = genbuf;
  1193. p2 = holdsp;
  1194. while(*p2++ = *p1++)
  1195. ;
  1196. hspend = p2 - 1;
  1197. break;
  1198. case YCOM:
  1199. p1 = linebuf;
  1200. p2 = ipc->text;
  1201. for (i = *p2++; *p1; p1++)
  1202. if (*p1 <= i)
  1203. *p1 = p2[*p1];
  1204. break;
  1205. }
  1206. }
  1207. void
  1208. putline(Biobuf *bp, Rune *buf, int n)
  1209. {
  1210. while (n--)
  1211. Bputrune(bp, *buf++);
  1212. Bputc(bp, '\n');
  1213. }
  1214. ecmp(Rune *a, Rune *b, int count)
  1215. {
  1216. while(count--)
  1217. if(*a++ != *b++)
  1218. return 0;
  1219. return 1;
  1220. }
  1221. void
  1222. arout(void)
  1223. {
  1224. int c;
  1225. char *s;
  1226. char buf[128];
  1227. Rune *p1;
  1228. Biobuf *fi;
  1229. for (aptr = abuf; *aptr; aptr++) {
  1230. if((*aptr)->command == ACOM) {
  1231. for(p1 = (*aptr)->text; *p1; p1++ )
  1232. Bputrune(&fout, *p1);
  1233. Bputc(&fout, '\n');
  1234. } else {
  1235. for(s = buf, p1 = (*aptr)->text; *p1; p1++)
  1236. s += runetochar(s, p1);
  1237. *s = '\0';
  1238. if((fi = Bopen(buf, OREAD)) == 0)
  1239. continue;
  1240. while((c = Bgetc(fi)) >= 0)
  1241. Bputc(&fout, c);
  1242. Bterm(fi);
  1243. }
  1244. }
  1245. aptr = abuf;
  1246. *aptr = 0;
  1247. }
  1248. void
  1249. errexit(void)
  1250. {
  1251. exits("error");
  1252. }
  1253. void
  1254. quit(char *fmt, ...)
  1255. {
  1256. char *p, *ep;
  1257. char msg[256];
  1258. va_list arg;
  1259. ep = msg + sizeof msg;
  1260. p = seprint(msg, ep, "sed: ");
  1261. va_start(arg, fmt);
  1262. p = vseprint(p, ep, fmt, arg);
  1263. va_end(arg);
  1264. p = seprint(p, ep, "\n");
  1265. write(2, msg, p - msg);
  1266. errexit();
  1267. }
  1268. Rune *
  1269. gline(Rune *addr)
  1270. {
  1271. int32_t c;
  1272. Rune *p;
  1273. static int32_t peekc = 0;
  1274. if (f == 0 && opendata() < 0)
  1275. return 0;
  1276. sflag = 0;
  1277. lnum++;
  1278. /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
  1279. do {
  1280. p = addr;
  1281. for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
  1282. if (c == '\n') {
  1283. if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
  1284. dolflag = 1;
  1285. *p = '\0';
  1286. return p;
  1287. }
  1288. if (c && p < lbend)
  1289. *p++ = c;
  1290. }
  1291. /* return partial final line, adding implicit newline */
  1292. if(p != addr) {
  1293. *p = '\0';
  1294. peekc = -1;
  1295. if (fhead == 0)
  1296. dolflag = 1;
  1297. return p;
  1298. }
  1299. peekc = 0;
  1300. Bterm(f);
  1301. } while (opendata() > 0); /* Switch to next stream */
  1302. f = 0;
  1303. return 0;
  1304. }
  1305. /*
  1306. * Data file input section - the intent is to transparently
  1307. * catenate all data input streams.
  1308. */
  1309. void
  1310. enroll(char *filename) /* Add a file to the input file cache */
  1311. {
  1312. FileCache *fp;
  1313. if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
  1314. quit("Out of memory");
  1315. if (ftail == nil)
  1316. fhead = fp;
  1317. else
  1318. ftail->next = fp;
  1319. ftail = fp;
  1320. fp->next = nil;
  1321. fp->name = filename; /* 0 => stdin */
  1322. }
  1323. int
  1324. opendata(void)
  1325. {
  1326. if (fhead == nil)
  1327. return -1;
  1328. if (fhead->name) {
  1329. if ((f = Bopen(fhead->name, OREAD)) == nil)
  1330. quit("Can't open %s", fhead->name);
  1331. } else {
  1332. Binit(&stdin, 0, OREAD);
  1333. f = &stdin;
  1334. }
  1335. fhead = fhead->next;
  1336. return 1;
  1337. }