sed.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * sed -- stream editor
  11. */
  12. #include <u.h>
  13. #include <libc.h>
  14. #include <bio.h>
  15. #include <regexp.h>
  16. enum {
  17. DEPTH = 20, /* max nesting depth of {} */
  18. MAXCMDS = 512, /* max sed commands */
  19. ADDSIZE = 10000, /* size of add & read buffer */
  20. MAXADDS = 20, /* max pending adds and reads */
  21. LBSIZE = 8192, /* input line size */
  22. LABSIZE = 50, /* max number of labels */
  23. MAXSUB = 10, /* max number of sub reg exp */
  24. MAXFILES = 120, /* max output files */
  25. };
  26. /*
  27. * An address is a line #, a R.E., "$", a reference to the last
  28. * R.E., or nothing.
  29. */
  30. typedef struct {
  31. enum {
  32. A_NONE,
  33. A_DOL,
  34. A_LINE,
  35. A_RE,
  36. A_LAST,
  37. }type;
  38. union {
  39. int32_t line; /* Line # */
  40. Reprog *rp; /* Compiled R.E. */
  41. };
  42. } Addr;
  43. typedef struct SEDCOM {
  44. Addr ad1; /* optional start address */
  45. Addr ad2; /* optional end address */
  46. union {
  47. Reprog *re1; /* compiled R.E. */
  48. Rune *text; /* added text or file name */
  49. struct SEDCOM *lb1; /* destination command of branch */
  50. };
  51. Rune *rhs; /* Right-hand side of substitution */
  52. Biobuf* fcode; /* File ID for read and write */
  53. char command; /* command code -see below */
  54. char gfl; /* 'Global' flag for substitutions */
  55. char pfl; /* 'print' flag for substitutions */
  56. char active; /* 1 => data between start and end */
  57. char negfl; /* negation flag */
  58. } SedCom;
  59. /* Command Codes for field SedCom.command */
  60. #define ACOM 01
  61. #define BCOM 020
  62. #define CCOM 02
  63. #define CDCOM 025
  64. #define CNCOM 022
  65. #define COCOM 017
  66. #define CPCOM 023
  67. #define DCOM 03
  68. #define ECOM 015
  69. #define EQCOM 013
  70. #define FCOM 016
  71. #define GCOM 027
  72. #define CGCOM 030
  73. #define HCOM 031
  74. #define CHCOM 032
  75. #define ICOM 04
  76. #define LCOM 05
  77. #define NCOM 012
  78. #define PCOM 010
  79. #define QCOM 011
  80. #define RCOM 06
  81. #define SCOM 07
  82. #define TCOM 021
  83. #define WCOM 014
  84. #define CWCOM 024
  85. #define YCOM 026
  86. #define XCOM 033
  87. typedef struct label { /* Label symbol table */
  88. Rune uninm[9]; /* Label name */
  89. SedCom *chain;
  90. SedCom *address; /* Command associated with label */
  91. } Label;
  92. typedef struct FILE_CACHE { /* Data file control block */
  93. struct FILE_CACHE *next; /* Forward Link */
  94. char *name; /* Name of file */
  95. } FileCache;
  96. SedCom pspace[MAXCMDS]; /* Command storage */
  97. SedCom *pend = pspace+MAXCMDS; /* End of command storage */
  98. SedCom *rep = pspace; /* Current fill point */
  99. Reprog *lastre = 0; /* Last regular expression */
  100. Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
  101. Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
  102. Rune *addend = addspace+ADDSIZE;
  103. SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
  104. SedCom **aptr = abuf;
  105. struct { /* Sed program input control block */
  106. enum PTYPE { /* Either on command line or in file */
  107. P_ARG,
  108. P_FILE,
  109. } type;
  110. union PCTL { /* Pointer to data */
  111. Biobuf *bp;
  112. char *curr;
  113. } PCTL;
  114. } prog;
  115. Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
  116. FileCache *fhead = 0; /* Head of File Cache Chain */
  117. FileCache *ftail = 0; /* Tail of File Cache Chain */
  118. Rune *loc1; /* Start of pattern match */
  119. Rune *loc2; /* End of pattern match */
  120. Rune seof; /* Pattern delimiter char */
  121. Rune linebuf[LBSIZE+1]; /* Input data buffer */
  122. Rune *lbend = linebuf+LBSIZE; /* End of buffer */
  123. Rune *spend = linebuf; /* End of input data */
  124. Rune *cp; /* Current scan point in linebuf */
  125. Rune holdsp[LBSIZE+1]; /* Hold buffer */
  126. Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
  127. Rune *hspend = holdsp; /* End of hold data */
  128. int nflag; /* Command line flags */
  129. int gflag;
  130. int dolflag; /* Set when at true EOF */
  131. int sflag; /* Set when substitution done */
  132. int jflag; /* Set when jump required */
  133. int delflag; /* Delete current line when set */
  134. int64_t lnum = 0; /* Input line count */
  135. char fname[MAXFILES][40]; /* File name cache */
  136. Biobuf *fcode[MAXFILES]; /* File ID cache */
  137. int nfiles = 0; /* Cache fill point */
  138. Biobuf fout; /* Output stream */
  139. Biobuf stdin; /* Default input */
  140. Biobuf* f = 0; /* Input data */
  141. Label ltab[LABSIZE]; /* Label name symbol table */
  142. Label *labend = ltab+LABSIZE; /* End of label table */
  143. Label *lab = ltab+1; /* Current Fill point */
  144. int depth = 0; /* {} stack pointer */
  145. Rune bad; /* Dummy err ptr reference */
  146. Rune *badp = &bad;
  147. char CGMES[] = "%S command garbled: %S";
  148. char TMMES[] = "Too much text: %S";
  149. char LTL[] = "Label too int32_t: %S";
  150. char AD0MES[] = "No addresses allowed: %S";
  151. char AD1MES[] = "Only one address allowed: %S";
  152. void address(Addr *);
  153. void arout(void);
  154. int cmp(char *, char *);
  155. int rcmp(Rune *, Rune *);
  156. void command(SedCom *);
  157. Reprog *compile(void);
  158. Rune *compsub(Rune *, Rune *);
  159. void dechain(void);
  160. void dosub(Rune *);
  161. int ecmp(Rune *, Rune *, int);
  162. void enroll(char *);
  163. void errexit(void);
  164. int executable(SedCom *);
  165. void execute(void);
  166. void fcomp(void);
  167. int32_t getrune(void);
  168. Rune *gline(Rune *);
  169. int match(Reprog *, Rune *);
  170. void newfile(enum PTYPE, char *);
  171. int opendata(void);
  172. Biobuf *open_file(char *);
  173. Rune *place(Rune *, Rune *, Rune *);
  174. void quit(char *, ...);
  175. int rline(Rune *, Rune *);
  176. Label *search(Label *);
  177. int substitute(SedCom *);
  178. char *text(char *);
  179. Rune *stext(Rune *, Rune *);
  180. int ycomp(SedCom *);
  181. char * trans(int c);
  182. void putline(Biobuf *bp, Rune *buf, int n);
  183. void
  184. main(int argc, char **argv)
  185. {
  186. int compfl;
  187. lnum = 0;
  188. Binit(&fout, 1, OWRITE);
  189. fcode[nfiles++] = &fout;
  190. compfl = 0;
  191. if(argc == 1)
  192. exits(0);
  193. ARGBEGIN{
  194. case 'e':
  195. if (argc <= 1)
  196. quit("missing pattern");
  197. newfile(P_ARG, ARGF());
  198. fcomp();
  199. compfl = 1;
  200. continue;
  201. case 'f':
  202. if(argc <= 1)
  203. quit("no pattern-file");
  204. newfile(P_FILE, ARGF());
  205. fcomp();
  206. compfl = 1;
  207. continue;
  208. case 'g':
  209. gflag++;
  210. continue;
  211. case 'n':
  212. nflag++;
  213. continue;
  214. default:
  215. fprint(2, "sed: Unknown flag: %c\n", ARGC());
  216. continue;
  217. } ARGEND
  218. if(compfl == 0) {
  219. if (--argc < 0)
  220. quit("missing pattern");
  221. newfile(P_ARG, *argv++);
  222. fcomp();
  223. }
  224. if(depth)
  225. quit("Too many {'s");
  226. ltab[0].address = rep;
  227. dechain();
  228. if(argc <= 0)
  229. enroll(0); /* Add stdin to cache */
  230. else
  231. while(--argc >= 0)
  232. enroll(*argv++);
  233. execute();
  234. exits(0);
  235. }
  236. void
  237. fcomp(void)
  238. {
  239. int i;
  240. Label *lpt;
  241. Rune *tp;
  242. SedCom *pt, *pt1;
  243. static Rune *p = addspace;
  244. static SedCom **cmpend[DEPTH]; /* stack of {} operations */
  245. while (rline(linebuf, lbend) >= 0) {
  246. cp = linebuf;
  247. comploop:
  248. while(*cp == L' ' || *cp == L'\t')
  249. cp++;
  250. if(*cp == L'\0' || *cp == L'#')
  251. continue;
  252. if(*cp == L';') {
  253. cp++;
  254. goto comploop;
  255. }
  256. address(&rep->ad1);
  257. if (rep->ad1.type != A_NONE) {
  258. if (rep->ad1.type == A_LAST) {
  259. if (!lastre)
  260. quit("First RE may not be null");
  261. rep->ad1.type = A_RE;
  262. rep->ad1.rp = lastre;
  263. }
  264. if(*cp == L',' || *cp == L';') {
  265. cp++;
  266. address(&rep->ad2);
  267. if (rep->ad2.type == A_LAST) {
  268. rep->ad2.type = A_RE;
  269. rep->ad2.rp = lastre;
  270. }
  271. } else
  272. rep->ad2.type = A_NONE;
  273. }
  274. while(*cp == L' ' || *cp == L'\t')
  275. cp++;
  276. swit:
  277. switch(*cp++) {
  278. default:
  279. quit("Unrecognized command: %S", linebuf);
  280. case '!':
  281. rep->negfl = 1;
  282. goto swit;
  283. case '{':
  284. rep->command = BCOM;
  285. rep->negfl = !rep->negfl;
  286. cmpend[depth++] = &rep->lb1;
  287. if(++rep >= pend)
  288. quit("Too many commands: %S", linebuf);
  289. if(*cp == '\0')
  290. continue;
  291. goto comploop;
  292. case '}':
  293. if(rep->ad1.type != A_NONE)
  294. quit(AD0MES, linebuf);
  295. if(--depth < 0)
  296. quit("Too many }'s");
  297. *cmpend[depth] = rep;
  298. if(*cp == 0)
  299. continue;
  300. goto comploop;
  301. case '=':
  302. rep->command = EQCOM;
  303. if(rep->ad2.type != A_NONE)
  304. quit(AD1MES, linebuf);
  305. break;
  306. case ':':
  307. if(rep->ad1.type != A_NONE)
  308. quit(AD0MES, linebuf);
  309. while(*cp == L' ')
  310. cp++;
  311. tp = lab->uninm;
  312. while (*cp && *cp != L';' && *cp != L' ' &&
  313. *cp != L'\t' && *cp != L'#') {
  314. *tp++ = *cp++;
  315. if(tp >= &lab->uninm[8])
  316. quit(LTL, linebuf);
  317. }
  318. *tp = L'\0';
  319. if (*lab->uninm == L'\0') /* no label? */
  320. quit(CGMES, L":", linebuf);
  321. if((lpt = search(lab)) != nil) {
  322. if(lpt->address)
  323. quit("Duplicate labels: %S", linebuf);
  324. } else {
  325. lab->chain = 0;
  326. lpt = lab;
  327. if(++lab >= labend)
  328. quit("Too many labels: %S", linebuf);
  329. }
  330. lpt->address = rep;
  331. if (*cp == L'#')
  332. continue;
  333. rep--; /* reuse this slot */
  334. break;
  335. case 'a':
  336. rep->command = ACOM;
  337. if(rep->ad2.type != A_NONE)
  338. quit(AD1MES, linebuf);
  339. if(*cp == L'\\')
  340. cp++;
  341. if(*cp++ != L'\n')
  342. quit(CGMES, L"a", linebuf);
  343. rep->text = p;
  344. p = stext(p, addend);
  345. break;
  346. case 'c':
  347. rep->command = CCOM;
  348. if(*cp == L'\\')
  349. cp++;
  350. if(*cp++ != L'\n')
  351. quit(CGMES, L"c", linebuf);
  352. rep->text = p;
  353. p = stext(p, addend);
  354. break;
  355. case 'i':
  356. rep->command = ICOM;
  357. if(rep->ad2.type != A_NONE)
  358. quit(AD1MES, linebuf);
  359. if(*cp == L'\\')
  360. cp++;
  361. if(*cp++ != L'\n')
  362. quit(CGMES, L"i", linebuf);
  363. rep->text = p;
  364. p = stext(p, addend);
  365. break;
  366. case 'g':
  367. rep->command = GCOM;
  368. break;
  369. case 'G':
  370. rep->command = CGCOM;
  371. break;
  372. case 'h':
  373. rep->command = HCOM;
  374. break;
  375. case 'H':
  376. rep->command = CHCOM;
  377. break;
  378. case 't':
  379. rep->command = TCOM;
  380. goto jtcommon;
  381. case 'b':
  382. rep->command = BCOM;
  383. jtcommon:
  384. while(*cp == L' ')
  385. cp++;
  386. if(*cp == L'\0' || *cp == L';') {
  387. /* no label; jump to end */
  388. if((pt = ltab[0].chain) != nil) {
  389. while((pt1 = pt->lb1) != nil)
  390. pt = pt1;
  391. pt->lb1 = rep;
  392. } else
  393. ltab[0].chain = rep;
  394. break;
  395. }
  396. /* copy label into lab->uninm */
  397. tp = lab->uninm;
  398. while((*tp = *cp++) != L'\0' && *tp != L';')
  399. if(++tp >= &lab->uninm[8])
  400. quit(LTL, linebuf);
  401. cp--;
  402. *tp = L'\0';
  403. if (*lab->uninm == L'\0')
  404. /* shouldn't get here */
  405. quit(CGMES, L"b or t", linebuf);
  406. if((lpt = search(lab)) != nil) {
  407. if(lpt->address)
  408. rep->lb1 = lpt->address;
  409. else {
  410. for(pt = lpt->chain; pt != nil &&
  411. (pt1 = pt->lb1) != nil; pt = pt1)
  412. ;
  413. if (pt)
  414. pt->lb1 = rep;
  415. }
  416. } else { /* add new label */
  417. lab->chain = rep;
  418. lab->address = 0;
  419. if(++lab >= labend)
  420. quit("Too many labels: %S", linebuf);
  421. }
  422. break;
  423. case 'n':
  424. rep->command = NCOM;
  425. break;
  426. case 'N':
  427. rep->command = CNCOM;
  428. break;
  429. case 'p':
  430. rep->command = PCOM;
  431. break;
  432. case 'P':
  433. rep->command = CPCOM;
  434. break;
  435. case 'r':
  436. rep->command = RCOM;
  437. if(rep->ad2.type != A_NONE)
  438. quit(AD1MES, linebuf);
  439. if(*cp++ != L' ')
  440. quit(CGMES, L"r", linebuf);
  441. rep->text = p;
  442. p = stext(p, addend);
  443. break;
  444. case 'd':
  445. rep->command = DCOM;
  446. break;
  447. case 'D':
  448. rep->command = CDCOM;
  449. rep->lb1 = pspace;
  450. break;
  451. case 'q':
  452. rep->command = QCOM;
  453. if(rep->ad2.type != A_NONE)
  454. quit(AD1MES, linebuf);
  455. break;
  456. case 'l':
  457. rep->command = LCOM;
  458. break;
  459. case 's':
  460. rep->command = SCOM;
  461. seof = *cp++;
  462. if ((rep->re1 = compile()) == 0) {
  463. if(!lastre)
  464. quit("First RE may not be null.");
  465. rep->re1 = lastre;
  466. }
  467. rep->rhs = p;
  468. if((p = compsub(p, addend)) == 0)
  469. quit(CGMES, L"s", linebuf);
  470. if(*cp == L'g') {
  471. cp++;
  472. rep->gfl++;
  473. } else if(gflag)
  474. rep->gfl++;
  475. if(*cp == L'p') {
  476. cp++;
  477. rep->pfl = 1;
  478. }
  479. if(*cp == L'P') {
  480. cp++;
  481. rep->pfl = 2;
  482. }
  483. if(*cp == L'w') {
  484. cp++;
  485. if(*cp++ != L' ')
  486. quit(CGMES, L"s", linebuf);
  487. text(fname[nfiles]);
  488. for(i = nfiles - 1; i >= 0; i--)
  489. if(cmp(fname[nfiles], fname[i]) == 0) {
  490. rep->fcode = fcode[i];
  491. goto done;
  492. }
  493. if(nfiles >= MAXFILES)
  494. quit("Too many files in w commands 1");
  495. rep->fcode = open_file(fname[nfiles]);
  496. }
  497. break;
  498. case 'w':
  499. rep->command = WCOM;
  500. if(*cp++ != L' ')
  501. quit(CGMES, L"w", linebuf);
  502. text(fname[nfiles]);
  503. for(i = nfiles - 1; i >= 0; i--)
  504. if(cmp(fname[nfiles], fname[i]) == 0) {
  505. rep->fcode = fcode[i];
  506. goto done;
  507. }
  508. if(nfiles >= MAXFILES){
  509. fprint(2, "sed: Too many files in w commands 2 \n");
  510. fprint(2, "nfiles = %d; MAXF = %d\n",
  511. nfiles, MAXFILES);
  512. errexit();
  513. }
  514. rep->fcode = open_file(fname[nfiles]);
  515. break;
  516. case 'x':
  517. rep->command = XCOM;
  518. break;
  519. case 'y':
  520. rep->command = YCOM;
  521. seof = *cp++;
  522. if (ycomp(rep) == 0)
  523. quit(CGMES, L"y", linebuf);
  524. break;
  525. }
  526. done:
  527. if(++rep >= pend)
  528. quit("Too many commands, last: %S", linebuf);
  529. if(*cp++ != L'\0') {
  530. if(cp[-1] == L';')
  531. goto comploop;
  532. quit(CGMES, cp - 1, linebuf);
  533. }
  534. }
  535. }
  536. Biobuf *
  537. open_file(char *name)
  538. {
  539. int fd;
  540. Biobuf *bp;
  541. if ((bp = malloc(sizeof(Biobuf))) == 0)
  542. quit("Out of memory");
  543. if ((fd = open(name, OWRITE)) < 0 &&
  544. (fd = create(name, OWRITE, 0666)) < 0)
  545. quit("Cannot create %s", name);
  546. Binit(bp, fd, OWRITE);
  547. Bseek(bp, 0, 2);
  548. fcode[nfiles++] = bp;
  549. return bp;
  550. }
  551. Rune *
  552. compsub(Rune *rhs, Rune *end)
  553. {
  554. Rune r;
  555. while ((r = *cp++) != '\0') {
  556. if(r == '\\') {
  557. if (rhs < end)
  558. *rhs++ = Runemax;
  559. else
  560. return 0;
  561. r = *cp++;
  562. if(r == 'n')
  563. r = '\n';
  564. } else {
  565. if(r == seof) {
  566. if (rhs < end)
  567. *rhs++ = '\0';
  568. else
  569. return 0;
  570. return rhs;
  571. }
  572. }
  573. if (rhs < end)
  574. *rhs++ = r;
  575. else
  576. return 0;
  577. }
  578. return 0;
  579. }
  580. Reprog *
  581. compile(void)
  582. {
  583. Rune c;
  584. char *ep;
  585. char expbuf[512];
  586. if((c = *cp++) == seof) /* L'//' */
  587. return 0;
  588. ep = expbuf;
  589. do {
  590. if (c == L'\0' || c == L'\n')
  591. quit(TMMES, linebuf);
  592. if (c == L'\\') {
  593. if (ep >= expbuf+sizeof(expbuf))
  594. quit(TMMES, linebuf);
  595. ep += runetochar(ep, &c);
  596. if ((c = *cp++) == L'n')
  597. c = L'\n';
  598. }
  599. if (ep >= expbuf + sizeof(expbuf))
  600. quit(TMMES, linebuf);
  601. ep += runetochar(ep, &c);
  602. } while ((c = *cp++) != seof);
  603. *ep = 0;
  604. return lastre = regcomp(expbuf);
  605. }
  606. void
  607. regerror(char *s)
  608. {
  609. USED(s);
  610. quit(CGMES, L"r.e.-using", linebuf);
  611. }
  612. void
  613. newfile(enum PTYPE type, char *name)
  614. {
  615. if (type == P_ARG)
  616. prog.PCTL.curr = name;
  617. else if ((prog.PCTL.bp = Bopen(name, OREAD)) == 0)
  618. quit("Cannot open pattern-file: %s\n", name);
  619. prog.type = type;
  620. }
  621. int
  622. rline(Rune *buf, Rune *end)
  623. {
  624. int32_t c;
  625. Rune r;
  626. while ((c = getrune()) >= 0) {
  627. r = c;
  628. if (r == '\\') {
  629. if (buf <= end)
  630. *buf++ = r;
  631. if ((c = getrune()) < 0)
  632. break;
  633. r = c;
  634. } else if (r == '\n') {
  635. *buf = '\0';
  636. return 1;
  637. }
  638. if (buf <= end)
  639. *buf++ = r;
  640. }
  641. *buf = '\0';
  642. return -1;
  643. }
  644. int32_t
  645. getrune(void)
  646. {
  647. int32_t c;
  648. Rune r;
  649. char *p;
  650. if (prog.type == P_ARG) {
  651. if ((p = prog.PCTL.curr) != 0) {
  652. if (*p) {
  653. prog.PCTL.curr += chartorune(&r, p);
  654. c = r;
  655. } else {
  656. c = '\n'; /* fake an end-of-line */
  657. prog.PCTL.curr = 0;
  658. }
  659. } else
  660. c = -1;
  661. } else if ((c = Bgetrune(prog.PCTL.bp)) < 0)
  662. Bterm(prog.PCTL.bp);
  663. return c;
  664. }
  665. void
  666. address(Addr *ap)
  667. {
  668. int c;
  669. int32_t lno;
  670. if((c = *cp++) == '$')
  671. ap->type = A_DOL;
  672. else if(c == '/') {
  673. seof = c;
  674. if ((ap->rp = compile()) != nil)
  675. ap->type = A_RE;
  676. else
  677. ap->type = A_LAST;
  678. }
  679. else if (c >= '0' && c <= '9') {
  680. lno = c - '0';
  681. while ((c = *cp) >= '0' && c <= '9')
  682. lno = lno*10 + *cp++ - '0';
  683. if(!lno)
  684. quit("line number 0 is illegal",0);
  685. ap->type = A_LINE;
  686. ap->line = lno;
  687. }
  688. else {
  689. cp--;
  690. ap->type = A_NONE;
  691. }
  692. }
  693. int
  694. cmp(char *a, char *b) /* compare characters */
  695. {
  696. while(*a == *b++)
  697. if (*a == '\0')
  698. return 0;
  699. else
  700. a++;
  701. return 1;
  702. }
  703. int
  704. rcmp(Rune *a, Rune *b) /* compare runes */
  705. {
  706. while(*a == *b++)
  707. if (*a == '\0')
  708. return 0;
  709. else
  710. a++;
  711. return 1;
  712. }
  713. char *
  714. text(char *p) /* extract character string */
  715. {
  716. Rune r;
  717. while(*cp == ' ' || *cp == '\t')
  718. cp++;
  719. while (*cp) {
  720. if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
  721. break;
  722. if (r == '\n')
  723. while (*cp == ' ' || *cp == '\t')
  724. cp++;
  725. p += runetochar(p, &r);
  726. }
  727. *p++ = '\0';
  728. return p;
  729. }
  730. Rune *
  731. stext(Rune *p, Rune *end) /* extract rune string */
  732. {
  733. while(*cp == L' ' || *cp == L'\t')
  734. cp++;
  735. while (*cp) {
  736. if (*cp == L'\\' && *++cp == L'\0')
  737. break;
  738. if (p >= end-1)
  739. quit(TMMES, linebuf);
  740. if ((*p++ = *cp++) == L'\n')
  741. while(*cp == L' ' || *cp == L'\t')
  742. cp++;
  743. }
  744. *p++ = 0;
  745. return p;
  746. }
  747. Label *
  748. search(Label *ptr)
  749. {
  750. Label *rp;
  751. for (rp = ltab; rp < ptr; rp++)
  752. if(rcmp(rp->uninm, ptr->uninm) == 0)
  753. return(rp);
  754. return(0);
  755. }
  756. void
  757. dechain(void)
  758. {
  759. Label *lptr;
  760. SedCom *rptr, *trptr;
  761. for(lptr = ltab; lptr < lab; lptr++) {
  762. if(lptr->address == 0)
  763. quit("Undefined label: %S", lptr->uninm);
  764. if(lptr->chain) {
  765. rptr = lptr->chain;
  766. while((trptr = rptr->lb1) != nil) {
  767. rptr->lb1 = lptr->address;
  768. rptr = trptr;
  769. }
  770. rptr->lb1 = lptr->address;
  771. }
  772. }
  773. }
  774. int
  775. ycomp(SedCom *r)
  776. {
  777. int i;
  778. Rune *rp, *sp, *tsp;
  779. Rune c, highc;
  780. highc = 0;
  781. for(tsp = cp; *tsp != seof; tsp++) {
  782. if(*tsp == L'\\')
  783. tsp++;
  784. if(*tsp == L'\n' || *tsp == L'\0')
  785. return 0;
  786. if (*tsp > highc)
  787. highc = *tsp;
  788. }
  789. tsp++;
  790. if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
  791. quit("Out of memory");
  792. *rp++ = highc; /* save upper bound */
  793. for (i = 0; i <= highc; i++)
  794. rp[i] = i;
  795. sp = cp;
  796. while((c = *sp++) != seof) {
  797. if(c == L'\\' && *sp == L'n') {
  798. sp++;
  799. c = L'\n';
  800. }
  801. if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
  802. rp[c] = L'\n';
  803. tsp++;
  804. }
  805. if(rp[c] == seof || rp[c] == L'\0') {
  806. free(r->re1);
  807. r->re1 = nil;
  808. return 0;
  809. }
  810. }
  811. if(*tsp != seof) {
  812. free(r->re1);
  813. r->re1 = nil;
  814. return 0;
  815. }
  816. cp = tsp+1;
  817. return 1;
  818. }
  819. void
  820. execute(void)
  821. {
  822. SedCom *ipc;
  823. while ((spend = gline(linebuf)) != nil){
  824. for(ipc = pspace; ipc->command; ) {
  825. if (!executable(ipc)) {
  826. ipc++;
  827. continue;
  828. }
  829. command(ipc);
  830. if(delflag)
  831. break;
  832. if(jflag) {
  833. jflag = 0;
  834. if((ipc = ipc->lb1) == 0)
  835. break;
  836. } else
  837. ipc++;
  838. }
  839. if(!nflag && !delflag)
  840. putline(&fout, linebuf, spend - linebuf);
  841. if(aptr > abuf)
  842. arout();
  843. delflag = 0;
  844. }
  845. }
  846. /* determine if a statement should be applied to an input line */
  847. int
  848. executable(SedCom *ipc)
  849. {
  850. if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
  851. if (ipc->active == 1) /* Second line */
  852. ipc->active = 2;
  853. switch(ipc->ad2.type) {
  854. case A_NONE: /* No second addr; use first */
  855. ipc->active = 0;
  856. break;
  857. case A_DOL: /* Accept everything */
  858. return !ipc->negfl;
  859. case A_LINE: /* Line at end of range? */
  860. if (lnum <= ipc->ad2.line) {
  861. if (ipc->ad2.line == lnum)
  862. ipc->active = 0;
  863. return !ipc->negfl;
  864. }
  865. ipc->active = 0; /* out of range */
  866. return ipc->negfl;
  867. case A_RE: /* Check for matching R.E. */
  868. if (match(ipc->ad2.rp, linebuf))
  869. ipc->active = 0;
  870. return !ipc->negfl;
  871. default:
  872. quit("Internal error");
  873. }
  874. }
  875. switch (ipc->ad1.type) { /* Check first address */
  876. case A_NONE: /* Everything matches */
  877. return !ipc->negfl;
  878. case A_DOL: /* Only last line */
  879. if (dolflag)
  880. return !ipc->negfl;
  881. break;
  882. case A_LINE: /* Check line number */
  883. if (ipc->ad1.line == lnum) {
  884. ipc->active = 1; /* In range */
  885. return !ipc->negfl;
  886. }
  887. break;
  888. case A_RE: /* Check R.E. */
  889. if (match(ipc->ad1.rp, linebuf)) {
  890. ipc->active = 1; /* In range */
  891. return !ipc->negfl;
  892. }
  893. break;
  894. default:
  895. quit("Internal error");
  896. }
  897. return ipc->negfl;
  898. }
  899. int
  900. match(Reprog *pattern, Rune *buf)
  901. {
  902. if (!pattern)
  903. return 0;
  904. subexp[0].rsp = buf;
  905. subexp[0].ep = 0;
  906. if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
  907. loc1 = subexp[0].rsp;
  908. loc2 = subexp[0].rep;
  909. return 1;
  910. }
  911. loc1 = loc2 = 0;
  912. return 0;
  913. }
  914. int
  915. substitute(SedCom *ipc)
  916. {
  917. int len;
  918. if(!match(ipc->re1, linebuf))
  919. return 0;
  920. /*
  921. * we have at least one match. some patterns, e.g. '$' or '^', can
  922. * produce 0-length matches, so during a global substitute we must
  923. * bump to the character after a 0-length match to keep from looping.
  924. */
  925. sflag = 1;
  926. if(ipc->gfl == 0) /* single substitution */
  927. dosub(ipc->rhs);
  928. else
  929. do{ /* global substitution */
  930. len = loc2 - loc1; /* length of match */
  931. dosub(ipc->rhs); /* dosub moves loc2 */
  932. if(*loc2 == 0) /* end of string */
  933. break;
  934. if(len == 0) /* zero-length R.E. match */
  935. loc2++; /* bump over 0-length match */
  936. if(*loc2 == 0) /* end of string */
  937. break;
  938. } while(match(ipc->re1, loc2));
  939. return 1;
  940. }
  941. void
  942. dosub(Rune *rhsbuf)
  943. {
  944. int c, n;
  945. Rune *lp, *sp, *rp;
  946. lp = linebuf;
  947. sp = genbuf;
  948. rp = rhsbuf;
  949. while (lp < loc1)
  950. *sp++ = *lp++;
  951. while((c = *rp++) != 0){
  952. if (c == '&') {
  953. sp = place(sp, loc1, loc2);
  954. continue;
  955. }
  956. if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
  957. n = c-'0';
  958. if (subexp[n].rsp && subexp[n].rep) {
  959. sp = place(sp, subexp[n].rsp, subexp[n].rep);
  960. continue;
  961. }
  962. else {
  963. fprint(2, "sed: Invalid back reference \\%d\n",n);
  964. errexit();
  965. }
  966. }
  967. *sp++ = c;
  968. if (sp >= &genbuf[LBSIZE])
  969. fprint(2, "sed: Output line too int32_t.\n");
  970. }
  971. lp = loc2;
  972. loc2 = sp - genbuf + linebuf;
  973. while ((*sp++ = *lp++) != 0)
  974. if (sp >= &genbuf[LBSIZE])
  975. fprint(2, "sed: Output line too int32_t.\n");
  976. lp = linebuf;
  977. sp = genbuf;
  978. while ((*lp++ = *sp++) != 0)
  979. ;
  980. spend = lp - 1;
  981. }
  982. Rune *
  983. place(Rune *sp, Rune *l1, Rune *l2)
  984. {
  985. while (l1 < l2) {
  986. *sp++ = *l1++;
  987. if (sp >= &genbuf[LBSIZE])
  988. fprint(2, "sed: Output line too int32_t.\n");
  989. }
  990. return sp;
  991. }
  992. char *
  993. trans(int c)
  994. {
  995. static char buf[] = "\\x0000";
  996. static char hex[] = "0123456789abcdef";
  997. switch(c) {
  998. case '\b':
  999. return "\\b";
  1000. case '\n':
  1001. return "\\n";
  1002. case '\r':
  1003. return "\\r";
  1004. case '\t':
  1005. return "\\t";
  1006. case '\\':
  1007. return "\\\\";
  1008. }
  1009. buf[2] = hex[(c>>12)&0xF];
  1010. buf[3] = hex[(c>>8)&0xF];
  1011. buf[4] = hex[(c>>4)&0xF];
  1012. buf[5] = hex[c&0xF];
  1013. return buf;
  1014. }
  1015. void
  1016. command(SedCom *ipc)
  1017. {
  1018. int i, c;
  1019. char *ucp;
  1020. Rune *execp, *p1, *p2, *rp;
  1021. switch(ipc->command) {
  1022. case ACOM:
  1023. *aptr++ = ipc;
  1024. if(aptr >= abuf+MAXADDS)
  1025. quit("sed: Too many appends after line %ld\n",
  1026. (char *)lnum);
  1027. *aptr = 0;
  1028. break;
  1029. case CCOM:
  1030. delflag = 1;
  1031. if(ipc->active == 1) {
  1032. for(rp = ipc->text; *rp; rp++)
  1033. Bputrune(&fout, *rp);
  1034. Bputc(&fout, '\n');
  1035. }
  1036. break;
  1037. case DCOM:
  1038. delflag++;
  1039. break;
  1040. case CDCOM:
  1041. p1 = p2 = linebuf;
  1042. while(*p1 != '\n') {
  1043. if(*p1++ == 0) {
  1044. delflag++;
  1045. return;
  1046. }
  1047. }
  1048. p1++;
  1049. while((*p2++ = *p1++) != 0)
  1050. ;
  1051. spend = p2 - 1;
  1052. jflag++;
  1053. break;
  1054. case EQCOM:
  1055. Bprint(&fout, "%ld\n", lnum);
  1056. break;
  1057. case GCOM:
  1058. p1 = linebuf;
  1059. p2 = holdsp;
  1060. while((*p1++ = *p2++) != 0)
  1061. ;
  1062. spend = p1 - 1;
  1063. break;
  1064. case CGCOM:
  1065. *spend++ = '\n';
  1066. p1 = spend;
  1067. p2 = holdsp;
  1068. while((*p1++ = *p2++) != 0)
  1069. if(p1 >= lbend)
  1070. break;
  1071. spend = p1 - 1;
  1072. break;
  1073. case HCOM:
  1074. p1 = holdsp;
  1075. p2 = linebuf;
  1076. while((*p1++ = *p2++) != 0);
  1077. hspend = p1 - 1;
  1078. break;
  1079. case CHCOM:
  1080. *hspend++ = '\n';
  1081. p1 = hspend;
  1082. p2 = linebuf;
  1083. while((*p1++ = *p2++) != 0)
  1084. if(p1 >= hend)
  1085. break;
  1086. hspend = p1 - 1;
  1087. break;
  1088. case ICOM:
  1089. for(rp = ipc->text; *rp; rp++)
  1090. Bputrune(&fout, *rp);
  1091. Bputc(&fout, '\n');
  1092. break;
  1093. case BCOM:
  1094. jflag = 1;
  1095. break;
  1096. case LCOM:
  1097. c = 0;
  1098. for (i = 0, rp = linebuf; *rp; rp++) {
  1099. c = *rp;
  1100. if(c >= 0x20 && c < 0x7F && c != '\\') {
  1101. Bputc(&fout, c);
  1102. if(i++ > 71) {
  1103. Bprint(&fout, "\\\n");
  1104. i = 0;
  1105. }
  1106. } else {
  1107. for (ucp = trans(*rp); *ucp; ucp++){
  1108. c = *ucp;
  1109. Bputc(&fout, c);
  1110. if(i++ > 71) {
  1111. Bprint(&fout, "\\\n");
  1112. i = 0;
  1113. }
  1114. }
  1115. }
  1116. }
  1117. if(c == ' ')
  1118. Bprint(&fout, "\\n");
  1119. Bputc(&fout, '\n');
  1120. break;
  1121. case NCOM:
  1122. if(!nflag)
  1123. putline(&fout, linebuf, spend-linebuf);
  1124. if(aptr > abuf)
  1125. arout();
  1126. if((execp = gline(linebuf)) == 0) {
  1127. delflag = 1;
  1128. break;
  1129. }
  1130. spend = execp;
  1131. break;
  1132. case CNCOM:
  1133. if(aptr > abuf)
  1134. arout();
  1135. *spend++ = '\n';
  1136. if((execp = gline(spend)) == 0) {
  1137. delflag = 1;
  1138. break;
  1139. }
  1140. spend = execp;
  1141. break;
  1142. case PCOM:
  1143. putline(&fout, linebuf, spend-linebuf);
  1144. break;
  1145. case CPCOM:
  1146. cpcom:
  1147. for(rp = linebuf; *rp && *rp != '\n'; rp++)
  1148. Bputc(&fout, *rp);
  1149. Bputc(&fout, '\n');
  1150. break;
  1151. case QCOM:
  1152. if(!nflag)
  1153. putline(&fout, linebuf, spend-linebuf);
  1154. if(aptr > abuf)
  1155. arout();
  1156. exits(0);
  1157. case RCOM:
  1158. *aptr++ = ipc;
  1159. if(aptr >= &abuf[MAXADDS])
  1160. quit("sed: Too many reads after line %ld\n",
  1161. (char *)lnum);
  1162. *aptr = 0;
  1163. break;
  1164. case SCOM:
  1165. i = substitute(ipc);
  1166. if(i && ipc->pfl){
  1167. if(ipc->pfl == 1)
  1168. putline(&fout, linebuf, spend-linebuf);
  1169. else
  1170. goto cpcom;
  1171. }
  1172. if(i && ipc->fcode)
  1173. goto wcom;
  1174. break;
  1175. case TCOM:
  1176. if(sflag) {
  1177. sflag = 0;
  1178. jflag = 1;
  1179. }
  1180. break;
  1181. case WCOM:
  1182. wcom:
  1183. putline(ipc->fcode,linebuf, spend - linebuf);
  1184. break;
  1185. case XCOM:
  1186. p1 = linebuf;
  1187. p2 = genbuf;
  1188. while((*p2++ = *p1++) != 0)
  1189. ;
  1190. p1 = holdsp;
  1191. p2 = linebuf;
  1192. while((*p2++ = *p1++) != 0)
  1193. ;
  1194. spend = p2 - 1;
  1195. p1 = genbuf;
  1196. p2 = holdsp;
  1197. while((*p2++ = *p1++) != 0)
  1198. ;
  1199. hspend = p2 - 1;
  1200. break;
  1201. case YCOM:
  1202. p1 = linebuf;
  1203. p2 = ipc->text;
  1204. for (i = *p2++; *p1; p1++)
  1205. if (*p1 <= i)
  1206. *p1 = p2[*p1];
  1207. break;
  1208. }
  1209. }
  1210. void
  1211. putline(Biobuf *bp, Rune *buf, int n)
  1212. {
  1213. while (n--)
  1214. Bputrune(bp, *buf++);
  1215. Bputc(bp, '\n');
  1216. }
  1217. int
  1218. ecmp(Rune *a, Rune *b, int count)
  1219. {
  1220. while(count--)
  1221. if(*a++ != *b++)
  1222. return 0;
  1223. return 1;
  1224. }
  1225. void
  1226. arout(void)
  1227. {
  1228. int c;
  1229. char *s;
  1230. char buf[128];
  1231. Rune *p1;
  1232. Biobuf *fi;
  1233. for (aptr = abuf; *aptr; aptr++) {
  1234. if((*aptr)->command == ACOM) {
  1235. for(p1 = (*aptr)->text; *p1; p1++ )
  1236. Bputrune(&fout, *p1);
  1237. Bputc(&fout, '\n');
  1238. } else {
  1239. for(s = buf, p1 = (*aptr)->text; *p1; p1++)
  1240. s += runetochar(s, p1);
  1241. *s = '\0';
  1242. if((fi = Bopen(buf, OREAD)) == 0)
  1243. continue;
  1244. while((c = Bgetc(fi)) >= 0)
  1245. Bputc(&fout, c);
  1246. Bterm(fi);
  1247. }
  1248. }
  1249. aptr = abuf;
  1250. *aptr = 0;
  1251. }
  1252. void
  1253. errexit(void)
  1254. {
  1255. exits("error");
  1256. }
  1257. void
  1258. quit(char *fmt, ...)
  1259. {
  1260. char *p, *ep;
  1261. char msg[256];
  1262. va_list arg;
  1263. ep = msg + sizeof msg;
  1264. p = seprint(msg, ep, "sed: ");
  1265. va_start(arg, fmt);
  1266. p = vseprint(p, ep, fmt, arg);
  1267. va_end(arg);
  1268. p = seprint(p, ep, "\n");
  1269. write(2, msg, p - msg);
  1270. errexit();
  1271. }
  1272. Rune *
  1273. gline(Rune *addr)
  1274. {
  1275. int32_t c;
  1276. Rune *p;
  1277. static int32_t peekc = 0;
  1278. if (f == 0 && opendata() < 0)
  1279. return 0;
  1280. sflag = 0;
  1281. lnum++;
  1282. /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
  1283. do {
  1284. p = addr;
  1285. for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
  1286. if (c == '\n') {
  1287. if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
  1288. dolflag = 1;
  1289. *p = '\0';
  1290. return p;
  1291. }
  1292. if (c && p < lbend)
  1293. *p++ = c;
  1294. }
  1295. /* return partial final line, adding implicit newline */
  1296. if(p != addr) {
  1297. *p = '\0';
  1298. peekc = -1;
  1299. if (fhead == 0)
  1300. dolflag = 1;
  1301. return p;
  1302. }
  1303. peekc = 0;
  1304. Bterm(f);
  1305. } while (opendata() > 0); /* Switch to next stream */
  1306. f = 0;
  1307. return 0;
  1308. }
  1309. /*
  1310. * Data file input section - the intent is to transparently
  1311. * catenate all data input streams.
  1312. */
  1313. void
  1314. enroll(char *filename) /* Add a file to the input file cache */
  1315. {
  1316. FileCache *fp;
  1317. if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
  1318. quit("Out of memory");
  1319. if (ftail == nil)
  1320. fhead = fp;
  1321. else
  1322. ftail->next = fp;
  1323. ftail = fp;
  1324. fp->next = nil;
  1325. fp->name = filename; /* 0 => stdin */
  1326. }
  1327. int
  1328. opendata(void)
  1329. {
  1330. if (fhead == nil)
  1331. return -1;
  1332. if (fhead->name) {
  1333. if ((f = Bopen(fhead->name, OREAD)) == nil)
  1334. quit("Can't open %s", fhead->name);
  1335. } else {
  1336. Binit(&stdin, 0, OREAD);
  1337. f = &stdin;
  1338. }
  1339. fhead = fhead->next;
  1340. return 1;
  1341. }