sed.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * sed -- stream editor
  11. */
  12. #include <u.h>
  13. #include <libc.h>
  14. #include <bio.h>
  15. #include <regexp.h>
  /* Fixed capacity limits used throughout sed. */
  enum {
      DEPTH    = 20,      /* deepest permitted nesting of {} groups */
      MAXCMDS  = 512,     /* number of command slots */
      ADDSIZE  = 10000,   /* runes available to the add & read buffer */
      MAXADDS  = 20,      /* pending adds and reads per cycle */
      LBSIZE   = 8192,    /* runes in one input line */
      LABSIZE  = 50,      /* entries in the label table */
      MAXSUB   = 10,      /* sub-expressions tracked per match */
      MAXFILES = 120,     /* output files */
  };
  26. /*
  27. * An address is a line #, a R.E., "$", a reference to the last
  28. * R.E., or nothing.
  29. */
  30. typedef struct {
  31. enum {
  32. A_NONE,
  33. A_DOL,
  34. A_LINE,
  35. A_RE,
  36. A_LAST,
  37. }type;
  38. union {
  39. int32_t line; /* Line # */
  40. Reprog *rp; /* Compiled R.E. */
  41. };
  42. } Addr;
  43. typedef struct SEDCOM {
  44. Addr ad1; /* optional start address */
  45. Addr ad2; /* optional end address */
  46. union {
  47. Reprog *re1; /* compiled R.E. */
  48. Rune *text; /* added text or file name */
  49. struct SEDCOM *lb1; /* destination command of branch */
  50. };
  51. Rune *rhs; /* Right-hand side of substitution */
  52. Biobuf* fcode; /* File ID for read and write */
  53. char command; /* command code -see below */
  54. char gfl; /* 'Global' flag for substitutions */
  55. char pfl; /* 'print' flag for substitutions */
  56. char active; /* 1 => data between start and end */
  57. char negfl; /* negation flag */
  58. } SedCom;
  /*
   * Values of SedCom.command, listed in numeric order.  The letter
   * noted beside a code is the script command that fcomp() maps to it;
   * codes without a letter are not assigned by fcomp().
   */
  #define ACOM    01      /* a */
  #define CCOM    02      /* c */
  #define DCOM    03      /* d */
  #define ICOM    04      /* i */
  #define LCOM    05      /* l */
  #define RCOM    06      /* r */
  #define SCOM    07      /* s */
  #define PCOM    010     /* p */
  #define QCOM    011     /* q */
  #define NCOM    012     /* n */
  #define EQCOM   013     /* = */
  #define WCOM    014     /* w */
  #define ECOM    015
  #define FCOM    016
  #define COCOM   017
  #define BCOM    020     /* b, and the opening { of a group */
  #define TCOM    021     /* t */
  #define CNCOM   022     /* N */
  #define CPCOM   023     /* P */
  #define CWCOM   024
  #define CDCOM   025     /* D */
  #define YCOM    026     /* y */
  #define GCOM    027     /* g */
  #define CGCOM   030     /* G */
  #define HCOM    031     /* h */
  #define CHCOM   032     /* H */
  #define XCOM    033     /* x */
  87. typedef struct label { /* Label symbol table */
  88. Rune uninm[9]; /* Label name */
  89. SedCom *chain;
  90. SedCom *address; /* Command associated with label */
  91. } Label;
  /* Control block for one data file; blocks form a singly linked list. */
  typedef struct FILE_CACHE {
      struct FILE_CACHE *next;    /* following block in the list */
      char *name;                 /* file name */
  } FileCache;
  96. SedCom pspace[MAXCMDS]; /* Command storage */
  97. SedCom *pend = pspace+MAXCMDS; /* End of command storage */
  98. SedCom *rep = pspace; /* Current fill point */
  99. Reprog *lastre = 0; /* Last regular expression */
  100. Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
  101. Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
  102. Rune *addend = addspace+ADDSIZE;
  103. SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
  104. SedCom **aptr = abuf;
  105. struct { /* Sed program input control block */
  106. enum PTYPE { /* Either on command line or in file */
  107. P_ARG,
  108. P_FILE,
  109. } type;
  110. union PCTL { /* Pointer to data */
  111. Biobuf *bp;
  112. char *curr;
  113. };
  114. } prog;
  115. Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
  116. FileCache *fhead = 0; /* Head of File Cache Chain */
  117. FileCache *ftail = 0; /* Tail of File Cache Chain */
  118. Rune *loc1; /* Start of pattern match */
  119. Rune *loc2; /* End of pattern match */
  120. Rune seof; /* Pattern delimiter char */
  121. Rune linebuf[LBSIZE+1]; /* Input data buffer */
  122. Rune *lbend = linebuf+LBSIZE; /* End of buffer */
  123. Rune *spend = linebuf; /* End of input data */
  124. Rune *cp; /* Current scan point in linebuf */
  125. Rune holdsp[LBSIZE+1]; /* Hold buffer */
  126. Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
  127. Rune *hspend = holdsp; /* End of hold data */
  128. int nflag; /* Command line flags */
  129. int gflag;
  130. int dolflag; /* Set when at true EOF */
  131. int sflag; /* Set when substitution done */
  132. int jflag; /* Set when jump required */
  133. int delflag; /* Delete current line when set */
  134. int64_t lnum = 0; /* Input line count */
  135. char fname[MAXFILES][40]; /* File name cache */
  136. Biobuf *fcode[MAXFILES]; /* File ID cache */
  137. int nfiles = 0; /* Cache fill point */
  138. Biobuf fout; /* Output stream */
  139. Biobuf stdin; /* Default input */
  140. Biobuf* f = 0; /* Input data */
  141. Label ltab[LABSIZE]; /* Label name symbol table */
  142. Label *labend = ltab+LABSIZE; /* End of label table */
  143. Label *lab = ltab+1; /* Current Fill point */
  144. int depth = 0; /* {} stack pointer */
  145. Rune bad; /* Dummy err ptr reference */
  146. Rune *badp = &bad;
  /*
   * Diagnostic format strings handed to quit().  They contain %S
   * conversions for the Rune strings passed alongside them.
   */
  char CGMES[] = "%S command garbled: %S";
  char TMMES[] = "Too much text: %S";
  char LTL[] = "Label too long: %S";  /* previously read "too int32_t" */
  char AD0MES[] = "No addresses allowed: %S";
  char AD1MES[] = "Only one address allowed: %S";
  152. void address(Addr *);
  153. void arout(void);
  154. int cmp(char *, char *);
  155. int rcmp(Rune *, Rune *);
  156. void command(SedCom *);
  157. Reprog *compile(void);
  158. Rune *compsub(Rune *, Rune *);
  159. void dechain(void);
  160. void dosub(Rune *);
  161. int ecmp(Rune *, Rune *, int);
  162. void enroll(char *);
  163. void errexit(void);
  164. int executable(SedCom *);
  165. void execute(void);
  166. void fcomp(void);
  167. int32_t getrune(void);
  168. Rune *gline(Rune *);
  169. int match(Reprog *, Rune *);
  170. void newfile(enum PTYPE, char *);
  171. int opendata(void);
  172. Biobuf *open_file(char *);
  173. Rune *place(Rune *, Rune *, Rune *);
  174. void quit(char *, ...);
  175. int rline(Rune *, Rune *);
  176. Label *search(Label *);
  177. int substitute(SedCom *);
  178. char *text(char *);
  179. Rune *stext(Rune *, Rune *);
  180. int ycomp(SedCom *);
  181. char * trans(int c);
  182. void putline(Biobuf *bp, Rune *buf, int n);
  183. void
  184. main(int argc, char **argv)
  185. {
  186. int compfl;
  187. lnum = 0;
  188. Binit(&fout, 1, OWRITE);
  189. fcode[nfiles++] = &fout;
  190. compfl = 0;
  191. if(argc == 1)
  192. exits(0);
  193. ARGBEGIN{
  194. case 'e':
  195. if (argc <= 1)
  196. quit("missing pattern");
  197. newfile(P_ARG, ARGF());
  198. fcomp();
  199. compfl = 1;
  200. continue;
  201. case 'f':
  202. if(argc <= 1)
  203. quit("no pattern-file");
  204. newfile(P_FILE, ARGF());
  205. fcomp();
  206. compfl = 1;
  207. continue;
  208. case 'g':
  209. gflag++;
  210. continue;
  211. case 'n':
  212. nflag++;
  213. continue;
  214. default:
  215. fprint(2, "sed: Unknown flag: %c\n", ARGC());
  216. continue;
  217. } ARGEND
  218. if(compfl == 0) {
  219. if (--argc < 0)
  220. quit("missing pattern");
  221. newfile(P_ARG, *argv++);
  222. fcomp();
  223. }
  224. if(depth)
  225. quit("Too many {'s");
  226. ltab[0].address = rep;
  227. dechain();
  228. if(argc <= 0)
  229. enroll(0); /* Add stdin to cache */
  230. else
  231. while(--argc >= 0)
  232. enroll(*argv++);
  233. execute();
  234. exits(0);
  235. }
  236. void
  237. fcomp(void)
  238. {
  239. int i;
  240. Label *lpt;
  241. Rune *tp;
  242. SedCom *pt, *pt1;
  243. static Rune *p = addspace;
  244. static SedCom **cmpend[DEPTH]; /* stack of {} operations */
  245. while (rline(linebuf, lbend) >= 0) {
  246. cp = linebuf;
  247. comploop:
  248. while(*cp == L' ' || *cp == L'\t')
  249. cp++;
  250. if(*cp == L'\0' || *cp == L'#')
  251. continue;
  252. if(*cp == L';') {
  253. cp++;
  254. goto comploop;
  255. }
  256. address(&rep->ad1);
  257. if (rep->ad1.type != A_NONE) {
  258. if (rep->ad1.type == A_LAST) {
  259. if (!lastre)
  260. quit("First RE may not be null");
  261. rep->ad1.type = A_RE;
  262. rep->ad1.rp = lastre;
  263. }
  264. if(*cp == L',' || *cp == L';') {
  265. cp++;
  266. address(&rep->ad2);
  267. if (rep->ad2.type == A_LAST) {
  268. rep->ad2.type = A_RE;
  269. rep->ad2.rp = lastre;
  270. }
  271. } else
  272. rep->ad2.type = A_NONE;
  273. }
  274. while(*cp == L' ' || *cp == L'\t')
  275. cp++;
  276. swit:
  277. switch(*cp++) {
  278. default:
  279. quit("Unrecognized command: %S", linebuf);
  280. case '!':
  281. rep->negfl = 1;
  282. goto swit;
  283. case '{':
  284. rep->command = BCOM;
  285. rep->negfl = !rep->negfl;
  286. cmpend[depth++] = &rep->lb1;
  287. if(++rep >= pend)
  288. quit("Too many commands: %S", linebuf);
  289. if(*cp == '\0')
  290. continue;
  291. goto comploop;
  292. case '}':
  293. if(rep->ad1.type != A_NONE)
  294. quit(AD0MES, linebuf);
  295. if(--depth < 0)
  296. quit("Too many }'s");
  297. *cmpend[depth] = rep;
  298. if(*cp == 0)
  299. continue;
  300. goto comploop;
  301. case '=':
  302. rep->command = EQCOM;
  303. if(rep->ad2.type != A_NONE)
  304. quit(AD1MES, linebuf);
  305. break;
  306. case ':':
  307. if(rep->ad1.type != A_NONE)
  308. quit(AD0MES, linebuf);
  309. while(*cp == L' ')
  310. cp++;
  311. tp = lab->uninm;
  312. while (*cp && *cp != L';' && *cp != L' ' &&
  313. *cp != L'\t' && *cp != L'#') {
  314. *tp++ = *cp++;
  315. if(tp >= &lab->uninm[8])
  316. quit(LTL, linebuf);
  317. }
  318. *tp = L'\0';
  319. if (*lab->uninm == L'\0') /* no label? */
  320. quit(CGMES, L":", linebuf);
  321. if(lpt = search(lab)) {
  322. if(lpt->address)
  323. quit("Duplicate labels: %S", linebuf);
  324. } else {
  325. lab->chain = 0;
  326. lpt = lab;
  327. if(++lab >= labend)
  328. quit("Too many labels: %S", linebuf);
  329. }
  330. lpt->address = rep;
  331. if (*cp == L'#')
  332. continue;
  333. rep--; /* reuse this slot */
  334. break;
  335. case 'a':
  336. rep->command = ACOM;
  337. if(rep->ad2.type != A_NONE)
  338. quit(AD1MES, linebuf);
  339. if(*cp == L'\\')
  340. cp++;
  341. if(*cp++ != L'\n')
  342. quit(CGMES, L"a", linebuf);
  343. rep->text = p;
  344. p = stext(p, addend);
  345. break;
  346. case 'c':
  347. rep->command = CCOM;
  348. if(*cp == L'\\')
  349. cp++;
  350. if(*cp++ != L'\n')
  351. quit(CGMES, L"c", linebuf);
  352. rep->text = p;
  353. p = stext(p, addend);
  354. break;
  355. case 'i':
  356. rep->command = ICOM;
  357. if(rep->ad2.type != A_NONE)
  358. quit(AD1MES, linebuf);
  359. if(*cp == L'\\')
  360. cp++;
  361. if(*cp++ != L'\n')
  362. quit(CGMES, L"i", linebuf);
  363. rep->text = p;
  364. p = stext(p, addend);
  365. break;
  366. case 'g':
  367. rep->command = GCOM;
  368. break;
  369. case 'G':
  370. rep->command = CGCOM;
  371. break;
  372. case 'h':
  373. rep->command = HCOM;
  374. break;
  375. case 'H':
  376. rep->command = CHCOM;
  377. break;
  378. case 't':
  379. rep->command = TCOM;
  380. goto jtcommon;
  381. case 'b':
  382. rep->command = BCOM;
  383. jtcommon:
  384. while(*cp == L' ')
  385. cp++;
  386. if(*cp == L'\0' || *cp == L';') {
  387. /* no label; jump to end */
  388. if(pt = ltab[0].chain) {
  389. while((pt1 = pt->lb1) != nil)
  390. pt = pt1;
  391. pt->lb1 = rep;
  392. } else
  393. ltab[0].chain = rep;
  394. break;
  395. }
  396. /* copy label into lab->uninm */
  397. tp = lab->uninm;
  398. while((*tp = *cp++) != L'\0' && *tp != L';')
  399. if(++tp >= &lab->uninm[8])
  400. quit(LTL, linebuf);
  401. cp--;
  402. *tp = L'\0';
  403. if (*lab->uninm == L'\0')
  404. /* shouldn't get here */
  405. quit(CGMES, L"b or t", linebuf);
  406. if((lpt = search(lab)) != nil) {
  407. if(lpt->address)
  408. rep->lb1 = lpt->address;
  409. else {
  410. for(pt = lpt->chain; pt != nil &&
  411. (pt1 = pt->lb1) != nil; pt = pt1)
  412. ;
  413. if (pt)
  414. pt->lb1 = rep;
  415. }
  416. } else { /* add new label */
  417. lab->chain = rep;
  418. lab->address = 0;
  419. if(++lab >= labend)
  420. quit("Too many labels: %S", linebuf);
  421. }
  422. break;
  423. case 'n':
  424. rep->command = NCOM;
  425. break;
  426. case 'N':
  427. rep->command = CNCOM;
  428. break;
  429. case 'p':
  430. rep->command = PCOM;
  431. break;
  432. case 'P':
  433. rep->command = CPCOM;
  434. break;
  435. case 'r':
  436. rep->command = RCOM;
  437. if(rep->ad2.type != A_NONE)
  438. quit(AD1MES, linebuf);
  439. if(*cp++ != L' ')
  440. quit(CGMES, L"r", linebuf);
  441. rep->text = p;
  442. p = stext(p, addend);
  443. break;
  444. case 'd':
  445. rep->command = DCOM;
  446. break;
  447. case 'D':
  448. rep->command = CDCOM;
  449. rep->lb1 = pspace;
  450. break;
  451. case 'q':
  452. rep->command = QCOM;
  453. if(rep->ad2.type != A_NONE)
  454. quit(AD1MES, linebuf);
  455. break;
  456. case 'l':
  457. rep->command = LCOM;
  458. break;
  459. case 's':
  460. rep->command = SCOM;
  461. seof = *cp++;
  462. if ((rep->re1 = compile()) == 0) {
  463. if(!lastre)
  464. quit("First RE may not be null.");
  465. rep->re1 = lastre;
  466. }
  467. rep->rhs = p;
  468. if((p = compsub(p, addend)) == 0)
  469. quit(CGMES, L"s", linebuf);
  470. if(*cp == L'g') {
  471. cp++;
  472. rep->gfl++;
  473. } else if(gflag)
  474. rep->gfl++;
  475. if(*cp == L'p') {
  476. cp++;
  477. rep->pfl = 1;
  478. }
  479. if(*cp == L'P') {
  480. cp++;
  481. rep->pfl = 2;
  482. }
  483. if(*cp == L'w') {
  484. cp++;
  485. if(*cp++ != L' ')
  486. quit(CGMES, L"s", linebuf);
  487. text(fname[nfiles]);
  488. for(i = nfiles - 1; i >= 0; i--)
  489. if(cmp(fname[nfiles], fname[i]) == 0) {
  490. rep->fcode = fcode[i];
  491. goto done;
  492. }
  493. if(nfiles >= MAXFILES)
  494. quit("Too many files in w commands 1");
  495. rep->fcode = open_file(fname[nfiles]);
  496. }
  497. break;
  498. case 'w':
  499. rep->command = WCOM;
  500. if(*cp++ != L' ')
  501. quit(CGMES, L"w", linebuf);
  502. text(fname[nfiles]);
  503. for(i = nfiles - 1; i >= 0; i--)
  504. if(cmp(fname[nfiles], fname[i]) == 0) {
  505. rep->fcode = fcode[i];
  506. goto done;
  507. }
  508. if(nfiles >= MAXFILES){
  509. fprint(2, "sed: Too many files in w commands 2 \n");
  510. fprint(2, "nfiles = %d; MAXF = %d\n",
  511. nfiles, MAXFILES);
  512. errexit();
  513. }
  514. rep->fcode = open_file(fname[nfiles]);
  515. break;
  516. case 'x':
  517. rep->command = XCOM;
  518. break;
  519. case 'y':
  520. rep->command = YCOM;
  521. seof = *cp++;
  522. if (ycomp(rep) == 0)
  523. quit(CGMES, L"y", linebuf);
  524. break;
  525. }
  526. done:
  527. if(++rep >= pend)
  528. quit("Too many commands, last: %S", linebuf);
  529. if(*cp++ != L'\0') {
  530. if(cp[-1] == L';')
  531. goto comploop;
  532. quit(CGMES, cp - 1, linebuf);
  533. }
  534. }
  535. }
  536. Biobuf *
  537. open_file(char *name)
  538. {
  539. int fd;
  540. Biobuf *bp;
  541. if ((bp = malloc(sizeof(Biobuf))) == 0)
  542. quit("Out of memory");
  543. if ((fd = open(name, OWRITE)) < 0 &&
  544. (fd = create(name, OWRITE, 0666)) < 0)
  545. quit("Cannot create %s", name);
  546. Binit(bp, fd, OWRITE);
  547. Bseek(bp, 0, 2);
  548. fcode[nfiles++] = bp;
  549. return bp;
  550. }
  551. Rune *
  552. compsub(Rune *rhs, Rune *end)
  553. {
  554. Rune r;
  555. while ((r = *cp++) != '\0') {
  556. if(r == '\\') {
  557. if (rhs < end)
  558. *rhs++ = Runemax;
  559. else
  560. return 0;
  561. r = *cp++;
  562. if(r == 'n')
  563. r = '\n';
  564. } else {
  565. if(r == seof) {
  566. if (rhs < end)
  567. *rhs++ = '\0';
  568. else
  569. return 0;
  570. return rhs;
  571. }
  572. }
  573. if (rhs < end)
  574. *rhs++ = r;
  575. else
  576. return 0;
  577. }
  578. return 0;
  579. }
  580. Reprog *
  581. compile(void)
  582. {
  583. Rune c;
  584. char *ep;
  585. char expbuf[512];
  586. if((c = *cp++) == seof) /* L'//' */
  587. return 0;
  588. ep = expbuf;
  589. do {
  590. if (c == L'\0' || c == L'\n')
  591. quit(TMMES, linebuf);
  592. if (c == L'\\') {
  593. if (ep >= expbuf+sizeof(expbuf))
  594. quit(TMMES, linebuf);
  595. ep += runetochar(ep, &c);
  596. if ((c = *cp++) == L'n')
  597. c = L'\n';
  598. }
  599. if (ep >= expbuf + sizeof(expbuf))
  600. quit(TMMES, linebuf);
  601. ep += runetochar(ep, &c);
  602. } while ((c = *cp++) != seof);
  603. *ep = 0;
  604. return lastre = regcomp(expbuf);
  605. }
  606. void
  607. regerror(char *s)
  608. {
  609. USED(s);
  610. quit(CGMES, L"r.e.-using", linebuf);
  611. }
  612. void
  613. newfile(enum PTYPE type, char *name)
  614. {
  615. if (type == P_ARG)
  616. prog.curr = name;
  617. else if ((prog.bp = Bopen(name, OREAD)) == 0)
  618. quit("Cannot open pattern-file: %s\n", name);
  619. prog.type = type;
  620. }
  621. int
  622. rline(Rune *buf, Rune *end)
  623. {
  624. int32_t c;
  625. Rune r;
  626. while ((c = getrune()) >= 0) {
  627. r = c;
  628. if (r == '\\') {
  629. if (buf <= end)
  630. *buf++ = r;
  631. if ((c = getrune()) < 0)
  632. break;
  633. r = c;
  634. } else if (r == '\n') {
  635. *buf = '\0';
  636. return 1;
  637. }
  638. if (buf <= end)
  639. *buf++ = r;
  640. }
  641. *buf = '\0';
  642. return -1;
  643. }
  644. int32_t
  645. getrune(void)
  646. {
  647. int32_t c;
  648. Rune r;
  649. char *p;
  650. if (prog.type == P_ARG) {
  651. if ((p = prog.curr) != 0) {
  652. if (*p) {
  653. prog.curr += chartorune(&r, p);
  654. c = r;
  655. } else {
  656. c = '\n'; /* fake an end-of-line */
  657. prog.curr = 0;
  658. }
  659. } else
  660. c = -1;
  661. } else if ((c = Bgetrune(prog.bp)) < 0)
  662. Bterm(prog.bp);
  663. return c;
  664. }
  665. void
  666. address(Addr *ap)
  667. {
  668. int c;
  669. int32_t lno;
  670. if((c = *cp++) == '$')
  671. ap->type = A_DOL;
  672. else if(c == '/') {
  673. seof = c;
  674. if (ap->rp = compile())
  675. ap->type = A_RE;
  676. else
  677. ap->type = A_LAST;
  678. }
  679. else if (c >= '0' && c <= '9') {
  680. lno = c - '0';
  681. while ((c = *cp) >= '0' && c <= '9')
  682. lno = lno*10 + *cp++ - '0';
  683. if(!lno)
  684. quit("line number 0 is illegal",0);
  685. ap->type = A_LINE;
  686. ap->line = lno;
  687. }
  688. else {
  689. cp--;
  690. ap->type = A_NONE;
  691. }
  692. }
  /*
   * Compare two NUL-terminated character strings.
   * Returns 0 when they are identical and 1 otherwise.
   */
  int
  cmp(char *a, char *b)
  {
      for(;; a++, b++) {
          if(*a != *b)
              return 1;
          if(*a == '\0')
              return 0;
      }
  }
  703. int
  704. rcmp(Rune *a, Rune *b) /* compare runes */
  705. {
  706. while(*a == *b++)
  707. if (*a == '\0')
  708. return 0;
  709. else
  710. a++;
  711. return 1;
  712. }
  713. char *
  714. text(char *p) /* extract character string */
  715. {
  716. Rune r;
  717. while(*cp == ' ' || *cp == '\t')
  718. cp++;
  719. while (*cp) {
  720. if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
  721. break;
  722. if (r == '\n')
  723. while (*cp == ' ' || *cp == '\t')
  724. cp++;
  725. p += runetochar(p, &r);
  726. }
  727. *p++ = '\0';
  728. return p;
  729. }
  730. Rune *
  731. stext(Rune *p, Rune *end) /* extract rune string */
  732. {
  733. while(*cp == L' ' || *cp == L'\t')
  734. cp++;
  735. while (*cp) {
  736. if (*cp == L'\\' && *++cp == L'\0')
  737. break;
  738. if (p >= end-1)
  739. quit(TMMES, linebuf);
  740. if ((*p++ = *cp++) == L'\n')
  741. while(*cp == L' ' || *cp == L'\t')
  742. cp++;
  743. }
  744. *p++ = 0;
  745. return p;
  746. }
  747. Label *
  748. search(Label *ptr)
  749. {
  750. Label *rp;
  751. for (rp = ltab; rp < ptr; rp++)
  752. if(rcmp(rp->uninm, ptr->uninm) == 0)
  753. return(rp);
  754. return(0);
  755. }
  756. void
  757. dechain(void)
  758. {
  759. Label *lptr;
  760. SedCom *rptr, *trptr;
  761. for(lptr = ltab; lptr < lab; lptr++) {
  762. if(lptr->address == 0)
  763. quit("Undefined label: %S", lptr->uninm);
  764. if(lptr->chain) {
  765. rptr = lptr->chain;
  766. while((trptr = rptr->lb1) != nil) {
  767. rptr->lb1 = lptr->address;
  768. rptr = trptr;
  769. }
  770. rptr->lb1 = lptr->address;
  771. }
  772. }
  773. }
  774. int
  775. ycomp(SedCom *r)
  776. {
  777. int i;
  778. Rune *rp, *sp, *tsp;
  779. Rune c, highc;
  780. highc = 0;
  781. for(tsp = cp; *tsp != seof; tsp++) {
  782. if(*tsp == L'\\')
  783. tsp++;
  784. if(*tsp == L'\n' || *tsp == L'\0')
  785. return 0;
  786. if (*tsp > highc)
  787. highc = *tsp;
  788. }
  789. tsp++;
  790. if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
  791. quit("Out of memory");
  792. *rp++ = highc; /* save upper bound */
  793. for (i = 0; i <= highc; i++)
  794. rp[i] = i;
  795. sp = cp;
  796. while((c = *sp++) != seof) {
  797. if(c == L'\\' && *sp == L'n') {
  798. sp++;
  799. c = L'\n';
  800. }
  801. if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
  802. rp[c] = L'\n';
  803. tsp++;
  804. }
  805. if(rp[c] == seof || rp[c] == L'\0') {
  806. free(r->re1);
  807. r->re1 = nil;
  808. return 0;
  809. }
  810. }
  811. if(*tsp != seof) {
  812. free(r->re1);
  813. r->re1 = nil;
  814. return 0;
  815. }
  816. cp = tsp+1;
  817. return 1;
  818. }
  819. void
  820. execute(void)
  821. {
  822. SedCom *ipc;
  823. while (spend = gline(linebuf)){
  824. for(ipc = pspace; ipc->command; ) {
  825. if (!executable(ipc)) {
  826. ipc++;
  827. continue;
  828. }
  829. command(ipc);
  830. if(delflag)
  831. break;
  832. if(jflag) {
  833. jflag = 0;
  834. if((ipc = ipc->lb1) == 0)
  835. break;
  836. } else
  837. ipc++;
  838. }
  839. if(!nflag && !delflag)
  840. putline(&fout, linebuf, spend - linebuf);
  841. if(aptr > abuf)
  842. arout();
  843. delflag = 0;
  844. }
  845. }
  846. /* determine if a statement should be applied to an input line */
  847. int
  848. executable(SedCom *ipc)
  849. {
  850. if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
  851. if (ipc->active == 1) /* Second line */
  852. ipc->active = 2;
  853. switch(ipc->ad2.type) {
  854. case A_NONE: /* No second addr; use first */
  855. ipc->active = 0;
  856. break;
  857. case A_DOL: /* Accept everything */
  858. return !ipc->negfl;
  859. case A_LINE: /* Line at end of range? */
  860. if (lnum <= ipc->ad2.line) {
  861. if (ipc->ad2.line == lnum)
  862. ipc->active = 0;
  863. return !ipc->negfl;
  864. }
  865. ipc->active = 0; /* out of range */
  866. return ipc->negfl;
  867. case A_RE: /* Check for matching R.E. */
  868. if (match(ipc->ad2.rp, linebuf))
  869. ipc->active = 0;
  870. return !ipc->negfl;
  871. default:
  872. quit("Internal error");
  873. }
  874. }
  875. switch (ipc->ad1.type) { /* Check first address */
  876. case A_NONE: /* Everything matches */
  877. return !ipc->negfl;
  878. case A_DOL: /* Only last line */
  879. if (dolflag)
  880. return !ipc->negfl;
  881. break;
  882. case A_LINE: /* Check line number */
  883. if (ipc->ad1.line == lnum) {
  884. ipc->active = 1; /* In range */
  885. return !ipc->negfl;
  886. }
  887. break;
  888. case A_RE: /* Check R.E. */
  889. if (match(ipc->ad1.rp, linebuf)) {
  890. ipc->active = 1; /* In range */
  891. return !ipc->negfl;
  892. }
  893. break;
  894. default:
  895. quit("Internal error");
  896. }
  897. return ipc->negfl;
  898. }
  899. int
  900. match(Reprog *pattern, Rune *buf)
  901. {
  902. if (!pattern)
  903. return 0;
  904. subexp[0].rsp = buf;
  905. subexp[0].ep = 0;
  906. if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
  907. loc1 = subexp[0].rsp;
  908. loc2 = subexp[0].rep;
  909. return 1;
  910. }
  911. loc1 = loc2 = 0;
  912. return 0;
  913. }
  914. int
  915. substitute(SedCom *ipc)
  916. {
  917. int len;
  918. if(!match(ipc->re1, linebuf))
  919. return 0;
  920. /*
  921. * we have at least one match. some patterns, e.g. '$' or '^', can
  922. * produce 0-length matches, so during a global substitute we must
  923. * bump to the character after a 0-length match to keep from looping.
  924. */
  925. sflag = 1;
  926. if(ipc->gfl == 0) /* single substitution */
  927. dosub(ipc->rhs);
  928. else
  929. do{ /* global substitution */
  930. len = loc2 - loc1; /* length of match */
  931. dosub(ipc->rhs); /* dosub moves loc2 */
  932. if(*loc2 == 0) /* end of string */
  933. break;
  934. if(len == 0) /* zero-length R.E. match */
  935. loc2++; /* bump over 0-length match */
  936. if(*loc2 == 0) /* end of string */
  937. break;
  938. } while(match(ipc->re1, loc2));
  939. return 1;
  940. }
  941. void
  942. dosub(Rune *rhsbuf)
  943. {
  944. int c, n;
  945. Rune *lp, *sp, *rp;
  946. lp = linebuf;
  947. sp = genbuf;
  948. rp = rhsbuf;
  949. while (lp < loc1)
  950. *sp++ = *lp++;
  951. while(c = *rp++) {
  952. if (c == '&') {
  953. sp = place(sp, loc1, loc2);
  954. continue;
  955. }
  956. if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
  957. n = c-'0';
  958. if (subexp[n].rsp && subexp[n].rep) {
  959. sp = place(sp, subexp[n].rsp, subexp[n].rep);
  960. continue;
  961. }
  962. else {
  963. fprint(2, "sed: Invalid back reference \\%d\n",n);
  964. errexit();
  965. }
  966. }
  967. *sp++ = c;
  968. if (sp >= &genbuf[LBSIZE])
  969. fprint(2, "sed: Output line too int32_t.\n");
  970. }
  971. lp = loc2;
  972. loc2 = sp - genbuf + linebuf;
  973. while (*sp++ = *lp++)
  974. if (sp >= &genbuf[LBSIZE])
  975. fprint(2, "sed: Output line too int32_t.\n");
  976. lp = linebuf;
  977. sp = genbuf;
  978. while (*lp++ = *sp++)
  979. ;
  980. spend = lp - 1;
  981. }
  982. Rune *
  983. place(Rune *sp, Rune *l1, Rune *l2)
  984. {
  985. while (l1 < l2) {
  986. *sp++ = *l1++;
  987. if (sp >= &genbuf[LBSIZE])
  988. fprint(2, "sed: Output line too int32_t.\n");
  989. }
  990. return sp;
  991. }
  /*
   * Return a printable escape for c: \b, \n, \r, \t and \\ for those
   * characters, otherwise \x followed by the low 16 bits of c as four
   * lowercase hexadecimal digits.  The hexadecimal form is built in a
   * static buffer that the next call overwrites.
   */
  char *
  trans(int c)
  {
      static char hex[] = "0123456789abcdef";
      static char buf[] = "\\x0000";
      int k, shift;

      if(c == '\b')
          return "\\b";
      if(c == '\n')
          return "\\n";
      if(c == '\r')
          return "\\r";
      if(c == '\t')
          return "\\t";
      if(c == '\\')
          return "\\\\";

      /* buf[2..5] receive the nibbles, most significant first */
      for(k = 2, shift = 12; shift >= 0; k++, shift -= 4)
          buf[k] = hex[(c >> shift) & 0xF];
      return buf;
  }
  1015. void
  1016. command(SedCom *ipc)
  1017. {
  1018. int i, c;
  1019. char *ucp;
  1020. Rune *execp, *p1, *p2, *rp;
  1021. switch(ipc->command) {
  1022. case ACOM:
  1023. *aptr++ = ipc;
  1024. if(aptr >= abuf+MAXADDS)
  1025. quit("sed: Too many appends after line %ld\n",
  1026. (char *)lnum);
  1027. *aptr = 0;
  1028. break;
  1029. case CCOM:
  1030. delflag = 1;
  1031. if(ipc->active == 1) {
  1032. for(rp = ipc->text; *rp; rp++)
  1033. Bputrune(&fout, *rp);
  1034. Bputc(&fout, '\n');
  1035. }
  1036. break;
  1037. case DCOM:
  1038. delflag++;
  1039. break;
  1040. case CDCOM:
  1041. p1 = p2 = linebuf;
  1042. while(*p1 != '\n') {
  1043. if(*p1++ == 0) {
  1044. delflag++;
  1045. return;
  1046. }
  1047. }
  1048. p1++;
  1049. while(*p2++ = *p1++)
  1050. ;
  1051. spend = p2 - 1;
  1052. jflag++;
  1053. break;
  1054. case EQCOM:
  1055. Bprint(&fout, "%ld\n", lnum);
  1056. break;
  1057. case GCOM:
  1058. p1 = linebuf;
  1059. p2 = holdsp;
  1060. while(*p1++ = *p2++)
  1061. ;
  1062. spend = p1 - 1;
  1063. break;
  1064. case CGCOM:
  1065. *spend++ = '\n';
  1066. p1 = spend;
  1067. p2 = holdsp;
  1068. while(*p1++ = *p2++)
  1069. if(p1 >= lbend)
  1070. break;
  1071. spend = p1 - 1;
  1072. break;
  1073. case HCOM:
  1074. p1 = holdsp;
  1075. p2 = linebuf;
  1076. while(*p1++ = *p2++);
  1077. hspend = p1 - 1;
  1078. break;
  1079. case CHCOM:
  1080. *hspend++ = '\n';
  1081. p1 = hspend;
  1082. p2 = linebuf;
  1083. while(*p1++ = *p2++)
  1084. if(p1 >= hend)
  1085. break;
  1086. hspend = p1 - 1;
  1087. break;
  1088. case ICOM:
  1089. for(rp = ipc->text; *rp; rp++)
  1090. Bputrune(&fout, *rp);
  1091. Bputc(&fout, '\n');
  1092. break;
  1093. case BCOM:
  1094. jflag = 1;
  1095. break;
  1096. case LCOM:
  1097. c = 0;
  1098. for (i = 0, rp = linebuf; *rp; rp++) {
  1099. c = *rp;
  1100. if(c >= 0x20 && c < 0x7F && c != '\\') {
  1101. Bputc(&fout, c);
  1102. if(i++ > 71) {
  1103. Bprint(&fout, "\\\n");
  1104. i = 0;
  1105. }
  1106. } else {
  1107. for (ucp = trans(*rp); *ucp; ucp++){
  1108. c = *ucp;
  1109. Bputc(&fout, c);
  1110. if(i++ > 71) {
  1111. Bprint(&fout, "\\\n");
  1112. i = 0;
  1113. }
  1114. }
  1115. }
  1116. }
  1117. if(c == ' ')
  1118. Bprint(&fout, "\\n");
  1119. Bputc(&fout, '\n');
  1120. break;
  1121. case NCOM:
  1122. if(!nflag)
  1123. putline(&fout, linebuf, spend-linebuf);
  1124. if(aptr > abuf)
  1125. arout();
  1126. if((execp = gline(linebuf)) == 0) {
  1127. delflag = 1;
  1128. break;
  1129. }
  1130. spend = execp;
  1131. break;
  1132. case CNCOM:
  1133. if(aptr > abuf)
  1134. arout();
  1135. *spend++ = '\n';
  1136. if((execp = gline(spend)) == 0) {
  1137. delflag = 1;
  1138. break;
  1139. }
  1140. spend = execp;
  1141. break;
  1142. case PCOM:
  1143. putline(&fout, linebuf, spend-linebuf);
  1144. break;
  1145. case CPCOM:
  1146. cpcom:
  1147. for(rp = linebuf; *rp && *rp != '\n'; rp++)
  1148. Bputc(&fout, *rp);
  1149. Bputc(&fout, '\n');
  1150. break;
  1151. case QCOM:
  1152. if(!nflag)
  1153. putline(&fout, linebuf, spend-linebuf);
  1154. if(aptr > abuf)
  1155. arout();
  1156. exits(0);
  1157. case RCOM:
  1158. *aptr++ = ipc;
  1159. if(aptr >= &abuf[MAXADDS])
  1160. quit("sed: Too many reads after line %ld\n",
  1161. (char *)lnum);
  1162. *aptr = 0;
  1163. break;
  1164. case SCOM:
  1165. i = substitute(ipc);
  1166. if(i && ipc->pfl)
  1167. if(ipc->pfl == 1)
  1168. putline(&fout, linebuf, spend-linebuf);
  1169. else
  1170. goto cpcom;
  1171. if(i && ipc->fcode)
  1172. goto wcom;
  1173. break;
  1174. case TCOM:
  1175. if(sflag) {
  1176. sflag = 0;
  1177. jflag = 1;
  1178. }
  1179. break;
  1180. case WCOM:
  1181. wcom:
  1182. putline(ipc->fcode,linebuf, spend - linebuf);
  1183. break;
  1184. case XCOM:
  1185. p1 = linebuf;
  1186. p2 = genbuf;
  1187. while(*p2++ = *p1++)
  1188. ;
  1189. p1 = holdsp;
  1190. p2 = linebuf;
  1191. while(*p2++ = *p1++)
  1192. ;
  1193. spend = p2 - 1;
  1194. p1 = genbuf;
  1195. p2 = holdsp;
  1196. while(*p2++ = *p1++)
  1197. ;
  1198. hspend = p2 - 1;
  1199. break;
  1200. case YCOM:
  1201. p1 = linebuf;
  1202. p2 = ipc->text;
  1203. for (i = *p2++; *p1; p1++)
  1204. if (*p1 <= i)
  1205. *p1 = p2[*p1];
  1206. break;
  1207. }
  1208. }
  1209. void
  1210. putline(Biobuf *bp, Rune *buf, int n)
  1211. {
  1212. while (n--)
  1213. Bputrune(bp, *buf++);
  1214. Bputc(bp, '\n');
  1215. }
  1216. int
  1217. ecmp(Rune *a, Rune *b, int count)
  1218. {
  1219. while(count--)
  1220. if(*a++ != *b++)
  1221. return 0;
  1222. return 1;
  1223. }
  1224. void
  1225. arout(void)
  1226. {
  1227. int c;
  1228. char *s;
  1229. char buf[128];
  1230. Rune *p1;
  1231. Biobuf *fi;
  1232. for (aptr = abuf; *aptr; aptr++) {
  1233. if((*aptr)->command == ACOM) {
  1234. for(p1 = (*aptr)->text; *p1; p1++ )
  1235. Bputrune(&fout, *p1);
  1236. Bputc(&fout, '\n');
  1237. } else {
  1238. for(s = buf, p1 = (*aptr)->text; *p1; p1++)
  1239. s += runetochar(s, p1);
  1240. *s = '\0';
  1241. if((fi = Bopen(buf, OREAD)) == 0)
  1242. continue;
  1243. while((c = Bgetc(fi)) >= 0)
  1244. Bputc(&fout, c);
  1245. Bterm(fi);
  1246. }
  1247. }
  1248. aptr = abuf;
  1249. *aptr = 0;
  1250. }
  1251. void
  1252. errexit(void)
  1253. {
  1254. exits("error");
  1255. }
  1256. void
  1257. quit(char *fmt, ...)
  1258. {
  1259. char *p, *ep;
  1260. char msg[256];
  1261. va_list arg;
  1262. ep = msg + sizeof msg;
  1263. p = seprint(msg, ep, "sed: ");
  1264. va_start(arg, fmt);
  1265. p = vseprint(p, ep, fmt, arg);
  1266. va_end(arg);
  1267. p = seprint(p, ep, "\n");
  1268. write(2, msg, p - msg);
  1269. errexit();
  1270. }
  1271. Rune *
  1272. gline(Rune *addr)
  1273. {
  1274. int32_t c;
  1275. Rune *p;
  1276. static int32_t peekc = 0;
  1277. if (f == 0 && opendata() < 0)
  1278. return 0;
  1279. sflag = 0;
  1280. lnum++;
  1281. /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
  1282. do {
  1283. p = addr;
  1284. for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
  1285. if (c == '\n') {
  1286. if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
  1287. dolflag = 1;
  1288. *p = '\0';
  1289. return p;
  1290. }
  1291. if (c && p < lbend)
  1292. *p++ = c;
  1293. }
  1294. /* return partial final line, adding implicit newline */
  1295. if(p != addr) {
  1296. *p = '\0';
  1297. peekc = -1;
  1298. if (fhead == 0)
  1299. dolflag = 1;
  1300. return p;
  1301. }
  1302. peekc = 0;
  1303. Bterm(f);
  1304. } while (opendata() > 0); /* Switch to next stream */
  1305. f = 0;
  1306. return 0;
  1307. }
  1308. /*
  1309. * Data file input section - the intent is to transparently
  1310. * catenate all data input streams.
  1311. */
  1312. void
  1313. enroll(char *filename) /* Add a file to the input file cache */
  1314. {
  1315. FileCache *fp;
  1316. if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
  1317. quit("Out of memory");
  1318. if (ftail == nil)
  1319. fhead = fp;
  1320. else
  1321. ftail->next = fp;
  1322. ftail = fp;
  1323. fp->next = nil;
  1324. fp->name = filename; /* 0 => stdin */
  1325. }
  1326. int
  1327. opendata(void)
  1328. {
  1329. if (fhead == nil)
  1330. return -1;
  1331. if (fhead->name) {
  1332. if ((f = Bopen(fhead->name, OREAD)) == nil)
  1333. quit("Can't open %s", fhead->name);
  1334. } else {
  1335. Binit(&stdin, 0, OREAD);
  1336. f = &stdin;
  1337. }
  1338. fhead = fhead->next;
  1339. return 1;
  1340. }