123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455 |
- /*
- * sed -- stream editor
- */
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include <regexp.h>
- enum {
- DEPTH = 20, /* max nesting depth of {} */
- MAXCMDS = 512, /* max sed commands */
- ADDSIZE = 10000, /* size of add & read buffer */
- MAXADDS = 20, /* max pending adds and reads */
- LBSIZE = 8192, /* input line size */
- LABSIZE = 50, /* max number of labels */
- MAXSUB = 10, /* max number of sub reg exp */
- MAXFILES = 120, /* max output files */
- };
- /*
- * An address is a line #, a R.E., "$", a reference to the last
- * R.E., or nothing.
- */
- typedef struct {
- enum {
- A_NONE,
- A_DOL,
- A_LINE,
- A_RE,
- A_LAST,
- }type;
- union {
- long line; /* Line # */
- Reprog *rp; /* Compiled R.E. */
- };
- } Addr;
- typedef struct SEDCOM {
- Addr ad1; /* optional start address */
- Addr ad2; /* optional end address */
- union {
- Reprog *re1; /* compiled R.E. */
- Rune *text; /* added text or file name */
- struct SEDCOM *lb1; /* destination command of branch */
- };
- Rune *rhs; /* Right-hand side of substitution */
- Biobuf* fcode; /* File ID for read and write */
- char command; /* command code -see below */
- char gfl; /* 'Global' flag for substitutions */
- char pfl; /* 'print' flag for substitutions */
- char active; /* 1 => data between start and end */
- char negfl; /* negation flag */
- } SedCom;
- /* Command Codes for field SedCom.command */
- #define ACOM 01
- #define BCOM 020
- #define CCOM 02
- #define CDCOM 025
- #define CNCOM 022
- #define COCOM 017
- #define CPCOM 023
- #define DCOM 03
- #define ECOM 015
- #define EQCOM 013
- #define FCOM 016
- #define GCOM 027
- #define CGCOM 030
- #define HCOM 031
- #define CHCOM 032
- #define ICOM 04
- #define LCOM 05
- #define NCOM 012
- #define PCOM 010
- #define QCOM 011
- #define RCOM 06
- #define SCOM 07
- #define TCOM 021
- #define WCOM 014
- #define CWCOM 024
- #define YCOM 026
- #define XCOM 033
- typedef struct label { /* Label symbol table */
- Rune uninm[9]; /* Label name */
- SedCom *chain;
- SedCom *address; /* Command associated with label */
- } Label;
- typedef struct FILE_CACHE { /* Data file control block */
- struct FILE_CACHE *next; /* Forward Link */
- char *name; /* Name of file */
- } FileCache;
- SedCom pspace[MAXCMDS]; /* Command storage */
- SedCom *pend = pspace+MAXCMDS; /* End of command storage */
- SedCom *rep = pspace; /* Current fill point */
- Reprog *lastre = 0; /* Last regular expression */
- Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
- Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
- Rune *addend = addspace+ADDSIZE;
- SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
- SedCom **aptr = abuf;
- struct { /* Sed program input control block */
- enum PTYPE { /* Either on command line or in file */
- P_ARG,
- P_FILE,
- } type;
- union PCTL { /* Pointer to data */
- Biobuf *bp;
- char *curr;
- };
- } prog;
- Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
- FileCache *fhead = 0; /* Head of File Cache Chain */
- FileCache *ftail = 0; /* Tail of File Cache Chain */
- Rune *loc1; /* Start of pattern match */
- Rune *loc2; /* End of pattern match */
- Rune seof; /* Pattern delimiter char */
- Rune linebuf[LBSIZE+1]; /* Input data buffer */
- Rune *lbend = linebuf+LBSIZE; /* End of buffer */
- Rune *spend = linebuf; /* End of input data */
- Rune *cp; /* Current scan point in linebuf */
- Rune holdsp[LBSIZE+1]; /* Hold buffer */
- Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
- Rune *hspend = holdsp; /* End of hold data */
- int nflag; /* Command line flags */
- int gflag;
- int dolflag; /* Set when at true EOF */
- int sflag; /* Set when substitution done */
- int jflag; /* Set when jump required */
- int delflag; /* Delete current line when set */
- long lnum = 0; /* Input line count */
- char fname[MAXFILES][40]; /* File name cache */
- Biobuf *fcode[MAXFILES]; /* File ID cache */
- int nfiles = 0; /* Cache fill point */
- Biobuf fout; /* Output stream */
- Biobuf stdin; /* Default input */
- Biobuf* f = 0; /* Input data */
- Label ltab[LABSIZE]; /* Label name symbol table */
- Label *labend = ltab+LABSIZE; /* End of label table */
- Label *lab = ltab+1; /* Current Fill point */
- int depth = 0; /* {} stack pointer */
- Rune bad; /* Dummy err ptr reference */
- Rune *badp = &bad;
- char CGMES[] = "%S command garbled: %S";
- char TMMES[] = "Too much text: %S";
- char LTL[] = "Label too long: %S";
- char AD0MES[] = "No addresses allowed: %S";
- char AD1MES[] = "Only one address allowed: %S";
- void address(Addr *);
- void arout(void);
- int cmp(char *, char *);
- int rcmp(Rune *, Rune *);
- void command(SedCom *);
- Reprog *compile(void);
- Rune *compsub(Rune *, Rune *);
- void dechain(void);
- void dosub(Rune *);
- int ecmp(Rune *, Rune *, int);
- void enroll(char *);
- void errexit(void);
- int executable(SedCom *);
- void execute(void);
- void fcomp(void);
- long getrune(void);
- Rune *gline(Rune *);
- int match(Reprog *, Rune *);
- void newfile(enum PTYPE, char *);
- int opendata(void);
- Biobuf *open_file(char *);
- Rune *place(Rune *, Rune *, Rune *);
- void quit(char *, ...);
- int rline(Rune *, Rune *);
- Label *search(Label *);
- int substitute(SedCom *);
- char *text(char *);
- Rune *stext(Rune *, Rune *);
- int ycomp(SedCom *);
- char * trans(int c);
- void putline(Biobuf *bp, Rune *buf, int n);
- void
- main(int argc, char **argv)
- {
- int compfl;
- lnum = 0;
- Binit(&fout, 1, OWRITE);
- fcode[nfiles++] = &fout;
- compfl = 0;
- if(argc == 1)
- exits(0);
- ARGBEGIN{
- case 'e':
- if (argc <= 1)
- quit("missing pattern");
- newfile(P_ARG, ARGF());
- fcomp();
- compfl = 1;
- continue;
- case 'f':
- if(argc <= 1)
- quit("no pattern-file");
- newfile(P_FILE, ARGF());
- fcomp();
- compfl = 1;
- continue;
- case 'g':
- gflag++;
- continue;
- case 'n':
- nflag++;
- continue;
- default:
- fprint(2, "sed: Unknown flag: %c\n", ARGC());
- continue;
- } ARGEND
- if(compfl == 0) {
- if (--argc < 0)
- quit("missing pattern");
- newfile(P_ARG, *argv++);
- fcomp();
- }
- if(depth)
- quit("Too many {'s");
- ltab[0].address = rep;
- dechain();
- if(argc <= 0)
- enroll(0); /* Add stdin to cache */
- else
- while(--argc >= 0)
- enroll(*argv++);
- execute();
- exits(0);
- }
- void
- fcomp(void)
- {
- int i;
- Label *lpt;
- Rune *tp;
- SedCom *pt, *pt1;
- static Rune *p = addspace;
- static SedCom **cmpend[DEPTH]; /* stack of {} operations */
- while (rline(linebuf, lbend) >= 0) {
- cp = linebuf;
- comploop:
- while(*cp == L' ' || *cp == L'\t')
- cp++;
- if(*cp == L'\0' || *cp == L'#')
- continue;
- if(*cp == L';') {
- cp++;
- goto comploop;
- }
- address(&rep->ad1);
- if (rep->ad1.type != A_NONE) {
- if (rep->ad1.type == A_LAST) {
- if (!lastre)
- quit("First RE may not be null");
- rep->ad1.type = A_RE;
- rep->ad1.rp = lastre;
- }
- if(*cp == L',' || *cp == L';') {
- cp++;
- address(&rep->ad2);
- if (rep->ad2.type == A_LAST) {
- rep->ad2.type = A_RE;
- rep->ad2.rp = lastre;
- }
- } else
- rep->ad2.type = A_NONE;
- }
- while(*cp == L' ' || *cp == L'\t')
- cp++;
- swit:
- switch(*cp++) {
- default:
- quit("Unrecognized command: %S", linebuf);
- case '!':
- rep->negfl = 1;
- goto swit;
- case '{':
- rep->command = BCOM;
- rep->negfl = !rep->negfl;
- cmpend[depth++] = &rep->lb1;
- if(++rep >= pend)
- quit("Too many commands: %S", linebuf);
- if(*cp == '\0')
- continue;
- goto comploop;
- case '}':
- if(rep->ad1.type != A_NONE)
- quit(AD0MES, linebuf);
- if(--depth < 0)
- quit("Too many }'s");
- *cmpend[depth] = rep;
- if(*cp == 0)
- continue;
- goto comploop;
- case '=':
- rep->command = EQCOM;
- if(rep->ad2.type != A_NONE)
- quit(AD1MES, linebuf);
- break;
- case ':':
- if(rep->ad1.type != A_NONE)
- quit(AD0MES, linebuf);
- while(*cp == L' ')
- cp++;
- tp = lab->uninm;
- while (*cp && *cp != L';' && *cp != L' ' &&
- *cp != L'\t' && *cp != L'#') {
- *tp++ = *cp++;
- if(tp >= &lab->uninm[8])
- quit(LTL, linebuf);
- }
- *tp = L'\0';
- if (*lab->uninm == L'\0') /* no label? */
- quit(CGMES, L":", linebuf);
- if(lpt = search(lab)) {
- if(lpt->address)
- quit("Duplicate labels: %S", linebuf);
- } else {
- lab->chain = 0;
- lpt = lab;
- if(++lab >= labend)
- quit("Too many labels: %S", linebuf);
- }
- lpt->address = rep;
- if (*cp == L'#')
- continue;
- rep--; /* reuse this slot */
- break;
- case 'a':
- rep->command = ACOM;
- if(rep->ad2.type != A_NONE)
- quit(AD1MES, linebuf);
- if(*cp == L'\\')
- cp++;
- if(*cp++ != L'\n')
- quit(CGMES, L"a", linebuf);
- rep->text = p;
- p = stext(p, addend);
- break;
- case 'c':
- rep->command = CCOM;
- if(*cp == L'\\')
- cp++;
- if(*cp++ != L'\n')
- quit(CGMES, L"c", linebuf);
- rep->text = p;
- p = stext(p, addend);
- break;
- case 'i':
- rep->command = ICOM;
- if(rep->ad2.type != A_NONE)
- quit(AD1MES, linebuf);
- if(*cp == L'\\')
- cp++;
- if(*cp++ != L'\n')
- quit(CGMES, L"i", linebuf);
- rep->text = p;
- p = stext(p, addend);
- break;
- case 'g':
- rep->command = GCOM;
- break;
- case 'G':
- rep->command = CGCOM;
- break;
- case 'h':
- rep->command = HCOM;
- break;
- case 'H':
- rep->command = CHCOM;
- break;
- case 't':
- rep->command = TCOM;
- goto jtcommon;
- case 'b':
- rep->command = BCOM;
- jtcommon:
- while(*cp == L' ')
- cp++;
- if(*cp == L'\0' || *cp == L';') {
- /* no label; jump to end */
- if(pt = ltab[0].chain) {
- while((pt1 = pt->lb1) != nil)
- pt = pt1;
- pt->lb1 = rep;
- } else
- ltab[0].chain = rep;
- break;
- }
- /* copy label into lab->uninm */
- tp = lab->uninm;
- while((*tp = *cp++) != L'\0' && *tp != L';')
- if(++tp >= &lab->uninm[8])
- quit(LTL, linebuf);
- cp--;
- *tp = L'\0';
- if (*lab->uninm == L'\0')
- /* shouldn't get here */
- quit(CGMES, L"b or t", linebuf);
- if((lpt = search(lab)) != nil) {
- if(lpt->address)
- rep->lb1 = lpt->address;
- else {
- for(pt = lpt->chain; pt != nil &&
- (pt1 = pt->lb1) != nil; pt = pt1)
- ;
- if (pt)
- pt->lb1 = rep;
- }
- } else { /* add new label */
- lab->chain = rep;
- lab->address = 0;
- if(++lab >= labend)
- quit("Too many labels: %S", linebuf);
- }
- break;
- case 'n':
- rep->command = NCOM;
- break;
- case 'N':
- rep->command = CNCOM;
- break;
- case 'p':
- rep->command = PCOM;
- break;
- case 'P':
- rep->command = CPCOM;
- break;
- case 'r':
- rep->command = RCOM;
- if(rep->ad2.type != A_NONE)
- quit(AD1MES, linebuf);
- if(*cp++ != L' ')
- quit(CGMES, L"r", linebuf);
- rep->text = p;
- p = stext(p, addend);
- break;
- case 'd':
- rep->command = DCOM;
- break;
- case 'D':
- rep->command = CDCOM;
- rep->lb1 = pspace;
- break;
- case 'q':
- rep->command = QCOM;
- if(rep->ad2.type != A_NONE)
- quit(AD1MES, linebuf);
- break;
- case 'l':
- rep->command = LCOM;
- break;
- case 's':
- rep->command = SCOM;
- seof = *cp++;
- if ((rep->re1 = compile()) == 0) {
- if(!lastre)
- quit("First RE may not be null.");
- rep->re1 = lastre;
- }
- rep->rhs = p;
- if((p = compsub(p, addend)) == 0)
- quit(CGMES, L"s", linebuf);
- if(*cp == L'g') {
- cp++;
- rep->gfl++;
- } else if(gflag)
- rep->gfl++;
- if(*cp == L'p') {
- cp++;
- rep->pfl = 1;
- }
- if(*cp == L'P') {
- cp++;
- rep->pfl = 2;
- }
- if(*cp == L'w') {
- cp++;
- if(*cp++ != L' ')
- quit(CGMES, L"s", linebuf);
- text(fname[nfiles]);
- for(i = nfiles - 1; i >= 0; i--)
- if(cmp(fname[nfiles], fname[i]) == 0) {
- rep->fcode = fcode[i];
- goto done;
- }
- if(nfiles >= MAXFILES)
- quit("Too many files in w commands 1");
- rep->fcode = open_file(fname[nfiles]);
- }
- break;
- case 'w':
- rep->command = WCOM;
- if(*cp++ != L' ')
- quit(CGMES, L"w", linebuf);
- text(fname[nfiles]);
- for(i = nfiles - 1; i >= 0; i--)
- if(cmp(fname[nfiles], fname[i]) == 0) {
- rep->fcode = fcode[i];
- goto done;
- }
- if(nfiles >= MAXFILES){
- fprint(2, "sed: Too many files in w commands 2 \n");
- fprint(2, "nfiles = %d; MAXF = %d\n",
- nfiles, MAXFILES);
- errexit();
- }
- rep->fcode = open_file(fname[nfiles]);
- break;
- case 'x':
- rep->command = XCOM;
- break;
- case 'y':
- rep->command = YCOM;
- seof = *cp++;
- if (ycomp(rep) == 0)
- quit(CGMES, L"y", linebuf);
- break;
- }
- done:
- if(++rep >= pend)
- quit("Too many commands, last: %S", linebuf);
- if(*cp++ != L'\0') {
- if(cp[-1] == L';')
- goto comploop;
- quit(CGMES, cp - 1, linebuf);
- }
- }
- }
- Biobuf *
- open_file(char *name)
- {
- int fd;
- Biobuf *bp;
- if ((bp = malloc(sizeof(Biobuf))) == 0)
- quit("Out of memory");
- if ((fd = open(name, OWRITE)) < 0 &&
- (fd = create(name, OWRITE, 0666)) < 0)
- quit("Cannot create %s", name);
- Binit(bp, fd, OWRITE);
- Bseek(bp, 0, 2);
- fcode[nfiles++] = bp;
- return bp;
- }
- Rune *
- compsub(Rune *rhs, Rune *end)
- {
- Rune r;
- while ((r = *cp++) != '\0') {
- if(r == '\\') {
- if (rhs < end)
- *rhs++ = 0xFFFF;
- else
- return 0;
- r = *cp++;
- if(r == 'n')
- r = '\n';
- } else {
- if(r == seof) {
- if (rhs < end)
- *rhs++ = '\0';
- else
- return 0;
- return rhs;
- }
- }
- if (rhs < end)
- *rhs++ = r;
- else
- return 0;
- }
- return 0;
- }
- Reprog *
- compile(void)
- {
- Rune c;
- char *ep;
- char expbuf[512];
- if((c = *cp++) == seof) /* L'//' */
- return 0;
- ep = expbuf;
- do {
- if (c == L'\0' || c == L'\n')
- quit(TMMES, linebuf);
- if (c == L'\\') {
- if (ep >= expbuf+sizeof(expbuf))
- quit(TMMES, linebuf);
- ep += runetochar(ep, &c);
- if ((c = *cp++) == L'n')
- c = L'\n';
- }
- if (ep >= expbuf + sizeof(expbuf))
- quit(TMMES, linebuf);
- ep += runetochar(ep, &c);
- } while ((c = *cp++) != seof);
- *ep = 0;
- return lastre = regcomp(expbuf);
- }
- void
- regerror(char *s)
- {
- USED(s);
- quit(CGMES, L"r.e.-using", linebuf);
- }
- void
- newfile(enum PTYPE type, char *name)
- {
- if (type == P_ARG)
- prog.curr = name;
- else if ((prog.bp = Bopen(name, OREAD)) == 0)
- quit("Cannot open pattern-file: %s\n", name);
- prog.type = type;
- }
- int
- rline(Rune *buf, Rune *end)
- {
- long c;
- Rune r;
- while ((c = getrune()) >= 0) {
- r = c;
- if (r == '\\') {
- if (buf <= end)
- *buf++ = r;
- if ((c = getrune()) < 0)
- break;
- r = c;
- } else if (r == '\n') {
- *buf = '\0';
- return 1;
- }
- if (buf <= end)
- *buf++ = r;
- }
- *buf = '\0';
- return -1;
- }
- long
- getrune(void)
- {
- long c;
- Rune r;
- char *p;
- if (prog.type == P_ARG) {
- if ((p = prog.curr) != 0) {
- if (*p) {
- prog.curr += chartorune(&r, p);
- c = r;
- } else {
- c = '\n'; /* fake an end-of-line */
- prog.curr = 0;
- }
- } else
- c = -1;
- } else if ((c = Bgetrune(prog.bp)) < 0)
- Bterm(prog.bp);
- return c;
- }
- void
- address(Addr *ap)
- {
- int c;
- long lno;
- if((c = *cp++) == '$')
- ap->type = A_DOL;
- else if(c == '/') {
- seof = c;
- if (ap->rp = compile())
- ap->type = A_RE;
- else
- ap->type = A_LAST;
- }
- else if (c >= '0' && c <= '9') {
- lno = c - '0';
- while ((c = *cp) >= '0' && c <= '9')
- lno = lno*10 + *cp++ - '0';
- if(!lno)
- quit("line number 0 is illegal",0);
- ap->type = A_LINE;
- ap->line = lno;
- }
- else {
- cp--;
- ap->type = A_NONE;
- }
- }
- cmp(char *a, char *b) /* compare characters */
- {
- while(*a == *b++)
- if (*a == '\0')
- return 0;
- else
- a++;
- return 1;
- }
- rcmp(Rune *a, Rune *b) /* compare runes */
- {
- while(*a == *b++)
- if (*a == '\0')
- return 0;
- else
- a++;
- return 1;
- }
- char *
- text(char *p) /* extract character string */
- {
- Rune r;
- while(*cp == ' ' || *cp == '\t')
- cp++;
- while (*cp) {
- if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
- break;
- if (r == '\n')
- while (*cp == ' ' || *cp == '\t')
- cp++;
- p += runetochar(p, &r);
- }
- *p++ = '\0';
- return p;
- }
- Rune *
- stext(Rune *p, Rune *end) /* extract rune string */
- {
- while(*cp == L' ' || *cp == L'\t')
- cp++;
- while (*cp) {
- if (*cp == L'\\' && *++cp == L'\0')
- break;
- if (p >= end-1)
- quit(TMMES, linebuf);
- if ((*p++ = *cp++) == L'\n')
- while(*cp == L' ' || *cp == L'\t')
- cp++;
- }
- *p++ = 0;
- return p;
- }
- Label *
- search(Label *ptr)
- {
- Label *rp;
- for (rp = ltab; rp < ptr; rp++)
- if(rcmp(rp->uninm, ptr->uninm) == 0)
- return(rp);
- return(0);
- }
- void
- dechain(void)
- {
- Label *lptr;
- SedCom *rptr, *trptr;
- for(lptr = ltab; lptr < lab; lptr++) {
- if(lptr->address == 0)
- quit("Undefined label: %S", lptr->uninm);
- if(lptr->chain) {
- rptr = lptr->chain;
- while((trptr = rptr->lb1) != nil) {
- rptr->lb1 = lptr->address;
- rptr = trptr;
- }
- rptr->lb1 = lptr->address;
- }
- }
- }
- int
- ycomp(SedCom *r)
- {
- int i;
- Rune *rp, *sp, *tsp;
- Rune c, highc;
- highc = 0;
- for(tsp = cp; *tsp != seof; tsp++) {
- if(*tsp == L'\\')
- tsp++;
- if(*tsp == L'\n' || *tsp == L'\0')
- return 0;
- if (*tsp > highc)
- highc = *tsp;
- }
- tsp++;
- if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
- quit("Out of memory");
- *rp++ = highc; /* save upper bound */
- for (i = 0; i <= highc; i++)
- rp[i] = i;
- sp = cp;
- while((c = *sp++) != seof) {
- if(c == L'\\' && *sp == L'n') {
- sp++;
- c = L'\n';
- }
- if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
- rp[c] = L'\n';
- tsp++;
- }
- if(rp[c] == seof || rp[c] == L'\0') {
- free(r->re1);
- r->re1 = nil;
- return 0;
- }
- }
- if(*tsp != seof) {
- free(r->re1);
- r->re1 = nil;
- return 0;
- }
- cp = tsp+1;
- return 1;
- }
- void
- execute(void)
- {
- SedCom *ipc;
- while (spend = gline(linebuf)){
- for(ipc = pspace; ipc->command; ) {
- if (!executable(ipc)) {
- ipc++;
- continue;
- }
- command(ipc);
- if(delflag)
- break;
- if(jflag) {
- jflag = 0;
- if((ipc = ipc->lb1) == 0)
- break;
- } else
- ipc++;
- }
- if(!nflag && !delflag)
- putline(&fout, linebuf, spend - linebuf);
- if(aptr > abuf)
- arout();
- delflag = 0;
- }
- }
- /* determine if a statement should be applied to an input line */
- int
- executable(SedCom *ipc)
- {
- if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
- if (ipc->active == 1) /* Second line */
- ipc->active = 2;
- switch(ipc->ad2.type) {
- case A_NONE: /* No second addr; use first */
- ipc->active = 0;
- break;
- case A_DOL: /* Accept everything */
- return !ipc->negfl;
- case A_LINE: /* Line at end of range? */
- if (lnum <= ipc->ad2.line) {
- if (ipc->ad2.line == lnum)
- ipc->active = 0;
- return !ipc->negfl;
- }
- ipc->active = 0; /* out of range */
- return ipc->negfl;
- case A_RE: /* Check for matching R.E. */
- if (match(ipc->ad2.rp, linebuf))
- ipc->active = 0;
- return !ipc->negfl;
- default:
- quit("Internal error");
- }
- }
- switch (ipc->ad1.type) { /* Check first address */
- case A_NONE: /* Everything matches */
- return !ipc->negfl;
- case A_DOL: /* Only last line */
- if (dolflag)
- return !ipc->negfl;
- break;
- case A_LINE: /* Check line number */
- if (ipc->ad1.line == lnum) {
- ipc->active = 1; /* In range */
- return !ipc->negfl;
- }
- break;
- case A_RE: /* Check R.E. */
- if (match(ipc->ad1.rp, linebuf)) {
- ipc->active = 1; /* In range */
- return !ipc->negfl;
- }
- break;
- default:
- quit("Internal error");
- }
- return ipc->negfl;
- }
- int
- match(Reprog *pattern, Rune *buf)
- {
- if (!pattern)
- return 0;
- subexp[0].rsp = buf;
- subexp[0].ep = 0;
- if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
- loc1 = subexp[0].rsp;
- loc2 = subexp[0].rep;
- return 1;
- }
- loc1 = loc2 = 0;
- return 0;
- }
- int
- substitute(SedCom *ipc)
- {
- int len;
- if(!match(ipc->re1, linebuf))
- return 0;
- /*
- * we have at least one match. some patterns, e.g. '$' or '^', can
- * produce 0-length matches, so during a global substitute we must
- * bump to the character after a 0-length match to keep from looping.
- */
- sflag = 1;
- if(ipc->gfl == 0) /* single substitution */
- dosub(ipc->rhs);
- else
- do{ /* global substitution */
- len = loc2 - loc1; /* length of match */
- dosub(ipc->rhs); /* dosub moves loc2 */
- if(*loc2 == 0) /* end of string */
- break;
- if(len == 0) /* zero-length R.E. match */
- loc2++; /* bump over 0-length match */
- if(*loc2 == 0) /* end of string */
- break;
- } while(match(ipc->re1, loc2));
- return 1;
- }
- void
- dosub(Rune *rhsbuf)
- {
- int c, n;
- Rune *lp, *sp, *rp;
- lp = linebuf;
- sp = genbuf;
- rp = rhsbuf;
- while (lp < loc1)
- *sp++ = *lp++;
- while(c = *rp++) {
- if (c == '&') {
- sp = place(sp, loc1, loc2);
- continue;
- }
- if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') {
- n = c-'0';
- if (subexp[n].rsp && subexp[n].rep) {
- sp = place(sp, subexp[n].rsp, subexp[n].rep);
- continue;
- }
- else {
- fprint(2, "sed: Invalid back reference \\%d\n",n);
- errexit();
- }
- }
- *sp++ = c;
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
- }
- lp = loc2;
- loc2 = sp - genbuf + linebuf;
- while (*sp++ = *lp++)
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
- lp = linebuf;
- sp = genbuf;
- while (*lp++ = *sp++)
- ;
- spend = lp - 1;
- }
- Rune *
- place(Rune *sp, Rune *l1, Rune *l2)
- {
- while (l1 < l2) {
- *sp++ = *l1++;
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
- }
- return sp;
- }
- char *
- trans(int c)
- {
- static char buf[] = "\\x0000";
- static char hex[] = "0123456789abcdef";
- switch(c) {
- case '\b':
- return "\\b";
- case '\n':
- return "\\n";
- case '\r':
- return "\\r";
- case '\t':
- return "\\t";
- case '\\':
- return "\\\\";
- }
- buf[2] = hex[(c>>12)&0xF];
- buf[3] = hex[(c>>8)&0xF];
- buf[4] = hex[(c>>4)&0xF];
- buf[5] = hex[c&0xF];
- return buf;
- }
- void
- command(SedCom *ipc)
- {
- int i, c;
- char *ucp;
- Rune *execp, *p1, *p2, *rp;
- switch(ipc->command) {
- case ACOM:
- *aptr++ = ipc;
- if(aptr >= abuf+MAXADDS)
- quit("sed: Too many appends after line %ld\n",
- (char *)lnum);
- *aptr = 0;
- break;
- case CCOM:
- delflag = 1;
- if(ipc->active == 1) {
- for(rp = ipc->text; *rp; rp++)
- Bputrune(&fout, *rp);
- Bputc(&fout, '\n');
- }
- break;
- case DCOM:
- delflag++;
- break;
- case CDCOM:
- p1 = p2 = linebuf;
- while(*p1 != '\n') {
- if(*p1++ == 0) {
- delflag++;
- return;
- }
- }
- p1++;
- while(*p2++ = *p1++)
- ;
- spend = p2 - 1;
- jflag++;
- break;
- case EQCOM:
- Bprint(&fout, "%ld\n", lnum);
- break;
- case GCOM:
- p1 = linebuf;
- p2 = holdsp;
- while(*p1++ = *p2++)
- ;
- spend = p1 - 1;
- break;
- case CGCOM:
- *spend++ = '\n';
- p1 = spend;
- p2 = holdsp;
- while(*p1++ = *p2++)
- if(p1 >= lbend)
- break;
- spend = p1 - 1;
- break;
- case HCOM:
- p1 = holdsp;
- p2 = linebuf;
- while(*p1++ = *p2++);
- hspend = p1 - 1;
- break;
- case CHCOM:
- *hspend++ = '\n';
- p1 = hspend;
- p2 = linebuf;
- while(*p1++ = *p2++)
- if(p1 >= hend)
- break;
- hspend = p1 - 1;
- break;
- case ICOM:
- for(rp = ipc->text; *rp; rp++)
- Bputrune(&fout, *rp);
- Bputc(&fout, '\n');
- break;
- case BCOM:
- jflag = 1;
- break;
- case LCOM:
- c = 0;
- for (i = 0, rp = linebuf; *rp; rp++) {
- c = *rp;
- if(c >= 0x20 && c < 0x7F && c != '\\') {
- Bputc(&fout, c);
- if(i++ > 71) {
- Bprint(&fout, "\\\n");
- i = 0;
- }
- } else {
- for (ucp = trans(*rp); *ucp; ucp++){
- c = *ucp;
- Bputc(&fout, c);
- if(i++ > 71) {
- Bprint(&fout, "\\\n");
- i = 0;
- }
- }
- }
- }
- if(c == ' ')
- Bprint(&fout, "\\n");
- Bputc(&fout, '\n');
- break;
- case NCOM:
- if(!nflag)
- putline(&fout, linebuf, spend-linebuf);
- if(aptr > abuf)
- arout();
- if((execp = gline(linebuf)) == 0) {
- delflag = 1;
- break;
- }
- spend = execp;
- break;
- case CNCOM:
- if(aptr > abuf)
- arout();
- *spend++ = '\n';
- if((execp = gline(spend)) == 0) {
- delflag = 1;
- break;
- }
- spend = execp;
- break;
- case PCOM:
- putline(&fout, linebuf, spend-linebuf);
- break;
- case CPCOM:
- cpcom:
- for(rp = linebuf; *rp && *rp != '\n'; rp++)
- Bputc(&fout, *rp);
- Bputc(&fout, '\n');
- break;
- case QCOM:
- if(!nflag)
- putline(&fout, linebuf, spend-linebuf);
- if(aptr > abuf)
- arout();
- exits(0);
- case RCOM:
- *aptr++ = ipc;
- if(aptr >= &abuf[MAXADDS])
- quit("sed: Too many reads after line %ld\n",
- (char *)lnum);
- *aptr = 0;
- break;
- case SCOM:
- i = substitute(ipc);
- if(i && ipc->pfl)
- if(ipc->pfl == 1)
- putline(&fout, linebuf, spend-linebuf);
- else
- goto cpcom;
- if(i && ipc->fcode)
- goto wcom;
- break;
- case TCOM:
- if(sflag) {
- sflag = 0;
- jflag = 1;
- }
- break;
- case WCOM:
- wcom:
- putline(ipc->fcode,linebuf, spend - linebuf);
- break;
- case XCOM:
- p1 = linebuf;
- p2 = genbuf;
- while(*p2++ = *p1++)
- ;
- p1 = holdsp;
- p2 = linebuf;
- while(*p2++ = *p1++)
- ;
- spend = p2 - 1;
- p1 = genbuf;
- p2 = holdsp;
- while(*p2++ = *p1++)
- ;
- hspend = p2 - 1;
- break;
- case YCOM:
- p1 = linebuf;
- p2 = ipc->text;
- for (i = *p2++; *p1; p1++)
- if (*p1 <= i)
- *p1 = p2[*p1];
- break;
- }
- }
- void
- putline(Biobuf *bp, Rune *buf, int n)
- {
- while (n--)
- Bputrune(bp, *buf++);
- Bputc(bp, '\n');
- }
- ecmp(Rune *a, Rune *b, int count)
- {
- while(count--)
- if(*a++ != *b++)
- return 0;
- return 1;
- }
- void
- arout(void)
- {
- int c;
- char *s;
- char buf[128];
- Rune *p1;
- Biobuf *fi;
- for (aptr = abuf; *aptr; aptr++) {
- if((*aptr)->command == ACOM) {
- for(p1 = (*aptr)->text; *p1; p1++ )
- Bputrune(&fout, *p1);
- Bputc(&fout, '\n');
- } else {
- for(s = buf, p1 = (*aptr)->text; *p1; p1++)
- s += runetochar(s, p1);
- *s = '\0';
- if((fi = Bopen(buf, OREAD)) == 0)
- continue;
- while((c = Bgetc(fi)) >= 0)
- Bputc(&fout, c);
- Bterm(fi);
- }
- }
- aptr = abuf;
- *aptr = 0;
- }
- void
- errexit(void)
- {
- exits("error");
- }
- void
- quit(char *fmt, ...)
- {
- char *p, *ep;
- char msg[256];
- va_list arg;
- ep = msg + sizeof msg;
- p = seprint(msg, ep, "sed: ");
- va_start(arg, fmt);
- p = vseprint(p, ep, fmt, arg);
- va_end(arg);
- p = seprint(p, ep, "\n");
- write(2, msg, p - msg);
- errexit();
- }
- Rune *
- gline(Rune *addr)
- {
- long c;
- Rune *p;
- static long peekc = 0;
- if (f == 0 && opendata() < 0)
- return 0;
- sflag = 0;
- lnum++;
- /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
- do {
- p = addr;
- for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
- if (c == '\n') {
- if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
- dolflag = 1;
- *p = '\0';
- return p;
- }
- if (c && p < lbend)
- *p++ = c;
- }
- /* return partial final line, adding implicit newline */
- if(p != addr) {
- *p = '\0';
- peekc = -1;
- if (fhead == 0)
- dolflag = 1;
- return p;
- }
- peekc = 0;
- Bterm(f);
- } while (opendata() > 0); /* Switch to next stream */
- f = 0;
- return 0;
- }
- /*
- * Data file input section - the intent is to transparently
- * catenate all data input streams.
- */
- void
- enroll(char *filename) /* Add a file to the input file cache */
- {
- FileCache *fp;
- if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
- quit("Out of memory");
- if (ftail == nil)
- fhead = fp;
- else
- ftail->next = fp;
- ftail = fp;
- fp->next = nil;
- fp->name = filename; /* 0 => stdin */
- }
- int
- opendata(void)
- {
- if (fhead == nil)
- return -1;
- if (fhead->name) {
- if ((f = Bopen(fhead->name, OREAD)) == nil)
- quit("Can't open %s", fhead->name);
- } else {
- Binit(&stdin, 0, OREAD);
- f = &stdin;
- }
- fhead = fhead->next;
- return 1;
- }
|