123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include <ctype.h>
- #include <regexp.h>
- typedef struct Range Range;
- typedef struct Slice Slice;
- typedef struct Slices Slices;
- typedef struct Token Token;
- struct Range {
- int begin;
- int end;
- };
- struct Slice {
- char *begin;
- char *end;
- };
- #pragma varargck type "S" Slice
- struct Slices {
- uint len;
- uint size;
- Slice *slices;
- };
- struct Token {
- int type;
- Slice slice;
- };
enum {
	/* sentinel stored in a Range to mean "the last field of the line" */
	NF = 0x7FFFFFFF
};
- Biobuf bin;
- Biobuf bout;
- int guesscollapse(const char *sep);
- int Sfmt(Fmt *f);
- Slice lex(char **sp);
- Slice next(char **sp);
- Slice peek(void);
- void extend(Slice *slice, char **sp);
- int tiseof(Slice *tok);
- int tisdelim(Slice *tok);
- int tisspace(Slice *tok);
- int parseranges(char *src, Range **rv);
- Range parserange(char **sp);
- int stoi(Slice slice);
- int parsenum(char **s);
- void process(Biobuf *b, int rc, Range *rv, Reprog *delim, char *sep, int collapse);
- void pprefix(char *prefix);
- uint split(char *line, Reprog *delim, Slices *ss, int collapse);
- void reset(Slices *ss);
- void append(Slices *ss, char *begin, char *end);
- void usage(void);
- void
- main(int argc, char *argv[])
- {
- Range *rv;
- char *filename, *insep, *outsep;
- Reprog *delim;
- int rc, collapse, eflag, Eflag, oflag, zflag;
- insep = "[ \t\v\r]+";
- outsep = " ";
- Binit(&bin, 0, OREAD);
- Binit(&bout, 1, OWRITE);
- fmtinstall('S', Sfmt);
- zflag = 0;
- eflag = 0;
- Eflag = 0;
- oflag = 0;
- ARGBEGIN {
- case '0':
- outsep = "";
- zflag = 1;
- break;
- case 'e':
- eflag = 1;
- break;
- case 'E':
- Eflag = 1;
- break;
- case 'F':
- insep = EARGF(usage());
- break;
- case 'O':
- oflag = 1;
- outsep = EARGF(usage());
- break;
- default:
- usage();
- break;
- } ARGEND;
- if (eflag && Eflag) {
- fprint(2, "flag conflict: -e and -E are mutually exclusive\n");
- usage();
- }
- if (oflag && zflag) {
- fprint(2, "flag conflict: -0 and -O are mutually exclusive\n");
- usage();
- }
- if (argc <= 0)
- usage();
- delim = regcomp(insep);
- if (delim == nil)
- sysfatal("bad input separator regexp '%s': %r", insep);
- rv = nil;
- rc = parseranges(*argv++, &rv);
- if (rc < 0)
- sysfatal("parseranges failed");
- collapse = guesscollapse(insep);
- if (eflag)
- collapse = 0;
- if (Eflag)
- collapse = 1;
- if (*argv == nil) {
- process(&bin, rc, rv, delim, outsep, collapse);
- } else while ((filename = *argv++) != nil) {
- Biobuf *b;
- if (strcmp(filename, "-") == 0) {
- process(&bin, rc, rv, delim, outsep, collapse);
- continue;
- }
- b = Bopen(filename, OREAD);
- if (b == nil)
- sysfatal("failure opening '%s': %r", filename);
- process(b, rc, rv, delim, outsep, collapse);
- Bterm(b);
- }
- exits(0);
- }
/*
 * Default for the collapse setting when neither -e nor -E is given.
 * Returns 0 for a separator of at most one rune, and for a two-rune
 * separator that starts with a backslash (presumably a single
 * escaped character); returns 1 for anything longer.
 */
int
guesscollapse(const char *sep)
{
	int nrunes = utflen(sep);

	if (nrunes <= 1)
		return 0;
	if (nrunes == 2 && *sep == '\\')
		return 0;
	return 1;
}
- int
- Sfmt(Fmt *f)
- {
- Slice s = va_arg(f->args, Slice);
- if (s.begin == nil || s.end == nil)
- return 0;
- return fmtprint(f, "%.*s", s.end - s.begin, s.begin);
- }
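/*
 * The field selection syntax is:
 *
 *	fields := range [[delim] fields]
 *	range  := field | NUM '-' [field]
 *	field  := NUM | 'NF'
 *	NUM    := ['-'] digit+
 *	delim  := ws+ | '|' | ','
 *	ws     := c such that `isspace(c)` is true.
 *
 * Fields are numbered from 1.  Field 0 selects the whole input line,
 * a negative NUM counts back from the last field, and NF names the
 * last field.  A range with no field after the '-' runs to NF.
 */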
- Slice
- lex(char **sp)
- {
- char *s;
- Slice slice;
- memset(&slice, 0, sizeof(slice));
- s = *sp;
- slice.begin = s;
- while (isspace(*s))
- s++;
- if (s == *sp) {
- switch (*s) {
- case '\0':
- slice.begin = nil;
- break;
- case '-':
- s++;
- break;
- case 'N':
- if (*++s == 'F')
- s++;
- break;
- case ',':
- case '|':
- s++;
- break;
- default:
- if (!isdigit(*s))
- sysfatal("lexical error, c = %c", *s);
- while (isdigit(*s))
- s++;
- break;
- }
- }
- slice.end = s;
- *sp = s;
- return slice;
- }
- Slice current;
- Slice
- peek()
- {
- return current;
- }
- Slice
- next(char **sp)
- {
- Slice tok = peek();
- current = lex(sp);
- return tok;
- }
- void
- extend(Slice *slice, char **sp)
- {
- Slice tok = next(sp);
- slice->end = tok.end;
- }
- int
- stoi(Slice slice)
- {
- char *s;
- int n = 0, sign = 1;
- s = slice.begin;
- if (*s == '-') {
- sign = -1;
- s++;
- }
- for (; s != slice.end; s++) {
- if (!isdigit(*s))
- sysfatal("stoi: bad number in '%S', c = %c", slice, *s);
- n = n * 10 + (*s - '0');
- }
- return sign * n;
- }
- int
- tiseof(Slice *tok)
- {
- return tok == nil || tok->begin == nil;
- }
- int
- tisdelim(Slice *tok)
- {
- return tiseof(tok) || tisspace(tok) || *tok->begin == ',' || *tok->begin == '|';
- }
- int
- tisspace(Slice *tok)
- {
- return !tiseof(tok) && isspace(*tok->begin);
- }
- int
- parseranges(char *src, Range **rv)
- {
- char *s;
- Range *rs, *t;
- int n, m;
- Slice tok;
- rs = nil;
- m = 0;
- n = 0;
- s = src;
- if (s == nil || *s == '\0')
- return -1;
- next(&s);
- do {
- tok = peek();
- while (tisspace(&tok))
- tok = next(&s);
- Range r = parserange(&s);
- if (n >= m) {
- m = 2*m;
- if (m == 0)
- m = 1;
- t = realloc(rs, sizeof(Range) * m);
- if (t == nil)
- sysfatal("realloc failed parsing ranges");
- rs = t;
- }
- rs[n++] = r;
- tok = next(&s);
- if (!tisdelim(&tok))
- sysfatal("syntax error in field list");
- } while (!tiseof(&tok));
- *rv = rs;
- return n;
- }
- int
- tokeq(Slice *tok, const char *s)
- {
- return !tiseof(tok) && !strncmp(tok->begin, s, tok->end - tok->begin);
- }
- Range
- parserange(char **sp)
- {
- Range range;
- Slice tok;
- range.begin = range.end = NF;
- tok = peek();
- if (tokeq(&tok, "NF")) {
- next(sp);
- return range;
- }
- range.begin = range.end = parsenum(sp);
- tok = peek();
- if (tokeq(&tok, "-")) {
- next(sp);
- range.end = NF;
- tok = peek();
- if (tokeq(&tok, "NF")) {
- next(sp);
- return range;
- }
- if (!tiseof(&tok) && !tisdelim(&tok))
- range.end = parsenum(sp);
- }
- return range;
- }
- int
- parsenum(char **sp)
- {
- Slice tok;
- tok = next(sp);
- if (tiseof(&tok))
- sysfatal("EOF in number parser");
- if (isdigit(*tok.begin))
- return stoi(tok);
- if (*tok.begin != '-')
- sysfatal("number parse error: unexpected '%S'", tok);
- extend(&tok, sp);
- if (!isdigit(*(tok.begin + 1)))
- sysfatal("negative number parse error: unspected '%S'", tok);
- return stoi(tok);
- }
- void
- process(Biobuf *b, int rc, Range *rv, Reprog *delim, char *outsep, int collapse)
- {
- char *line, *prefix;
- const int nulldelim = 1;
- Slice *s;
- Slices ss;
- memset(&ss, 0, sizeof(ss));
- while ((line = Brdstr(b, '\n', nulldelim)) != 0) {
- int printed = 0;
- uint nfields = split(line, delim, &ss, collapse);
- s = ss.slices;
- prefix = nil;
- for (int k = 0; k < rc; k++) {
- int begin = rv[k].begin;
- int end = rv[k].end;
- if (begin == 0) {
- pprefix(prefix);
- prefix = outsep;
- Bprint(&bout, "%s", line);
- printed = 1;
- begin = 1;
- }
- if (begin == NF)
- begin = nfields;
- if (begin < 0)
- begin += nfields + 1;
- begin--;
- if (end < 0)
- end += nfields + 1;
- if (begin < 0 || end < 0 || end < begin || nfields < begin)
- continue;
- for (int f = begin; f < end && f < nfields; f++) {
- pprefix(prefix);
- prefix = outsep;
- Bprint(&bout, "%S", s[f]);
- printed = 1;
- }
- }
- if (rc != 0 && (printed || !collapse))
- Bputc(&bout, '\n');
- free(line);
- }
- free(ss.slices);
- }
- void
- pprefix(char *prefix)
- {
- if (prefix == nil)
- return;
- if (*prefix == '\0')
- Bputc(&bout, '\0');
- else
- Bprint(&bout, "%s", prefix);
- }
- void
- reset(Slices *ss)
- {
- ss->len = 0;
- }
- uint
- split(char *line, Reprog *delim, Slices *ss, int collapse)
- {
- char *s, *b, *e;
- Resub match[1];
- memset(match, 0, sizeof(match));
- reset(ss);
- b = nil;
- e = nil;
- s = line;
- while (regexec(delim, s, match, nelem(match))) {
- b = s;
- e = match[0].sp;
- s = match[0].ep;
- memset(match, 0, sizeof(match));
- if (collapse && (e == line || b == e))
- continue;
- append(ss, b, e);
- }
- b = s;
- e = b + strlen(s);
- if (!collapse || b != e)
- append(ss, b, e);
- return ss->len;
- }
- void
- append(Slices *ss, char *begin, char *end)
- {
- if (ss->len >= ss->size) {
- Slice *s;
- ss->size *= 2;
- if (ss->size == 0)
- ss->size = 1;
- s = realloc(ss->slices, ss->size * sizeof(Slice));
- if (s == nil)
- sysfatal("malloc failed appending slice: %r");
- ss->slices = s;
- }
- ss->slices[ss->len].begin = begin;
- ss->slices[ss->len++].end = end;
- }
/* Report the command synopsis through sysfatal, which ends the program. */
void
usage(void)
{
	sysfatal("usage: field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] <field list> [file...]");
}
|