123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- /*
- * Deal with duplicated lines in a file
- */
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include <ctype.h>
- #define SIZE 8000
- int fields = 0;
- int letters = 0;
- int linec = 0;
- char mode;
- int uniq;
- char *b1, *b2;
- long bsize;
- Biobuf fin;
- Biobuf fout;
- int gline(char *buf);
- void pline(char *buf);
- int equal(char *b1, char *b2);
- char* skip(char *s);
- void
- main(int argc, char *argv[])
- {
- int f;
- bsize = SIZE;
- b1 = malloc(bsize);
- b2 = malloc(bsize);
- f = 0;
- while(argc > 1) {
- if(*argv[1] == '-') {
- if(isdigit(argv[1][1]))
- fields = atoi(&argv[1][1]);
- else
- mode = argv[1][1];
- argc--;
- argv++;
- continue;
- }
- if(*argv[1] == '+') {
- letters = atoi(&argv[1][1]);
- argc--;
- argv++;
- continue;
- }
- f = open(argv[1], 0);
- if(f < 0) {
- fprint(2, "cannot open %s\n", argv[1]);
- exits("open");
- }
- break;
- }
- if(argc > 2) {
- fprint(2, "unexpected argument %s\n", argv[2]);
- exits("arg");
- }
- Binit(&fin, f, OREAD);
- Binit(&fout, 1, OWRITE);
- if(gline(b1))
- exits(0);
- for(;;) {
- linec++;
- if(gline(b2)) {
- pline(b1);
- exits(0);
- }
- if(!equal(b1, b2)) {
- pline(b1);
- linec = 0;
- do {
- linec++;
- if(gline(b1)) {
- pline(b2);
- exits(0);
- }
- } while(equal(b2, b1));
- pline(b2);
- linec = 0;
- }
- }
- }
- int
- gline(char *buf)
- {
- char *p;
- p = Brdline(&fin, '\n');
- if(p == 0)
- return 1;
- if(fin.rdline >= bsize-1) {
- fprint(2, "line too long\n");
- exits("too long");
- }
- memmove(buf, p, fin.rdline);
- buf[fin.rdline-1] = 0;
- return 0;
- }
- void
- pline(char *buf)
- {
- switch(mode) {
- case 'u':
- if(uniq) {
- uniq = 0;
- return;
- }
- break;
- case 'd':
- if(uniq)
- break;
- return;
- case 'c':
- Bprint(&fout, "%4d ", linec);
- }
- uniq = 0;
- Bprint(&fout, "%s\n", buf);
- }
- int
- equal(char *b1, char *b2)
- {
- char c;
- if(fields || letters) {
- b1 = skip(b1);
- b2 = skip(b2);
- }
- for(;;) {
- c = *b1++;
- if(c != *b2++) {
- if(c == 0 && mode == 's')
- return 1;
- return 0;
- }
- if(c == 0) {
- uniq++;
- return 1;
- }
- }
- }
- char*
- skip(char *s)
- {
- int nf, nl;
- nf = nl = 0;
- while(nf++ < fields) {
- while(*s == ' ' || *s == '\t')
- s++;
- while(!(*s == ' ' || *s == '\t' || *s == 0) )
- s++;
- }
- while(nl++ < letters && *s != 0)
- s++;
- return s;
- }
|