/*
 * uniq.c
 *
 * Deal with duplicated lines in a file
 */
  4. #include <u.h>
  5. #include <libc.h>
  6. #include <bio.h>
  7. #include <ctype.h>
  8. #define SIZE 8000
  9. int fields = 0;
  10. int letters = 0;
  11. int linec = 0;
  12. char mode;
  13. int uniq;
  14. char *b1, *b2;
  15. long bsize;
  16. Biobuf fin;
  17. Biobuf fout;
  18. int gline(char *buf);
  19. void pline(char *buf);
  20. int equal(char *b1, char *b2);
  21. char* skip(char *s);
  22. void
  23. main(int argc, char *argv[])
  24. {
  25. int f;
  26. argv0 = argv[0];
  27. bsize = SIZE;
  28. b1 = malloc(bsize);
  29. b2 = malloc(bsize);
  30. f = 0;
  31. while(argc > 1) {
  32. if(*argv[1] == '-') {
  33. if(isdigit(argv[1][1]))
  34. fields = atoi(&argv[1][1]);
  35. else
  36. mode = argv[1][1];
  37. argc--;
  38. argv++;
  39. continue;
  40. }
  41. if(*argv[1] == '+') {
  42. letters = atoi(&argv[1][1]);
  43. argc--;
  44. argv++;
  45. continue;
  46. }
  47. f = open(argv[1], 0);
  48. if(f < 0)
  49. sysfatal("cannot open %s", argv[1]);
  50. break;
  51. }
  52. if(argc > 2)
  53. sysfatal("unexpected argument %s", argv[2]);
  54. Binit(&fin, f, OREAD);
  55. Binit(&fout, 1, OWRITE);
  56. if(gline(b1))
  57. exits(0);
  58. for(;;) {
  59. linec++;
  60. if(gline(b2)) {
  61. pline(b1);
  62. exits(0);
  63. }
  64. if(!equal(b1, b2)) {
  65. pline(b1);
  66. linec = 0;
  67. do {
  68. linec++;
  69. if(gline(b1)) {
  70. pline(b2);
  71. exits(0);
  72. }
  73. } while(equal(b2, b1));
  74. pline(b2);
  75. linec = 0;
  76. }
  77. }
  78. }
  79. int
  80. gline(char *buf)
  81. {
  82. char *p;
  83. p = Brdline(&fin, '\n');
  84. if(p == 0)
  85. return 1;
  86. if(fin.rdline >= bsize-1)
  87. sysfatal("line too long");
  88. memmove(buf, p, fin.rdline);
  89. buf[fin.rdline-1] = 0;
  90. return 0;
  91. }
  92. void
  93. pline(char *buf)
  94. {
  95. switch(mode) {
  96. case 'u':
  97. if(uniq) {
  98. uniq = 0;
  99. return;
  100. }
  101. break;
  102. case 'd':
  103. if(uniq)
  104. break;
  105. return;
  106. case 'c':
  107. Bprint(&fout, "%4d ", linec);
  108. }
  109. uniq = 0;
  110. Bprint(&fout, "%s\n", buf);
  111. }
  112. int
  113. equal(char *b1, char *b2)
  114. {
  115. char c;
  116. if(fields || letters) {
  117. b1 = skip(b1);
  118. b2 = skip(b2);
  119. }
  120. for(;;) {
  121. c = *b1++;
  122. if(c != *b2++) {
  123. if(c == 0 && mode == 's')
  124. return 1;
  125. return 0;
  126. }
  127. if(c == 0) {
  128. uniq++;
  129. return 1;
  130. }
  131. }
  132. }
  133. char*
  134. skip(char *s)
  135. {
  136. int nf, nl;
  137. nf = nl = 0;
  138. while(nf++ < fields) {
  139. while(*s == ' ' || *s == '\t')
  140. s++;
  141. while(!(*s == ' ' || *s == '\t' || *s == 0) )
  142. s++;
  143. }
  144. while(nl++ < letters && *s != 0)
  145. s++;
  146. return s;
  147. }