uniq.c 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * Deal with duplicated lines in a file
  11. */
  12. #include <u.h>
  13. #include <libc.h>
  14. #include <bio.h>
  15. #include <ctype.h>
  /* Size in bytes of each of the two line buffers; main() copies it into bsize. */
  16. #define SIZE 8000
  /* Number of leading fields skip() passes over; set from a "-N" argument. */
  17. int fields = 0;
  /* Number of characters skip() passes over after the fields; set from "+N". */
  18. int letters = 0;
  /* Running count of lines in the current group; pline() prints it in 'c' mode. */
  19. int linec = 0;
  /* Option letter taken from a "-x" argument; 'u', 'd', 'c' and 's' are tested below. */
  20. char mode;
  /* Incremented by equal() on a full match, cleared by pline(). */
  21. int uniq;
  /* The two line buffers, allocated in main() and filled by gline(). */
  22. char *b1, *b2;
  /* Capacity of b1 and b2; gline() rejects lines whose length reaches bsize-1. */
  23. int32_t bsize;
  /* Buffered input, initialised in main() on the opened file or descriptor 0. */
  24. Biobuf fin;
  /* Buffered output, initialised in main() on descriptor 1. */
  25. Biobuf fout;
  /* Read the next input line into buf; returns non-zero when no line is available. */
  26. int gline(char *buf);
  /* Write buf to fout, subject to the current mode. */
  27. void pline(char *buf);
  /* Return non-zero when the two lines compare equal after skip(). */
  28. int equal(char *b1, char *b2);
  /* Return a pointer past the skipped fields and letters of s. */
  29. char* skip(char *s);
  30. void
  31. main(int argc, char *argv[])
  32. {
  33. int f;
  34. argv0 = argv[0];
  35. bsize = SIZE;
  36. b1 = malloc(bsize);
  37. b2 = malloc(bsize);
  38. f = 0;
  39. while(argc > 1) {
  40. if(*argv[1] == '-') {
  41. if(isdigit(argv[1][1]))
  42. fields = atoi(&argv[1][1]);
  43. else
  44. mode = argv[1][1];
  45. argc--;
  46. argv++;
  47. continue;
  48. }
  49. if(*argv[1] == '+') {
  50. letters = atoi(&argv[1][1]);
  51. argc--;
  52. argv++;
  53. continue;
  54. }
  55. f = open(argv[1], 0);
  56. if(f < 0)
  57. sysfatal("cannot open %s", argv[1]);
  58. break;
  59. }
  60. if(argc > 2)
  61. sysfatal("unexpected argument %s", argv[2]);
  62. Binit(&fin, f, OREAD);
  63. Binit(&fout, 1, OWRITE);
  64. if(gline(b1))
  65. exits(0);
  66. for(;;) {
  67. linec++;
  68. if(gline(b2)) {
  69. pline(b1);
  70. exits(0);
  71. }
  72. if(!equal(b1, b2)) {
  73. pline(b1);
  74. linec = 0;
  75. do {
  76. linec++;
  77. if(gline(b1)) {
  78. pline(b2);
  79. exits(0);
  80. }
  81. } while(equal(b2, b1));
  82. pline(b2);
  83. linec = 0;
  84. }
  85. }
  86. }
  87. int
  88. gline(char *buf)
  89. {
  90. int len;
  91. char *p;
  92. p = Brdline(&fin, '\n');
  93. if(p == 0)
  94. return 1;
  95. len = Blinelen(&fin);
  96. if(len >= bsize-1)
  97. sysfatal("line too int32_t");
  98. memmove(buf, p, len);
  99. buf[len-1] = 0;
  100. return 0;
  101. }
  102. void
  103. pline(char *buf)
  104. {
  105. switch(mode) {
  106. case 'u':
  107. if(uniq) {
  108. uniq = 0;
  109. return;
  110. }
  111. break;
  112. case 'd':
  113. if(uniq)
  114. break;
  115. return;
  116. case 'c':
  117. Bprint(&fout, "%4d ", linec);
  118. }
  119. uniq = 0;
  120. Bprint(&fout, "%s\n", buf);
  121. }
  122. int
  123. equal(char *b1, char *b2)
  124. {
  125. char c;
  126. if(fields || letters) {
  127. b1 = skip(b1);
  128. b2 = skip(b2);
  129. }
  130. for(;;) {
  131. c = *b1++;
  132. if(c != *b2++) {
  133. if(c == 0 && mode == 's')
  134. return 1;
  135. return 0;
  136. }
  137. if(c == 0) {
  138. uniq++;
  139. return 1;
  140. }
  141. }
  142. }
  143. char*
  144. skip(char *s)
  145. {
  146. int nf, nl;
  147. nf = nl = 0;
  148. while(nf++ < fields) {
  149. while(*s == ' ' || *s == '\t')
  150. s++;
  151. while(!(*s == ' ' || *s == '\t' || *s == 0) )
  152. s++;
  153. }
  154. while(nl++ < letters && *s != 0)
  155. s++;
  156. return s;
  157. }