uniq.c 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  /*
   * uniq: filter adjacent duplicate lines in a file
   */
  4. #include <u.h>
  5. #include <libc.h>
  6. #include <bio.h>
  7. #include <ctype.h>
  8. #define SIZE 8000
  9. int fields = 0;
  10. int letters = 0;
  11. int linec = 0;
  12. char mode;
  13. int uniq;
  14. char *b1, *b2;
  15. long bsize;
  16. Biobuf fin;
  17. Biobuf fout;
  18. int gline(char *buf);
  19. void pline(char *buf);
  20. int equal(char *b1, char *b2);
  21. char* skip(char *s);
  22. void
  23. main(int argc, char *argv[])
  24. {
  25. int f;
  26. bsize = SIZE;
  27. b1 = malloc(bsize);
  28. b2 = malloc(bsize);
  29. f = 0;
  30. while(argc > 1) {
  31. if(*argv[1] == '-') {
  32. if(isdigit(argv[1][1]))
  33. fields = atoi(&argv[1][1]);
  34. else
  35. mode = argv[1][1];
  36. argc--;
  37. argv++;
  38. continue;
  39. }
  40. if(*argv[1] == '+') {
  41. letters = atoi(&argv[1][1]);
  42. argc--;
  43. argv++;
  44. continue;
  45. }
  46. f = open(argv[1], 0);
  47. if(f < 0) {
  48. fprint(2, "cannot open %s\n", argv[1]);
  49. exits("open");
  50. }
  51. break;
  52. }
  53. if(argc > 2) {
  54. fprint(2, "unexpected argument %s\n", argv[2]);
  55. exits("arg");
  56. }
  57. Binit(&fin, f, OREAD);
  58. Binit(&fout, 1, OWRITE);
  59. if(gline(b1))
  60. exits(0);
  61. for(;;) {
  62. linec++;
  63. if(gline(b2)) {
  64. pline(b1);
  65. exits(0);
  66. }
  67. if(!equal(b1, b2)) {
  68. pline(b1);
  69. linec = 0;
  70. do {
  71. linec++;
  72. if(gline(b1)) {
  73. pline(b2);
  74. exits(0);
  75. }
  76. } while(equal(b2, b1));
  77. pline(b2);
  78. linec = 0;
  79. }
  80. }
  81. }
  82. int
  83. gline(char *buf)
  84. {
  85. char *p;
  86. p = Brdline(&fin, '\n');
  87. if(p == 0)
  88. return 1;
  89. if(fin.rdline >= bsize-1) {
  90. fprint(2, "line too long\n");
  91. exits("too long");
  92. }
  93. memmove(buf, p, fin.rdline);
  94. buf[fin.rdline-1] = 0;
  95. return 0;
  96. }
  97. void
  98. pline(char *buf)
  99. {
  100. switch(mode) {
  101. case 'u':
  102. if(uniq) {
  103. uniq = 0;
  104. return;
  105. }
  106. break;
  107. case 'd':
  108. if(uniq)
  109. break;
  110. return;
  111. case 'c':
  112. Bprint(&fout, "%4d ", linec);
  113. }
  114. uniq = 0;
  115. Bprint(&fout, "%s\n", buf);
  116. }
  117. int
  118. equal(char *b1, char *b2)
  119. {
  120. char c;
  121. if(fields || letters) {
  122. b1 = skip(b1);
  123. b2 = skip(b2);
  124. }
  125. for(;;) {
  126. c = *b1++;
  127. if(c != *b2++) {
  128. if(c == 0 && mode == 's')
  129. return 1;
  130. return 0;
  131. }
  132. if(c == 0) {
  133. uniq++;
  134. return 1;
  135. }
  136. }
  137. }
  138. char*
  139. skip(char *s)
  140. {
  141. int nf, nl;
  142. nf = nl = 0;
  143. while(nf++ < fields) {
  144. while(*s == ' ' || *s == '\t')
  145. s++;
  146. while(!(*s == ' ' || *s == '\t' || *s == 0) )
  147. s++;
  148. }
  149. while(nl++ < letters && *s != 0)
  150. s++;
  151. return s;
  152. }