uniq.c 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /*
  2. * Deal with duplicated lines in a file
  3. */
  4. #include <u.h>
  5. #include <libc.h>
  6. #include <bio.h>
  7. #include <ctype.h>
  8. #define SIZE 8000 /* value given to bsize: bytes allocated for each line buffer */
  9. int fields = 0; /* set from -N: number of leading blank-separated fields skip() passes over */
  10. int letters = 0; /* set from +N: number of characters skip() passes over after the fields */
  11. int linec = 0; /* line counter maintained by main(); printed by pline() in mode 'c' */
  12. char mode; /* option character taken from -x; 'u', 'd', 'c' are tested in pline(), 's' in equal() */
  13. int uniq; /* incremented by equal() on an exact match, cleared by pline() */
  14. char *b1, *b2; /* the two line buffers main() compares against each other */
  15. long bsize; /* size in bytes of b1 and b2 */
  16. Biobuf fin; /* buffered input: the named file, or file descriptor 0 */
  17. Biobuf fout; /* buffered output on file descriptor 1 */
  18. int gline(char *buf); /* read the next line into buf; nonzero when no line was read */
  19. void pline(char *buf); /* print buf as dictated by mode */
  20. int equal(char *b1, char *b2); /* nonzero if the two lines compare equal after skip() */
  21. char* skip(char *s); /* advance s past the ignored fields and letters */
  22. void
  23. main(int argc, char *argv[])
  24. {
  25. int f;
  26. argv0 = argv[0];
  27. bsize = SIZE;
  28. b1 = malloc(bsize);
  29. b2 = malloc(bsize);
  30. f = 0;
  31. while(argc > 1) {
  32. if(*argv[1] == '-') {
  33. if(isdigit(argv[1][1]))
  34. fields = atoi(&argv[1][1]);
  35. else
  36. mode = argv[1][1];
  37. argc--;
  38. argv++;
  39. continue;
  40. }
  41. if(*argv[1] == '+') {
  42. letters = atoi(&argv[1][1]);
  43. argc--;
  44. argv++;
  45. continue;
  46. }
  47. f = open(argv[1], 0);
  48. if(f < 0)
  49. sysfatal("cannot open %s", argv[1]);
  50. break;
  51. }
  52. if(argc > 2)
  53. sysfatal("unexpected argument %s", argv[2]);
  54. Binit(&fin, f, OREAD);
  55. Binit(&fout, 1, OWRITE);
  56. if(gline(b1))
  57. exits(0);
  58. for(;;) {
  59. linec++;
  60. if(gline(b2)) {
  61. pline(b1);
  62. exits(0);
  63. }
  64. if(!equal(b1, b2)) {
  65. pline(b1);
  66. linec = 0;
  67. do {
  68. linec++;
  69. if(gline(b1)) {
  70. pline(b2);
  71. exits(0);
  72. }
  73. } while(equal(b2, b1));
  74. pline(b2);
  75. linec = 0;
  76. }
  77. }
  78. }
  79. int
  80. gline(char *buf)
  81. {
  82. int len;
  83. char *p;
  84. p = Brdline(&fin, '\n');
  85. if(p == 0)
  86. return 1;
  87. len = Blinelen(&fin);
  88. if(len >= bsize-1)
  89. sysfatal("line too long");
  90. memmove(buf, p, len);
  91. buf[len-1] = 0;
  92. return 0;
  93. }
  94. void
  95. pline(char *buf)
  96. {
  97. switch(mode) {
  98. case 'u':
  99. if(uniq) {
  100. uniq = 0;
  101. return;
  102. }
  103. break;
  104. case 'd':
  105. if(uniq)
  106. break;
  107. return;
  108. case 'c':
  109. Bprint(&fout, "%4d ", linec);
  110. }
  111. uniq = 0;
  112. Bprint(&fout, "%s\n", buf);
  113. }
  114. int
  115. equal(char *b1, char *b2)
  116. {
  117. char c;
  118. if(fields || letters) {
  119. b1 = skip(b1);
  120. b2 = skip(b2);
  121. }
  122. for(;;) {
  123. c = *b1++;
  124. if(c != *b2++) {
  125. if(c == 0 && mode == 's')
  126. return 1;
  127. return 0;
  128. }
  129. if(c == 0) {
  130. uniq++;
  131. return 1;
  132. }
  133. }
  134. }
  135. char*
  136. skip(char *s)
  137. {
  138. int nf, nl;
  139. nf = nl = 0;
  140. while(nf++ < fields) {
  141. while(*s == ' ' || *s == '\t')
  142. s++;
  143. while(!(*s == ' ' || *s == '\t' || *s == 0) )
  144. s++;
  145. }
  146. while(nl++ < letters && *s != 0)
  147. s++;
  148. return s;
  149. }