wc.c 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * wc implementation for busybox
  4. *
  5. * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org>
  6. *
  7. * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  8. */
  9. /* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
  10. /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
  11. /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
  12. *
  13. * Rewritten to fix a number of problems and do some size optimizations.
  14. * Problems in the previous busybox implementation (besides bloat) included:
  15. * 1) broken 'wc -c' optimization (read note below)
  16. * 2) broken handling of '-' args
  17. * 3) no checking of ferror on EOF returns
  18. * 4) isprint() wasn't considered when word counting.
  19. *
  20. * TODO:
  21. *
  22. * When locale support is enabled, count multibyte chars in the '-m' case.
  23. *
  24. * NOTES:
  25. *
  26. * The previous busybox wc attempted an optimization using stat for the
  27. * case of counting chars only. I omitted that because it was broken.
  28. * It didn't take into account the possibility of input coming from a
  29. * pipe, or input from a file with file pointer not at the beginning.
  30. *
  31. * To implement such a speed optimization correctly, not only do you
  32. * need the size, but also the file position. Note also that the
  33. * file position may be past the end of file. Consider the example
  34. * (adapted from example in gnu wc.c)
  35. *
  36. * echo hello > /tmp/testfile &&
  37. * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
  38. *
  39. * for which 'wc -c' should output '0'.
  40. */
  41. #include "libbb.h"
  42. #if ENABLE_LOCALE_SUPPORT
  43. #define isspace_given_isprint(c) isspace(c)
  44. #else
  45. #undef isspace
  46. #undef isprint
  47. #define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))
  48. #define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))
  49. #define isspace_given_isprint(c) ((c) == ' ')
  50. #endif
  51. #if ENABLE_FEATURE_WC_LARGE
  52. #define COUNT_T unsigned long long
  53. #define COUNT_FMT "llu"
  54. #else
  55. #define COUNT_T unsigned
  56. #define COUNT_FMT "u"
  57. #endif
  58. enum {
  59. WC_LINES = 0,
  60. WC_WORDS = 1,
  61. WC_CHARS = 2,
  62. WC_LENGTH = 3
  63. };
  64. int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  65. int wc_main(int argc, char **argv)
  66. {
  67. FILE *fp;
  68. const char *s, *arg;
  69. const char *start_fmt = "%9"COUNT_FMT;
  70. const char *fname_fmt = " %s\n";
  71. COUNT_T *pcounts;
  72. COUNT_T counts[4];
  73. COUNT_T totals[4];
  74. unsigned linepos;
  75. unsigned u;
  76. int num_files = 0;
  77. int c;
  78. smallint status = EXIT_SUCCESS;
  79. smallint in_word;
  80. unsigned print_type;
  81. print_type = getopt32(argv, "lwcL");
  82. if (print_type == 0) {
  83. print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
  84. }
  85. argv += optind;
  86. if (!argv[0]) {
  87. *--argv = (char *) bb_msg_standard_input;
  88. fname_fmt = "\n";
  89. if (!((print_type-1) & print_type)) /* exactly one option? */
  90. start_fmt = "%"COUNT_FMT;
  91. }
  92. memset(totals, 0, sizeof(totals));
  93. pcounts = counts;
  94. while ((arg = *argv++) != 0) {
  95. ++num_files;
  96. fp = fopen_or_warn_stdin(arg);
  97. if (!fp) {
  98. status = EXIT_FAILURE;
  99. continue;
  100. }
  101. memset(counts, 0, sizeof(counts));
  102. linepos = 0;
  103. in_word = 0;
  104. do {
  105. /* Our -w doesn't match GNU wc exactly... oh well */
  106. ++counts[WC_CHARS];
  107. c = getc(fp);
  108. if (isprint(c)) {
  109. ++linepos;
  110. if (!isspace_given_isprint(c)) {
  111. in_word = 1;
  112. continue;
  113. }
  114. } else if (((unsigned int)(c - 9)) <= 4) {
  115. /* \t 9
  116. * \n 10
  117. * \v 11
  118. * \f 12
  119. * \r 13
  120. */
  121. if (c == '\t') {
  122. linepos = (linepos | 7) + 1;
  123. } else { /* '\n', '\r', '\f', or '\v' */
  124. DO_EOF:
  125. if (linepos > counts[WC_LENGTH]) {
  126. counts[WC_LENGTH] = linepos;
  127. }
  128. if (c == '\n') {
  129. ++counts[WC_LINES];
  130. }
  131. if (c != '\v') {
  132. linepos = 0;
  133. }
  134. }
  135. } else if (c == EOF) {
  136. if (ferror(fp)) {
  137. bb_simple_perror_msg(arg);
  138. status = EXIT_FAILURE;
  139. }
  140. --counts[WC_CHARS];
  141. goto DO_EOF; /* Treat an EOF as '\r'. */
  142. } else {
  143. continue;
  144. }
  145. counts[WC_WORDS] += in_word;
  146. in_word = 0;
  147. if (c == EOF) {
  148. break;
  149. }
  150. } while (1);
  151. if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
  152. totals[WC_LENGTH] = counts[WC_LENGTH];
  153. }
  154. totals[WC_LENGTH] -= counts[WC_LENGTH];
  155. fclose_if_not_stdin(fp);
  156. OUTPUT:
  157. /* coreutils wc tries hard to print pretty columns
  158. * (saves results for all files, find max col len etc...)
  159. * we won't try that hard, it will bloat us too much */
  160. s = start_fmt;
  161. u = 0;
  162. do {
  163. if (print_type & (1 << u)) {
  164. printf(s, pcounts[u]);
  165. s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
  166. }
  167. totals[u] += pcounts[u];
  168. } while (++u < 4);
  169. printf(fname_fmt, arg);
  170. }
  171. /* If more than one file was processed, we want the totals. To save some
  172. * space, we set the pcounts ptr to the totals array. This has the side
  173. * effect of trashing the totals array after outputting it, but that's
  174. * irrelavent since we no longer need it. */
  175. if (num_files > 1) {
  176. num_files = 0; /* Make sure we don't get here again. */
  177. arg = "total";
  178. pcounts = totals;
  179. --argv;
  180. goto OUTPUT;
  181. }
  182. fflush_stdout_and_exit(status);
  183. }