/* wc.c */
/* vi: set sw=4 ts=4: */
/*
 * wc implementation for busybox
 *
 * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
 *
 * Rewritten to fix a number of problems and do some size optimizations.
 * Problems in the previous busybox implementation (besides bloat) included:
 *  1) broken 'wc -c' optimization (read note below)
 *  2) broken handling of '-' args
 *  3) no checking of ferror on EOF returns
 *  4) isprint() wasn't considered when word counting.
 *
 * TODO:
 *
 * When locale support is enabled, count multibyte chars in the '-m' case.
 *
 * NOTES:
 *
 * The previous busybox wc attempted an optimization using stat for the
 * case of counting chars only. I omitted that because it was broken.
 * It didn't take into account the possibility of input coming from a
 * pipe, or input from a file with file pointer not at the beginning.
 *
 * To implement such a speed optimization correctly, not only do you
 * need the size, but also the file position. Note also that the
 * file position may be past the end of file. Consider the example
 * (adapted from example in gnu wc.c)
 *
 *      echo hello > /tmp/testfile &&
 *      (dd ibs=1k skip=1 count=0 &> /dev/null ; wc -c) < /tmp/testfile
 *
 * for which 'wc -c' should output '0'.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "busybox.h"
  59. #ifdef CONFIG_LOCALE_SUPPORT
  60. #include <locale.h>
  61. #include <ctype.h>
  62. #define isspace_given_isprint(c) isspace(c)
  63. #else
  64. #undef isspace
  65. #undef isprint
  66. #define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))
  67. #define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))
  68. #define isspace_given_isprint(c) ((c) == ' ')
  69. #endif
  70. enum {
  71. WC_LINES = 0,
  72. WC_WORDS = 1,
  73. WC_CHARS = 2,
  74. WC_LENGTH = 3
  75. };
  76. /* Note: If this changes, remember to change the initialization of
  77. * 'name' in wc_main. It needs to point to the terminating nul. */
  78. static const char wc_opts[] = "lwcL"; /* READ THE WARNING ABOVE! */
  79. enum {
  80. OP_INC_LINE = 1, /* OP_INC_LINE must be 1. */
  81. OP_SPACE = 2,
  82. OP_NEWLINE = 4,
  83. OP_TAB = 8,
  84. OP_NUL = 16,
  85. };
  86. /* Note: If fmt_str changes, the offsets to 's' in the OUTPUT section
  87. * will need to be updated. */
  88. static const char fmt_str[] = " %7u\0 %s\n";
  89. static const char total_str[] = "total";
  90. int wc_main(int argc, char **argv)
  91. {
  92. FILE *fp;
  93. const char *s;
  94. unsigned int *pcounts;
  95. unsigned int counts[4];
  96. unsigned int totals[4];
  97. unsigned int linepos;
  98. unsigned int u;
  99. int num_files = 0;
  100. int c;
  101. char status = EXIT_SUCCESS;
  102. char in_word;
  103. char print_type;
  104. print_type = bb_getopt_ulflags(argc, argv, wc_opts);
  105. if (print_type == 0) {
  106. print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
  107. }
  108. argv += optind;
  109. if (!*argv) {
  110. *--argv = (char *) bb_msg_standard_input;
  111. }
  112. memset(totals, 0, sizeof(totals));
  113. pcounts = counts;
  114. do {
  115. ++num_files;
  116. if (!(fp = bb_wfopen_input(*argv))) {
  117. status = EXIT_FAILURE;
  118. continue;
  119. }
  120. memset(counts, 0, sizeof(counts));
  121. linepos = 0;
  122. in_word = 0;
  123. do {
  124. ++counts[WC_CHARS];
  125. c = getc(fp);
  126. if (isprint(c)) {
  127. ++linepos;
  128. if (!isspace_given_isprint(c)) {
  129. in_word = 1;
  130. continue;
  131. }
  132. } else if (((unsigned int)(c - 9)) <= 4) {
  133. /* \t 9
  134. * \n 10
  135. * \v 11
  136. * \f 12
  137. * \r 13
  138. */
  139. if (c == '\t') {
  140. linepos = (linepos | 7) + 1;
  141. } else { /* '\n', '\r', '\f', or '\v' */
  142. DO_EOF:
  143. if (linepos > counts[WC_LENGTH]) {
  144. counts[WC_LENGTH] = linepos;
  145. }
  146. if (c == '\n') {
  147. ++counts[WC_LINES];
  148. }
  149. if (c != '\v') {
  150. linepos = 0;
  151. }
  152. }
  153. } else if (c == EOF) {
  154. if (ferror(fp)) {
  155. bb_perror_msg("%s", *argv);
  156. status = EXIT_FAILURE;
  157. }
  158. --counts[WC_CHARS];
  159. goto DO_EOF; /* Treat an EOF as '\r'. */
  160. } else {
  161. continue;
  162. }
  163. counts[WC_WORDS] += in_word;
  164. in_word = 0;
  165. if (c == EOF) {
  166. break;
  167. }
  168. } while (1);
  169. if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
  170. totals[WC_LENGTH] = counts[WC_LENGTH];
  171. }
  172. totals[WC_LENGTH] -= counts[WC_LENGTH];
  173. bb_fclose_nonstdin(fp);
  174. OUTPUT:
  175. s = fmt_str + 1; /* Skip the leading space on 1st pass. */
  176. u = 0;
  177. do {
  178. if (print_type & (1 << u)) {
  179. bb_printf(s, pcounts[u]);
  180. s = fmt_str; /* Ok... restore the leading space. */
  181. }
  182. totals[u] += pcounts[u];
  183. } while (++u < 4);
  184. s += 8; /* Set the format to the empty string. */
  185. if (*argv != bb_msg_standard_input) {
  186. s -= 3; /* We have a name, so do %s conversion. */
  187. }
  188. bb_printf(s, *argv);
  189. } while (*++argv);
  190. /* If more than one file was processed, we want the totals. To save some
  191. * space, we set the pcounts ptr to the totals array. This has the side
  192. * effect of trashing the totals array after outputting it, but that's
  193. * irrelavent since we no longer need it. */
  194. if (num_files > 1) {
  195. num_files = 0; /* Make sure we don't get here again. */
  196. *--argv = (char *) total_str;
  197. pcounts = totals;
  198. goto OUTPUT;
  199. }
  200. bb_fflush_stdout_and_exit(status);
  201. }