123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- /* vi: set sw=4 ts=4: */
- /*
- * wc implementation for busybox
- *
- * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
- /* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
- /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
- /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
- *
- * Rewritten to fix a number of problems and do some size optimizations.
- * Problems in the previous busybox implementation (besides bloat) included:
- * 1) broken 'wc -c' optimization (read note below)
- * 2) broken handling of '-' args
- * 3) no checking of ferror on EOF returns
- * 4) isprint() wasn't considered when word counting.
- *
- * TODO:
- *
- * When locale support is enabled, count multibyte chars in the '-m' case.
- *
- * NOTES:
- *
- * The previous busybox wc attempted an optimization using stat for the
- * case of counting chars only. I omitted that because it was broken.
- * It didn't take into account the possibility of input coming from a
- * pipe, or input from a file with file pointer not at the beginning.
- *
- * To implement such a speed optimization correctly, not only do you
- * need the size, but also the file position. Note also that the
- * file position may be past the end of file. Consider the example
- * (adapted from example in gnu wc.c)
- *
- * echo hello > /tmp/testfile &&
- * (dd ibs=1k skip=1 count=0 &> /dev/null ; wc -c) < /tmp/testfile
- *
- * for which 'wc -c' should output '0'.
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include "busybox.h"
/*
 * Character classification for the counting loop.
 *
 * isspace_given_isprint(c) answers "is c a space?" for a character that
 * has already passed isprint(), which lets the non-locale build reduce the
 * test to a single comparison.
 */
#ifdef CONFIG_LOCALE_SUPPORT
#include <locale.h>
#include <ctype.h>
/* Locale build: defer to the C library's classification. */
#define isspace_given_isprint(c) isspace(c)
#else
/*
 * Non-locale build: drop any macro versions already in effect and use
 * fixed ASCII range tests instead.  The unsigned casts make values below
 * the range (including EOF) wrap to large numbers, so a single compare
 * covers both bounds.
 */
#undef isprint
#undef isspace
/* Printable: 0x20 (' ') through 0x7e ('~'). */
#define isprint(c) ((unsigned int)((c) - 0x20) <= (unsigned int)(0x7e - 0x20))
/* Whitespace: ' ' or one of 9..13 (\t \n \v \f \r). */
#define isspace(c) (((c) == ' ') || ((unsigned int)((c) - 9) <= (unsigned int)(13 - 9)))
/* Of the printable ASCII characters, only ' ' is whitespace. */
#define isspace_given_isprint(c) ((c) == ' ')
#endif
/*
 * Slots in the four-element counter arrays kept by wc_main.  The same
 * numbers are used as bit positions when testing which counters to print.
 */
enum {
	WC_LINES,	/* 0: number of '\n' characters seen */
	WC_WORDS,	/* 1: number of words */
	WC_CHARS,	/* 2: number of characters read */
	WC_LENGTH	/* 3: length of the longest line */
};
/*
 * Option letters passed to bb_getopt_ulflags() by wc_main.
 *
 * wc_main tests bit N of the returned mask to decide whether counter N is
 * printed, so the letters presumably have to stay in counter-index order
 * (l, w, c, L) -- confirm against bb_getopt_ulflags before reordering.
 *
 * NOTE(review): the previous comment here warned about a 'name' variable
 * in wc_main that must point at this string's terminating nul; no such
 * variable exists in the visible wc_main, so that warning looks stale.
 */
static const char wc_opts[] = "lwcL";
/*
 * Single-bit operation flags.
 * NOTE(review): nothing in the visible part of this file references these
 * constants; they may be leftovers -- confirm before removing.
 */
enum {
	OP_INC_LINE = 1 << 0,	/* Must stay equal to 1. */
	OP_SPACE    = 1 << 1,
	OP_NEWLINE  = 1 << 2,
	OP_TAB      = 1 << 3,
	OP_NUL      = 1 << 4
};
/*
 * Two printf formats packed into one array, separated by an embedded NUL.
 * The output code in this file indexes into it by fixed offsets, so any
 * change to the text below must be matched there:
 *
 *   offset 0: " %7u"   counter preceded by a separating space
 *   offset 1: "%7u"    counter without the leading space
 *   offset 5: " %s\n"  name followed by newline
 *   offset 8: "\n"     newline only
 */
static const char fmt_str[] = " %7u\0" " %s\n";

/* Label printed on the summary line. */
static const char total_str[] = "total";
- int wc_main(int argc, char **argv)
- {
- FILE *fp;
- const char *s;
- unsigned int *pcounts;
- unsigned int counts[4];
- unsigned int totals[4];
- unsigned int linepos;
- unsigned int u;
- int num_files = 0;
- int c;
- char status = EXIT_SUCCESS;
- char in_word;
- char print_type;
- print_type = bb_getopt_ulflags(argc, argv, wc_opts);
- if (print_type == 0) {
- print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
- }
- argv += optind;
- if (!*argv) {
- *--argv = (char *) bb_msg_standard_input;
- }
- memset(totals, 0, sizeof(totals));
- pcounts = counts;
- do {
- ++num_files;
- if (!(fp = bb_wfopen_input(*argv))) {
- status = EXIT_FAILURE;
- continue;
- }
- memset(counts, 0, sizeof(counts));
- linepos = 0;
- in_word = 0;
- do {
- ++counts[WC_CHARS];
- c = getc(fp);
- if (isprint(c)) {
- ++linepos;
- if (!isspace_given_isprint(c)) {
- in_word = 1;
- continue;
- }
- } else if (((unsigned int)(c - 9)) <= 4) {
- /* \t 9
- * \n 10
- * \v 11
- * \f 12
- * \r 13
- */
- if (c == '\t') {
- linepos = (linepos | 7) + 1;
- } else { /* '\n', '\r', '\f', or '\v' */
- DO_EOF:
- if (linepos > counts[WC_LENGTH]) {
- counts[WC_LENGTH] = linepos;
- }
- if (c == '\n') {
- ++counts[WC_LINES];
- }
- if (c != '\v') {
- linepos = 0;
- }
- }
- } else if (c == EOF) {
- if (ferror(fp)) {
- bb_perror_msg("%s", *argv);
- status = EXIT_FAILURE;
- }
- --counts[WC_CHARS];
- goto DO_EOF; /* Treat an EOF as '\r'. */
- } else {
- continue;
- }
- counts[WC_WORDS] += in_word;
- in_word = 0;
- if (c == EOF) {
- break;
- }
- } while (1);
- if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
- totals[WC_LENGTH] = counts[WC_LENGTH];
- }
- totals[WC_LENGTH] -= counts[WC_LENGTH];
- bb_fclose_nonstdin(fp);
- OUTPUT:
- s = fmt_str + 1; /* Skip the leading space on 1st pass. */
- u = 0;
- do {
- if (print_type & (1 << u)) {
- bb_printf(s, pcounts[u]);
- s = fmt_str; /* Ok... restore the leading space. */
- }
- totals[u] += pcounts[u];
- } while (++u < 4);
- s += 8; /* Set the format to the empty string. */
- if (*argv != bb_msg_standard_input) {
- s -= 3; /* We have a name, so do %s conversion. */
- }
- bb_printf(s, *argv);
- } while (*++argv);
- /* If more than one file was processed, we want the totals. To save some
- * space, we set the pcounts ptr to the totals array. This has the side
- * effect of trashing the totals array after outputting it, but that's
- * irrelavent since we no longer need it. */
- if (num_files > 1) {
- num_files = 0; /* Make sure we don't get here again. */
- *--argv = (char *) total_str;
- pcounts = totals;
- goto OUTPUT;
- }
- bb_fflush_stdout_and_exit(status);
- }
|