3
0

cut.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * cut.c - minimalist version of cut
  4. *
  5. * Copyright (C) 1999,2000,2001 by Lineo, inc.
  6. * Written by Mark Whitley <markw@codepoet.org>
  7. * debloated by Bernhard Fischer
  8. *
  9. * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  10. */
  11. #include "busybox.h"
  /*
   * Command-line option description.
   *
   * optstring is handed to getopt32(); the four letters followed by ':'
   * (b, c, f, d) are the ones cut_main() supplies an argument pointer for.
   */
  static const char optstring[] = "b:c:f:d:sn";

  /*
   * Bit masks tested against option_mask32.  The bit positions follow the
   * order of the letters in optstring (b, c, f, d, s) -- presumably the
   * order in which getopt32() assigns them.
   */
  #define CUT_OPT_BYTE_FLGS     0x01   /* -b LIST */
  #define CUT_OPT_CHAR_FLGS     0x02   /* -c LIST */
  #define CUT_OPT_FIELDS_FLGS   0x04   /* -f LIST */
  #define CUT_OPT_DELIM_FLGS    0x08   /* -d CHAR */
  #define CUT_OPT_SUPPRESS_FLGS 0x10   /* -s */

  /* Field delimiter; stays a tab unless -d overrides it. */
  static char delim = '\t';
  /*
   * One entry of the user-supplied LIST (e.g. "3", "2-5", "4-", "-7").
   *
   * startpos: zero-based position of the first selected item.
   * endpos:   zero-based position of the last selected item, or NON_RANGE
   *           when the entry names a single position rather than a range.
   */
  struct cut_list {
      int startpos;
      int endpos;
  };

  /* Special position values used while parsing and walking the lists. */
  enum {
      BOL = 0,            /* start used when the entry has no start ("-M") */
      EOL = INT_MAX,      /* end used when the entry has no end ("N-") */
      NON_RANGE = -1      /* endpos marker: entry is a single position */
  };

  /* growable array holding a series of lists */
  static struct cut_list *cut_lists;
  static unsigned int nlists;    /* number of elements in above list */

  /*
   * qsort() comparator: orders cut_list entries by ascending startpos.
   *
   * Returns a negative, zero or positive value when a's start position is
   * respectively lower than, equal to or higher than b's.  The result is
   * computed from two comparisons instead of a subtraction so that it can
   * never overflow, whatever the two values are.
   */
  static int cmpfunc(const void *a, const void *b)
  {
      const struct cut_list *lhs = a;
      const struct cut_list *rhs = b;

      return (lhs->startpos > rhs->startpos) - (lhs->startpos < rhs->startpos);
  }
  37. static void cut_file(FILE * file)
  38. {
  39. char *line = NULL;
  40. unsigned int linenum = 0; /* keep these zero-based to be consistent */
  41. /* go through every line in the file */
  42. while ((line = xmalloc_getline(file)) != NULL) {
  43. /* set up a list so we can keep track of what's been printed */
  44. char * printed = xzalloc(strlen(line) * sizeof(char));
  45. char * orig_line = line;
  46. unsigned int cl_pos = 0;
  47. int spos;
  48. /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
  49. if (option_mask32 & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS)) {
  50. /* print the chars specified in each cut list */
  51. for (; cl_pos < nlists; cl_pos++) {
  52. spos = cut_lists[cl_pos].startpos;
  53. while (spos < strlen(line)) {
  54. if (!printed[spos]) {
  55. printed[spos] = 'X';
  56. putchar(line[spos]);
  57. }
  58. spos++;
  59. if (spos > cut_lists[cl_pos].endpos
  60. || cut_lists[cl_pos].endpos == NON_RANGE)
  61. break;
  62. }
  63. }
  64. } else if (delim == '\n') { /* cut by lines */
  65. spos = cut_lists[cl_pos].startpos;
  66. /* get out if we have no more lists to process or if the lines
  67. * are lower than what we're interested in */
  68. if (linenum < spos || cl_pos >= nlists)
  69. goto next_line;
  70. /* if the line we're looking for is lower than the one we were
  71. * passed, it means we displayed it already, so move on */
  72. while (spos < linenum) {
  73. spos++;
  74. /* go to the next list if we're at the end of this one */
  75. if (spos > cut_lists[cl_pos].endpos
  76. || cut_lists[cl_pos].endpos == NON_RANGE) {
  77. cl_pos++;
  78. /* get out if there's no more lists to process */
  79. if (cl_pos >= nlists)
  80. goto next_line;
  81. spos = cut_lists[cl_pos].startpos;
  82. /* get out if the current line is lower than the one
  83. * we just became interested in */
  84. if (linenum < spos)
  85. goto next_line;
  86. }
  87. }
  88. /* If we made it here, it means we've found the line we're
  89. * looking for, so print it */
  90. puts(line);
  91. goto next_line;
  92. } else { /* cut by fields */
  93. int ndelim = -1; /* zero-based / one-based problem */
  94. int nfields_printed = 0;
  95. char *field = NULL;
  96. const char delimiter[2] = { delim, 0 };
  97. /* does this line contain any delimiters? */
  98. if (strchr(line, delim) == NULL) {
  99. if (!(option_mask32 & CUT_OPT_SUPPRESS_FLGS))
  100. puts(line);
  101. goto next_line;
  102. }
  103. /* process each list on this line, for as long as we've got
  104. * a line to process */
  105. for (; cl_pos < nlists && line; cl_pos++) {
  106. spos = cut_lists[cl_pos].startpos;
  107. do {
  108. /* find the field we're looking for */
  109. while (line && ndelim < spos) {
  110. field = strsep(&line, delimiter);
  111. ndelim++;
  112. }
  113. /* we found it, and it hasn't been printed yet */
  114. if (field && ndelim == spos && !printed[ndelim]) {
  115. /* if this isn't our first time through, we need to
  116. * print the delimiter after the last field that was
  117. * printed */
  118. if (nfields_printed > 0)
  119. putchar(delim);
  120. fputs(field, stdout);
  121. printed[ndelim] = 'X';
  122. nfields_printed++; /* shouldn't overflow.. */
  123. }
  124. spos++;
  125. /* keep going as long as we have a line to work with,
  126. * this is a list, and we're not at the end of that
  127. * list */
  128. } while (spos <= cut_lists[cl_pos].endpos && line
  129. && cut_lists[cl_pos].endpos != NON_RANGE);
  130. }
  131. }
  132. /* if we printed anything at all, we need to finish it with a
  133. * newline cuz we were handed a chomped line */
  134. putchar('\n');
  135. next_line:
  136. linenum++;
  137. free(printed);
  138. free(orig_line);
  139. }
  140. }
  141. static const char _op_on_field[] = " only when operating on fields";
  142. int cut_main(int argc, char **argv)
  143. {
  144. char *sopt, *ltok;
  145. opt_complementary = "b--bcf:c--bcf:f--bcf";
  146. getopt32(argc, argv, optstring, &sopt, &sopt, &sopt, &ltok);
  147. if (!(option_mask32 & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS)))
  148. bb_error_msg_and_die("expected a list of bytes, characters, or fields");
  149. if (option_mask32 & BB_GETOPT_ERROR)
  150. bb_error_msg_and_die("only one type of list may be specified");
  151. if (option_mask32 & CUT_OPT_DELIM_FLGS) {
  152. if (strlen(ltok) > 1) {
  153. bb_error_msg_and_die("the delimiter must be a single character");
  154. }
  155. delim = ltok[0];
  156. }
  157. /* non-field (char or byte) cutting has some special handling */
  158. if (!(option_mask32 & CUT_OPT_FIELDS_FLGS)) {
  159. if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) {
  160. bb_error_msg_and_die
  161. ("suppressing non-delimited lines makes sense%s",
  162. _op_on_field);
  163. }
  164. if (delim != '\t') {
  165. bb_error_msg_and_die
  166. ("a delimiter may be specified%s", _op_on_field);
  167. }
  168. }
  169. /*
  170. * parse list and put values into startpos and endpos.
  171. * valid list formats: N, N-, N-M, -M
  172. * more than one list can be separated by commas
  173. */
  174. {
  175. char *ntok;
  176. int s = 0, e = 0;
  177. /* take apart the lists, one by one (they are separated with commas */
  178. while ((ltok = strsep(&sopt, ",")) != NULL) {
  179. /* it's actually legal to pass an empty list */
  180. if (strlen(ltok) == 0)
  181. continue;
  182. /* get the start pos */
  183. ntok = strsep(&ltok, "-");
  184. if (ntok == NULL) {
  185. bb_error_msg
  186. ("internal error: ntok is null for start pos!?\n");
  187. } else if (strlen(ntok) == 0) {
  188. s = BOL;
  189. } else {
  190. s = xatoi_u(ntok);
  191. /* account for the fact that arrays are zero based, while
  192. * the user expects the first char on the line to be char #1 */
  193. if (s != 0)
  194. s--;
  195. }
  196. /* get the end pos */
  197. ntok = strsep(&ltok, "-");
  198. if (ntok == NULL) {
  199. e = NON_RANGE;
  200. } else if (strlen(ntok) == 0) {
  201. e = EOL;
  202. } else {
  203. e = xatoi_u(ntok);
  204. /* if the user specified and end position of 0, that means "til the
  205. * end of the line */
  206. if (e == 0)
  207. e = EOL;
  208. e--; /* again, arrays are zero based, lines are 1 based */
  209. if (e == s)
  210. e = NON_RANGE;
  211. }
  212. /* if there's something left to tokenize, the user passed
  213. * an invalid list */
  214. if (ltok)
  215. bb_error_msg_and_die("invalid byte or field list");
  216. /* add the new list */
  217. cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
  218. cut_lists[nlists-1].startpos = s;
  219. cut_lists[nlists-1].endpos = e;
  220. }
  221. /* make sure we got some cut positions out of all that */
  222. if (nlists == 0)
  223. bb_error_msg_and_die("missing list of positions");
  224. /* now that the lists are parsed, we need to sort them to make life
  225. * easier on us when it comes time to print the chars / fields / lines
  226. */
  227. qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
  228. }
  229. /* argv[(optind)..(argc-1)] should be names of file to process. If no
  230. * files were specified or '-' was specified, take input from stdin.
  231. * Otherwise, we process all the files specified. */
  232. if (argv[optind] == NULL
  233. || (argv[optind][0] == '-' && argv[optind][1] == '\0')) {
  234. cut_file(stdin);
  235. } else {
  236. FILE *file;
  237. for (; optind < argc; optind++) {
  238. file = fopen_or_warn(argv[optind], "r");
  239. if (file) {
  240. cut_file(file);
  241. fclose(file);
  242. }
  243. }
  244. }
  245. if (ENABLE_FEATURE_CLEAN_UP)
  246. free(cut_lists);
  247. return EXIT_SUCCESS;
  248. }