shuf.c 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * shuf: Write a random permutation of the input lines to standard output.
  4. *
  5. * Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com>
  6. *
  7. * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  8. */
  9. //config:config SHUF
  10. //config: bool "shuf (5.4 kb)"
  11. //config: default y
  12. //config: help
  13. //config: Generate random permutations
  14. //applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf))
  15. //kbuild:lib-$(CONFIG_SHUF) += shuf.o
  16. //usage:#define shuf_trivial_usage
  17. //usage: "[-n NUM] [-o FILE] [-z] [FILE | -e [ARG...] | -i L-H]"
  18. //usage:#define shuf_full_usage "\n\n"
  19. //usage: "Randomly permute lines\n"
  20. //usage: "\n -n NUM Output at most NUM lines"
  21. //usage: "\n -o FILE Write to FILE, not standard output"
  22. //usage: "\n -z NUL terminated output"
  23. //usage: "\n -e Treat ARGs as lines"
  24. //usage: "\n -i L-H Treat numbers L-H as lines"
  25. #include "libbb.h"
  26. /* This is a NOEXEC applet. Be very careful! */
  27. #define OPT_e (1 << 0)
  28. #define OPT_i (1 << 1)
  29. #define OPT_n (1 << 2)
  30. #define OPT_o (1 << 3)
  31. #define OPT_z (1 << 4)
  32. #define OPT_STR "ei:n:o:z"
  33. /*
  34. * Use the Fisher-Yates shuffle algorithm on an array of lines.
  35. * If the required number of output lines is less than the total
  36. * we can stop shuffling early.
  37. */
  38. static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
  39. {
  40. srand(monotonic_us());
  41. while (outlines != 0) {
  42. char *tmp;
  43. unsigned r = rand();
  44. /* RAND_MAX can be as small as 32767 */
  45. if (numlines > RAND_MAX)
  46. r ^= rand() << 15;
  47. r %= numlines;
  48. //TODO: the above method is seriously non-uniform when numlines is very large.
  49. //For example, with numlines of 0xf0000000,
  50. //values of (r % numlines) in [0, 0x0fffffff] range
  51. //are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
  52. //whereas only one value, r=0xefffffff, maps to 0xefffffff.
  53. numlines--;
  54. tmp = lines[numlines];
  55. lines[numlines] = lines[r];
  56. lines[r] = tmp;
  57. outlines--;
  58. }
  59. }
  60. int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  61. int shuf_main(int argc, char **argv)
  62. {
  63. unsigned opts;
  64. char *opt_i_str, *opt_n_str, *opt_o_str;
  65. char **lines;
  66. unsigned long long lo = lo;
  67. unsigned numlines, outlines;
  68. unsigned i;
  69. char eol;
  70. opts = getopt32(argv, "^"
  71. OPT_STR
  72. "\0" "e--i:i--e"/* mutually exclusive */,
  73. &opt_i_str, &opt_n_str, &opt_o_str
  74. );
  75. argc -= optind;
  76. argv += optind;
  77. /* Prepare lines for shuffling - either: */
  78. if (opts & OPT_e) {
  79. /* make lines from command-line arguments */
  80. numlines = argc;
  81. lines = argv;
  82. } else
  83. if (opts & OPT_i) {
  84. /* create a range of numbers */
  85. unsigned long long hi;
  86. char *dash;
  87. if (argv[0])
  88. bb_show_usage();
  89. dash = strchr(opt_i_str, '-');
  90. if (!dash) {
  91. bb_error_msg_and_die("bad range '%s'", opt_i_str);
  92. }
  93. *dash = '\0';
  94. lo = xatoull(opt_i_str);
  95. hi = xatoull(dash + 1);
  96. *dash = '-';
  97. if (hi < lo)
  98. bb_error_msg_and_die("bad range '%s'", opt_i_str);
  99. hi -= lo;
  100. if (sizeof(size_t) > sizeof(numlines)) {
  101. if (hi >= UINT_MAX)
  102. bb_error_msg_and_die("bad range '%s'", opt_i_str);
  103. } else {
  104. if (hi >= UINT_MAX / sizeof(lines[0]))
  105. bb_error_msg_and_die("bad range '%s'", opt_i_str);
  106. }
  107. numlines = hi + 1;
  108. lines = xmalloc((size_t)numlines * sizeof(lines[0]));
  109. for (i = 0; i < numlines; i++) {
  110. lines[i] = (char*)(uintptr_t)i;
  111. }
  112. } else {
  113. /* default - read lines from stdin or the input file */
  114. FILE *fp;
  115. const char *fname = "-";
  116. if (argv[0]) {
  117. if (argv[1])
  118. bb_show_usage();
  119. fname = argv[0];
  120. }
  121. fp = xfopen_stdin(fname);
  122. lines = NULL;
  123. numlines = 0;
  124. for (;;) {
  125. char *line = xmalloc_fgetline(fp);
  126. if (!line)
  127. break;
  128. lines = xrealloc_vector(lines, 6, numlines);
  129. lines[numlines++] = line;
  130. }
  131. fclose_if_not_stdin(fp);
  132. }
  133. outlines = numlines;
  134. if (opts & OPT_n) {
  135. outlines = xatou(opt_n_str);
  136. if (outlines > numlines)
  137. outlines = numlines;
  138. }
  139. shuffle_lines(lines, numlines, outlines);
  140. if (opts & OPT_o)
  141. xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
  142. eol = '\n';
  143. if (opts & OPT_z)
  144. eol = '\0';
  145. for (i = numlines - outlines; i < numlines; i++) {
  146. if (opts & OPT_i)
  147. printf("%llu%c", lo + (uintptr_t)lines[i], eol);
  148. else
  149. printf("%s%c", lines[i], eol);
  150. }
  151. fflush_stdout_and_exit_SUCCESS();
  152. }