factor.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. * Copyright (C) 2017 Denys Vlasenko <vda.linux@googlemail.com>
  3. *
  4. * Licensed under GPLv2, see file LICENSE in this source tree.
  5. */
  6. //config:config FACTOR
  7. //config: bool "factor (3.2 kb)"
  8. //config: default y
  9. //config: help
  10. //config: factor factorizes integers
  11. //applet:IF_FACTOR(APPLET(factor, BB_DIR_USR_BIN, BB_SUID_DROP))
  12. //kbuild:lib-$(CONFIG_FACTOR) += factor.o
  13. //usage:#define factor_trivial_usage
  14. //usage: "[NUMBER]..."
  15. //usage:#define factor_full_usage "\n\n"
  16. //usage: "Print prime factors"
  17. #include "libbb.h"
  18. #include "common_bufsiz.h"
  /* Debug tracing hook.
   * Tracing is compiled out: dbg() discards its arguments without
   * evaluating them. Swap the branches (change the condition to 0)
   * to route dbg() through bb_error_msg() instead.
   */
  #if 1
  # define dbg(...) ((void)0)
  #else
  # define dbg(...) bb_error_msg(__VA_ARGS__)
  #endif
  /* wide_t holds the number being factored; half_t is an unsigned type
   * exactly half as wide, which is enough for any divisor <= sqrt(wide_t max).
   *
   * The width test relies on the identity
   *   (2^k - 1)^2 + 2 * (2^k - 1) == 2^(2k) - 1
   * i.e. X_MAX * X_MAX + 2 * X_MAX equals ULLONG_MAX only when X has
   * half as many bits as unsigned long long.
   */
  typedef unsigned long long wide_t;

  #if (UINT_MAX * UINT_MAX + 2 * UINT_MAX) == ULLONG_MAX
  /* "unsigned" is half as wide as ullong */
  typedef unsigned half_t;
  # define HALF_MAX UINT_MAX
  # define HALF_FMT ""
  #else
  # if (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX) == ULLONG_MAX
  /* "unsigned long" is half as wide as ullong */
  typedef unsigned long half_t;
  #  define HALF_MAX ULONG_MAX
  #  define HALF_FMT "l"
  # else
  #  error Cant find an integer type which is half as wide as ullong
  # endif
  #endif
  /* The trial divisor increment wheel. Use it to skip over divisors that
   * are composites of 2, 3, 5, 7, or 11.
   * Larger wheels improve sieving only slightly, but quickly grow in size
   * (adding just one prime, 13, results in 5766 element sieve).
   *
   * Storage format: every increment in the table is even and at most 14,
   * so it is stored halved in a 3-bit field. Each 64-bit word carries 21
   * such fields; field number k occupies bits 3k+1 .. 3k+3 and bit 0 is
   * always zero. Consequently "word & 0xe" yields the first increment
   * directly, and shifting the word right by 3 exposes the next one.
   */

  /* R: place one increment into field number idx of a packed word */
  #define R(val, idx) ((uint64_t)((val) / 2) << (3 * (idx) + 1))

  /* P: pack 21 consecutive increments into a single 64-bit word */
  #define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J,x) ( \
      R(a,  0) | R(b,  1) | R(c,  2) | R(d,  3) | R(e,  4) | \
      R(f,  5) | R(g,  6) | R(h,  7) | R(i,  8) | R(j,  9) | \
      R(A, 10) | R(B, 11) | R(C, 12) | R(D, 13) | R(E, 14) | \
      R(F, 15) | R(G, 16) | R(H, 17) | R(I, 18) | R(J, 19) | \
      R(x, 20) \
  )

  static const uint64_t packed_wheel[] = {
      /* 1, 2, */
      P( 2, 4, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6),
      P( 8, 4, 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2),
      P( 4, 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4),
      P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6),
      P( 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4, 2, 6),
      P( 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2, 4, 6, 2),
      P( 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6, 6, 2, 6, 4),
      P( 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6, 8, 6, 4, 2,10),
      P( 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6, 4, 2, 4, 6, 6, 2),
      P( 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 6, 6, 2, 6, 6, 4),
      P( 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 6, 4, 6, 2, 6, 4, 2, 4),
      P( 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2, 4, 8,10, 6, 2, 4, 8, 6, 6),
      P( 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2),
      P( 4, 6, 6, 2, 6, 6, 6, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6),
      P( 2, 6, 6, 4, 2, 4, 6, 8, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2),
      P(10, 2, 4, 6, 8, 6, 4, 2, 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2),
      P( 4, 6, 2, 6, 6, 4, 6, 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6),
      P( 2, 6, 4, 2,10, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4),
      P( 6, 2, 4, 6, 2,12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4),
      P( 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6),
      P( 4, 6, 2, 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4),
      P( 2,10, 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8),
      P( 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12),
  };
  #undef P
  #undef R
  /* Layout of the unpacked wheel.
   * Entries [0, WHEEL_START) are used once, at the start of trial division;
   * when the wheel index reaches WHEEL_SIZE it restarts at WHEEL_START.
   */
  #define WHEEL_START 5
  #define WHEEL_SIZE (5 + 24 * 20)
  /* Scratch storage lives in bb_common_bufsiz1:
   *   byte 0   - square_count: how many times print_w() emits each factor
   *   byte 1.. - wheel_tab: the unpacked increment table (WHEEL_SIZE bytes)
   *
   * The buffer is referenced by value, not through "&": that yields the
   * start of the buffer both when bb_common_bufsiz1 is a char array (it
   * decays to a pointer to its first element) and when it is a char
   * pointer to separately allocated storage. Taking its address would, in
   * the pointer case, alias the pointer object itself instead of the buffer.
   */
  #define square_count (((uint8_t*)bb_common_bufsiz1)[0])
  #define wheel_tab (((uint8_t*)bb_common_bufsiz1) + 1)
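  /*
   * Why is the wheel stored packed and expanded at startup, you ask?
   * Size comparison of the two approaches:
   *
   * plain byte array:
   * function                                             old     new   delta
   * wheel_tab                                              -     485    +485
   *
   * 3-bit-packed insanity:
   * function                                             old     new   delta
   * packed_wheel                                           -     184    +184
   * factor_main                                          108     163     +55
   *
   * (184 + 55 = 239 bytes for the packed variant versus 485 bytes.)
   */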
  93. static void unpack_wheel(void)
  94. {
  95. int i;
  96. uint8_t *p;
  97. setup_common_bufsiz();
  98. wheel_tab[0] = 1;
  99. wheel_tab[1] = 2;
  100. p = &wheel_tab[2];
  101. for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) {
  102. uint64_t v = packed_wheel[i];
  103. while ((v & 0xe) != 0) {
  104. *p = v & 0xe;
  105. //printf("%2u,", *p);
  106. p++;
  107. v >>= 3;
  108. }
  109. //printf("\n");
  110. }
  111. }
  112. /* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
  113. static NOINLINE void print_w(wide_t n)
  114. {
  115. unsigned rep = square_count;
  116. do
  117. printf(" %llu", n);
  118. while (--rep != 0);
  119. }
  120. static NOINLINE void print_h(half_t n)
  121. {
  122. print_w(n);
  123. }
  124. static void factorize(wide_t N);
  125. static half_t isqrt_odd(wide_t N)
  126. {
  127. half_t s = isqrt(N);
  128. /* s^2 is <= N, (s+1)^2 > N */
  129. /* If s^2 in fact is EQUAL to N, it's very lucky.
  130. * Examples:
  131. * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
  132. * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
  133. */
  134. if ((wide_t)s * s == N) {
  135. /* factorize sqrt(N), printing each factor twice */
  136. square_count *= 2;
  137. factorize(s);
  138. /* Let caller know we recursed */
  139. return 0;
  140. }
  141. /* Subtract 1 from even s, odd s won't change: */
  142. /* (doesnt work for zero, but we know that s != 0 here) */
  143. s = (s - 1) | 1;
  144. return s;
  145. }
  146. static NOINLINE void factorize(wide_t N)
  147. {
  148. unsigned w;
  149. half_t factor;
  150. half_t max_factor;
  151. if (N < 4)
  152. goto end;
  153. /* The code needs to be optimized for the case where
  154. * there are large prime factors. For example,
  155. * this is not hard:
  156. * 8262075252869367027 = 3 7 17 23 47 101 113 127 131 137 823
  157. * (the largest divisor to test for largest factor 823
  158. * is only ~sqrt(823) = 28, the entire factorization needs
  159. * only ~33 trial divisions)
  160. * but this is:
  161. * 18446744073709551601 = 53 348051774975651917
  162. * the last factor requires testing up to
  163. * 589959129 - about 100 million iterations.
  164. * The slowest case (largest prime) for N < 2^64 is
  165. * factor 18446744073709551557 (0xffffffffffffffc5).
  166. */
  167. max_factor = isqrt_odd(N);
  168. if (!max_factor)
  169. return; /* square was detected and recursively factored */
  170. factor = 2;
  171. w = 0;
  172. for (;;) {
  173. half_t fw;
  174. /* The division is the most costly part of the loop.
  175. * On 64bit CPUs, takes at best 12 cycles, often ~20.
  176. */
  177. while ((N % factor) == 0) { /* not likely */
  178. N = N / factor;
  179. print_h(factor);
  180. max_factor = isqrt_odd(N);
  181. if (!max_factor)
  182. return; /* square was detected */
  183. }
  184. if (factor >= max_factor)
  185. break;
  186. fw = factor + wheel_tab[w];
  187. if (fw < factor)
  188. break; /* overflow */
  189. factor = fw;
  190. w++;
  191. if (w < WHEEL_SIZE)
  192. continue;
  193. w = WHEEL_START;
  194. }
  195. end:
  196. if (N > 1)
  197. print_w(N);
  198. bb_putchar('\n');
  199. }
  200. static void factorize_numstr(const char *numstr)
  201. {
  202. wide_t N;
  203. /* Leading + is ok (coreutils compat) */
  204. if (*numstr == '+')
  205. numstr++;
  206. N = bb_strtoull(numstr, NULL, 10);
  207. if (errno)
  208. bb_show_usage();
  209. printf("%llu:", N);
  210. square_count = 1;
  211. factorize(N);
  212. }
  213. int factor_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  214. int factor_main(int argc UNUSED_PARAM, char **argv)
  215. {
  216. unpack_wheel();
  217. //// coreutils has undocumented option ---debug (three dashes)
  218. //getopt32(argv, "");
  219. //argv += optind;
  220. argv++;
  221. if (!*argv) {
  222. /* Read from stdin, several numbers per line are accepted */
  223. for (;;) {
  224. char *numstr, *line;
  225. line = xmalloc_fgetline(stdin);
  226. if (!line)
  227. return EXIT_SUCCESS;
  228. numstr = line;
  229. for (;;) {
  230. char *end;
  231. numstr = skip_whitespace(numstr);
  232. if (!numstr[0])
  233. break;
  234. end = skip_non_whitespace(numstr);
  235. if (*end != '\0')
  236. *end++ = '\0';
  237. factorize_numstr(numstr);
  238. numstr = end;
  239. }
  240. free(line);
  241. }
  242. }
  243. do {
  244. /* Leading spaces are ok (coreutils compat) */
  245. factorize_numstr(skip_whitespace(*argv));
  246. } while (*++argv);
  247. return EXIT_SUCCESS;
  248. }