factor.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. /*
  2. * Copyright (C) 2017 Denys Vlasenko <vda.linux@googlemail.com>
  3. *
  4. * Licensed under GPLv2, see file LICENSE in this source tree.
  5. */
  6. //config:config FACTOR
  7. //config: bool "factor (2.7 kb)"
  8. //config: default y
  9. //config: help
  10. //config: factor factorizes integers
  11. //applet:IF_FACTOR(APPLET(factor, BB_DIR_USR_BIN, BB_SUID_DROP))
  12. //kbuild:lib-$(CONFIG_FACTOR) += factor.o
  13. //usage:#define factor_trivial_usage
  14. //usage: "[NUMBER]..."
  15. //usage:#define factor_full_usage "\n\n"
  16. //usage: "Print prime factors"
  17. #include "libbb.h"
  18. #include "common_bufsiz.h"
  19. #if 0
  20. # define dbg(...) bb_error_msg(__VA_ARGS__)
  21. #else
  22. # define dbg(...) ((void)0)
  23. #endif
  24. typedef unsigned long long wide_t;
  25. #if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX)
  26. /* "unsigned" is half as wide as ullong */
  27. typedef unsigned half_t;
  28. #define HALF_MAX UINT_MAX
  29. #define HALF_FMT ""
  30. #elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX)
  31. /* long is half as wide as ullong */
  32. typedef unsigned long half_t;
  33. #define HALF_MAX ULONG_MAX
  34. #define HALF_FMT "l"
  35. #else
  36. #error Cant find an integer type which is half as wide as ullong
  37. #endif
  38. /* The trial divisor increment wheel. Use it to skip over divisors that
  39. * are composites of 2, 3, 5, 7, or 11.
  40. * Larger wheels improve sieving only slightly, but quickly grow in size
  41. * (adding just one prime, 13, results in 5766 element sieve).
  42. */
  43. #define R(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
  44. (((uint64_t)(a<<0) | (b<<3) | (c<<6) | (d<<9) | (e<<12) | (f<<15) | (g<<18) | (h<<21) | (i<<24) | (j<<27)) << 1) | \
  45. (((uint64_t)(A<<0) | (B<<3) | (C<<6) | (D<<9) | (E<<12) | (F<<15) | (G<<18) | (H<<21) | (I<<24) | (J<<27)) << 31)
  46. #define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
  47. R( (a/2),(b/2),(c/2),(d/2),(e/2),(f/2),(g/2),(h/2),(i/2),(j/2), \
  48. (A/2),(B/2),(C/2),(D/2),(E/2),(F/2),(G/2),(H/2),(I/2),(J/2) )
  49. static const uint64_t packed_wheel[] = {
  50. /*1, 2, 2, 4, 2,*/
  51. P( 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4), //01
  52. P( 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2, 4), //02
  53. P( 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4), //03
  54. P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2), //04
  55. P( 6, 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4), //05
  56. P( 2, 6, 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2), //06
  57. P( 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6), //07
  58. P( 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6), //08
  59. P( 8, 6, 4, 2,10, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6), //09
  60. P( 4, 2, 4, 6, 6, 2, 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4), //10
  61. P( 6, 6, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2), //11
  62. P( 6, 4, 6, 2, 6, 4, 2, 4, 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2), //12
  63. P( 4, 8,10, 6, 2, 4, 8, 6, 6, 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2), //13
  64. P(10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 6, 6, 4, 6, 8), //14
  65. P( 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6, 2, 6, 6, 4, 2, 4, 6, 8, 4), //15
  66. P( 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2,10, 2, 4, 6, 8, 6, 4, 2), //16
  67. P( 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 6), //17
  68. P( 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2,10, 6), //18
  69. P( 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4, 6, 2, 4, 6, 2), //19
  70. P(12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4, 6, 2, 6, 4), //20
  71. P( 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6, 4, 6, 2), //21
  72. P( 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4, 2,10), //22
  73. P( 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8, 6), //23
  74. P( 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12), //24
  75. };
  76. #undef P
  77. #undef R
  78. #define WHEEL_START 5
  79. #define WHEEL_SIZE (5 + 24 * 20)
  80. #define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
  81. #define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1)
  82. /*
  83. * Why, you ask?
  84. * plain byte array:
  85. * function old new delta
  86. * wheel_tab - 485 +485
  87. * 3-bit-packed insanity:
  88. * packed_wheel - 192 +192
  89. * factor_main 108 171 +63
  90. */
  91. static void unpack_wheel(void)
  92. {
  93. int i;
  94. uint8_t *p;
  95. setup_common_bufsiz();
  96. wheel_tab[0] = 1;
  97. wheel_tab[1] = 2;
  98. wheel_tab[2] = 2;
  99. wheel_tab[3] = 4;
  100. wheel_tab[4] = 2;
  101. p = &wheel_tab[5];
  102. for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) {
  103. uint64_t v = packed_wheel[i];
  104. while ((v & 0xe) != 0) {
  105. *p = v & 0xe;
  106. //printf("%2u,", *p);
  107. p++;
  108. v >>= 3;
  109. }
  110. //printf("\n");
  111. }
  112. }
  113. /* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
  114. static NOINLINE void print_w(wide_t n)
  115. {
  116. unsigned rep = square_count;
  117. do
  118. printf(" %llu", n);
  119. while (--rep != 0);
  120. }
  121. static NOINLINE void print_h(half_t n)
  122. {
  123. print_w(n);
  124. }
  125. static void factorize(wide_t N);
  126. static half_t isqrt_odd(wide_t N)
  127. {
  128. half_t s = isqrt(N);
  129. /* s^2 is <= N, (s+1)^2 > N */
  130. /* If s^2 in fact is EQUAL to N, it's very lucky.
  131. * Examples:
  132. * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
  133. * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
  134. */
  135. if ((wide_t)s * s == N) {
  136. /* factorize sqrt(N), printing each factor twice */
  137. square_count *= 2;
  138. factorize(s);
  139. /* Let caller know we recursed */
  140. return 0;
  141. }
  142. /* Subtract 1 from even s, odd s won't change: */
  143. /* (doesnt work for zero, but we know that s != 0 here) */
  144. s = (s - 1) | 1;
  145. return s;
  146. }
  147. static NOINLINE void factorize(wide_t N)
  148. {
  149. unsigned w;
  150. half_t factor;
  151. half_t max_factor;
  152. if (N < 4)
  153. goto end;
  154. /* The code needs to be optimized for the case where
  155. * there are large prime factors. For example,
  156. * this is not hard:
  157. * 8262075252869367027 = 3 7 17 23 47 101 113 127 131 137 823
  158. * (the largest divisor to test for largest factor 823
  159. * is only ~sqrt(823) = 28, the entire factorization needs
  160. * only ~33 trial divisions)
  161. * but this is:
  162. * 18446744073709551601 = 53 348051774975651917
  163. * the last factor requires testing up to
  164. * 589959129 - about 100 million iterations.
  165. * The slowest case (largest prime) for N < 2^64 is
  166. * factor 18446744073709551557 (0xffffffffffffffc5).
  167. */
  168. max_factor = isqrt_odd(N);
  169. if (!max_factor)
  170. return; /* square was detected and recursively factored */
  171. factor = 2;
  172. w = 0;
  173. for (;;) {
  174. half_t fw;
  175. /* The division is the most costly part of the loop.
  176. * On 64bit CPUs, takes at best 12 cycles, often ~20.
  177. */
  178. while ((N % factor) == 0) { /* not likely */
  179. N = N / factor;
  180. print_h(factor);
  181. max_factor = isqrt_odd(N);
  182. if (!max_factor)
  183. return; /* square was detected */
  184. }
  185. if (factor >= max_factor)
  186. break;
  187. fw = factor + wheel_tab[w];
  188. if (fw < factor)
  189. break; /* overflow */
  190. factor = fw;
  191. w++;
  192. if (w < WHEEL_SIZE)
  193. continue;
  194. w = WHEEL_START;
  195. }
  196. end:
  197. if (N > 1)
  198. print_w(N);
  199. bb_putchar('\n');
  200. }
  201. static void factorize_numstr(const char *numstr)
  202. {
  203. wide_t N;
  204. /* Leading + is ok (coreutils compat) */
  205. if (*numstr == '+')
  206. numstr++;
  207. N = bb_strtoull(numstr, NULL, 10);
  208. if (errno)
  209. bb_show_usage();
  210. printf("%llu:", N);
  211. square_count = 1;
  212. factorize(N);
  213. }
  214. int factor_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  215. int factor_main(int argc UNUSED_PARAM, char **argv)
  216. {
  217. unpack_wheel();
  218. //// coreutils has undocumented option ---debug (three dashes)
  219. //getopt32(argv, "");
  220. //argv += optind;
  221. argv++;
  222. if (!*argv) {
  223. /* Read from stdin, several numbers per line are accepted */
  224. for (;;) {
  225. char *numstr, *line;
  226. line = xmalloc_fgetline(stdin);
  227. if (!line)
  228. return EXIT_SUCCESS;
  229. numstr = line;
  230. for (;;) {
  231. char *end;
  232. numstr = skip_whitespace(numstr);
  233. if (!numstr[0])
  234. break;
  235. end = skip_non_whitespace(numstr);
  236. if (*end != '\0')
  237. *end++ = '\0';
  238. factorize_numstr(numstr);
  239. numstr = end;
  240. }
  241. free(line);
  242. }
  243. }
  244. do {
  245. /* Leading spaces are ok (coreutils compat) */
  246. factorize_numstr(skip_whitespace(*argv));
  247. } while (*++argv);
  248. return EXIT_SUCCESS;
  249. }