bn_exp.c 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. /* crypto/bn/bn_exp.c */
  2. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  3. * All rights reserved.
  4. *
  5. * This package is an SSL implementation written
  6. * by Eric Young (eay@cryptsoft.com).
  7. * The implementation was written so as to conform with Netscapes SSL.
  8. *
  9. * This library is free for commercial and non-commercial use as long as
  10. * the following conditions are aheared to. The following conditions
  11. * apply to all code found in this distribution, be it the RC4, RSA,
  12. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  13. * included with this distribution is covered by the same copyright terms
  14. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  15. *
  16. * Copyright remains Eric Young's, and as such any Copyright notices in
  17. * the code are not to be removed.
  18. * If this package is used in a product, Eric Young should be given attribution
  19. * as the author of the parts of the library used.
  20. * This can be in the form of a textual message at program startup or
  21. * in documentation (online or textual) provided with the package.
  22. *
  23. * Redistribution and use in source and binary forms, with or without
  24. * modification, are permitted provided that the following conditions
  25. * are met:
  26. * 1. Redistributions of source code must retain the copyright
  27. * notice, this list of conditions and the following disclaimer.
  28. * 2. Redistributions in binary form must reproduce the above copyright
  29. * notice, this list of conditions and the following disclaimer in the
  30. * documentation and/or other materials provided with the distribution.
  31. * 3. All advertising materials mentioning features or use of this software
  32. * must display the following acknowledgement:
  33. * "This product includes cryptographic software written by
  34. * Eric Young (eay@cryptsoft.com)"
  35. * The word 'cryptographic' can be left out if the rouines from the library
  36. * being used are not cryptographic related :-).
  37. * 4. If you include any Windows specific code (or a derivative thereof) from
  38. * the apps directory (application code) you must include an acknowledgement:
  39. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  40. *
  41. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  42. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  45. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  46. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  47. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  48. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  49. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  50. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  51. * SUCH DAMAGE.
  52. *
  53. * The licence and distribution terms for any publically available version or
  54. * derivative of this code cannot be changed. i.e. this code cannot simply be
  55. * copied and put under another distribution licence
  56. * [including the GNU Public Licence.]
  57. */
  58. /* ====================================================================
  59. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  60. *
  61. * Redistribution and use in source and binary forms, with or without
  62. * modification, are permitted provided that the following conditions
  63. * are met:
  64. *
  65. * 1. Redistributions of source code must retain the above copyright
  66. * notice, this list of conditions and the following disclaimer.
  67. *
  68. * 2. Redistributions in binary form must reproduce the above copyright
  69. * notice, this list of conditions and the following disclaimer in
  70. * the documentation and/or other materials provided with the
  71. * distribution.
  72. *
  73. * 3. All advertising materials mentioning features or use of this
  74. * software must display the following acknowledgment:
  75. * "This product includes software developed by the OpenSSL Project
  76. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  77. *
  78. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  79. * endorse or promote products derived from this software without
  80. * prior written permission. For written permission, please contact
  81. * openssl-core@openssl.org.
  82. *
  83. * 5. Products derived from this software may not be called "OpenSSL"
  84. * nor may "OpenSSL" appear in their names without prior written
  85. * permission of the OpenSSL Project.
  86. *
  87. * 6. Redistributions of any form whatsoever must retain the following
  88. * acknowledgment:
  89. * "This product includes software developed by the OpenSSL Project
  90. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  91. *
  92. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  93. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  94. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  95. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  96. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  97. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  98. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  99. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  100. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  101. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  102. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  103. * OF THE POSSIBILITY OF SUCH DAMAGE.
  104. * ====================================================================
  105. *
  106. * This product includes cryptographic software written by Eric Young
  107. * (eay@cryptsoft.com). This product includes software written by Tim
  108. * Hudson (tjh@cryptsoft.com).
  109. *
  110. */
  111. #include "internal/cryptlib.h"
  112. #include "bn_lcl.h"
  113. #include <stdlib.h>
  114. #ifdef _WIN32
  115. # include <malloc.h>
  116. # ifndef alloca
  117. # define alloca _alloca
  118. # endif
  119. #elif defined(__GNUC__)
  120. # ifndef alloca
  121. # define alloca(s) __builtin_alloca((s))
  122. # endif
  123. #elif defined(__sun)
  124. # include <alloca.h>
  125. #endif
  126. #undef RSAZ_ENABLED
  127. #if defined(OPENSSL_BN_ASM_MONT) && \
  128. (defined(__x86_64) || defined(__x86_64__) || \
  129. defined(_M_AMD64) || defined(_M_X64))
  130. # include "rsaz_exp.h"
  131. # define RSAZ_ENABLED
  132. #endif
  133. #undef SPARC_T4_MONT
  134. #if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
  135. # include "sparc_arch.h"
  136. extern unsigned int OPENSSL_sparcv9cap_P[];
  137. # define SPARC_T4_MONT
  138. #endif
  139. /* maximum precomputation table size for *variable* sliding windows */
  140. #define TABLE_SIZE 32
  141. /* this one works - simple but works */
  142. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  143. {
  144. int i, bits, ret = 0;
  145. BIGNUM *v, *rr;
  146. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  147. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  148. BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  149. return -1;
  150. }
  151. BN_CTX_start(ctx);
  152. if ((r == a) || (r == p))
  153. rr = BN_CTX_get(ctx);
  154. else
  155. rr = r;
  156. v = BN_CTX_get(ctx);
  157. if (rr == NULL || v == NULL)
  158. goto err;
  159. if (BN_copy(v, a) == NULL)
  160. goto err;
  161. bits = BN_num_bits(p);
  162. if (BN_is_odd(p)) {
  163. if (BN_copy(rr, a) == NULL)
  164. goto err;
  165. } else {
  166. if (!BN_one(rr))
  167. goto err;
  168. }
  169. for (i = 1; i < bits; i++) {
  170. if (!BN_sqr(v, v, ctx))
  171. goto err;
  172. if (BN_is_bit_set(p, i)) {
  173. if (!BN_mul(rr, rr, v, ctx))
  174. goto err;
  175. }
  176. }
  177. if (r != rr)
  178. BN_copy(r, rr);
  179. ret = 1;
  180. err:
  181. BN_CTX_end(ctx);
  182. bn_check_top(r);
  183. return (ret);
  184. }
  185. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  186. BN_CTX *ctx)
  187. {
  188. int ret;
  189. bn_check_top(a);
  190. bn_check_top(p);
  191. bn_check_top(m);
  192. /*-
  193. * For even modulus m = 2^k*m_odd, it might make sense to compute
  194. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  195. * exponentiation for the odd part), using appropriate exponent
  196. * reductions, and combine the results using the CRT.
  197. *
  198. * For now, we use Montgomery only if the modulus is odd; otherwise,
  199. * exponentiation using the reciprocal-based quick remaindering
  200. * algorithm is used.
  201. *
  202. * (Timing obtained with expspeed.c [computations a^p mod m
  203. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  204. * 4096, 8192 bits], compared to the running time of the
  205. * standard algorithm:
  206. *
  207. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  208. * 55 .. 77 % [UltraSparc processor, but
  209. * debug-solaris-sparcv8-gcc conf.]
  210. *
  211. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  212. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  213. *
  214. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  215. * at 2048 and more bits, but at 512 and 1024 bits, it was
  216. * slower even than the standard algorithm!
  217. *
  218. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  219. * should be obtained when the new Montgomery reduction code
  220. * has been integrated into OpenSSL.)
  221. */
  222. #define MONT_MUL_MOD
  223. #define MONT_EXP_WORD
  224. #define RECP_MUL_MOD
  225. #ifdef MONT_MUL_MOD
  226. /*
  227. * I have finally been able to take out this pre-condition of the top bit
  228. * being set. It was caused by an error in BN_div with negatives. There
  229. * was also another problem when for a^b%m a >= m. eay 07-May-97
  230. */
  231. /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
  232. if (BN_is_odd(m)) {
  233. # ifdef MONT_EXP_WORD
  234. if (a->top == 1 && !a->neg
  235. && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
  236. BN_ULONG A = a->d[0];
  237. ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  238. } else
  239. # endif
  240. ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  241. } else
  242. #endif
  243. #ifdef RECP_MUL_MOD
  244. {
  245. ret = BN_mod_exp_recp(r, a, p, m, ctx);
  246. }
  247. #else
  248. {
  249. ret = BN_mod_exp_simple(r, a, p, m, ctx);
  250. }
  251. #endif
  252. bn_check_top(r);
  253. return (ret);
  254. }
  255. int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  256. const BIGNUM *m, BN_CTX *ctx)
  257. {
  258. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  259. int start = 1;
  260. BIGNUM *aa;
  261. /* Table of variables obtained from 'ctx' */
  262. BIGNUM *val[TABLE_SIZE];
  263. BN_RECP_CTX recp;
  264. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  265. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  266. BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  267. return -1;
  268. }
  269. bits = BN_num_bits(p);
  270. if (bits == 0) {
  271. ret = BN_one(r);
  272. return ret;
  273. }
  274. BN_CTX_start(ctx);
  275. aa = BN_CTX_get(ctx);
  276. val[0] = BN_CTX_get(ctx);
  277. if (!aa || !val[0])
  278. goto err;
  279. BN_RECP_CTX_init(&recp);
  280. if (m->neg) {
  281. /* ignore sign of 'm' */
  282. if (!BN_copy(aa, m))
  283. goto err;
  284. aa->neg = 0;
  285. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
  286. goto err;
  287. } else {
  288. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
  289. goto err;
  290. }
  291. if (!BN_nnmod(val[0], a, m, ctx))
  292. goto err; /* 1 */
  293. if (BN_is_zero(val[0])) {
  294. BN_zero(r);
  295. ret = 1;
  296. goto err;
  297. }
  298. window = BN_window_bits_for_exponent_size(bits);
  299. if (window > 1) {
  300. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
  301. goto err; /* 2 */
  302. j = 1 << (window - 1);
  303. for (i = 1; i < j; i++) {
  304. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  305. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
  306. goto err;
  307. }
  308. }
  309. start = 1; /* This is used to avoid multiplication etc
  310. * when there is only the value '1' in the
  311. * buffer. */
  312. wvalue = 0; /* The 'value' of the window */
  313. wstart = bits - 1; /* The top bit of the window */
  314. wend = 0; /* The bottom bit of the window */
  315. if (!BN_one(r))
  316. goto err;
  317. for (;;) {
  318. if (BN_is_bit_set(p, wstart) == 0) {
  319. if (!start)
  320. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  321. goto err;
  322. if (wstart == 0)
  323. break;
  324. wstart--;
  325. continue;
  326. }
  327. /*
  328. * We now have wstart on a 'set' bit, we now need to work out how bit
  329. * a window to do. To do this we need to scan forward until the last
  330. * set bit before the end of the window
  331. */
  332. j = wstart;
  333. wvalue = 1;
  334. wend = 0;
  335. for (i = 1; i < window; i++) {
  336. if (wstart - i < 0)
  337. break;
  338. if (BN_is_bit_set(p, wstart - i)) {
  339. wvalue <<= (i - wend);
  340. wvalue |= 1;
  341. wend = i;
  342. }
  343. }
  344. /* wend is the size of the current window */
  345. j = wend + 1;
  346. /* add the 'bytes above' */
  347. if (!start)
  348. for (i = 0; i < j; i++) {
  349. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  350. goto err;
  351. }
  352. /* wvalue will be an odd number < 2^window */
  353. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
  354. goto err;
  355. /* move the 'window' down further */
  356. wstart -= wend + 1;
  357. wvalue = 0;
  358. start = 0;
  359. if (wstart < 0)
  360. break;
  361. }
  362. ret = 1;
  363. err:
  364. BN_CTX_end(ctx);
  365. BN_RECP_CTX_free(&recp);
  366. bn_check_top(r);
  367. return (ret);
  368. }
  369. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  370. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  371. {
  372. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  373. int start = 1;
  374. BIGNUM *d, *r;
  375. const BIGNUM *aa;
  376. /* Table of variables obtained from 'ctx' */
  377. BIGNUM *val[TABLE_SIZE];
  378. BN_MONT_CTX *mont = NULL;
  379. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  380. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  381. }
  382. bn_check_top(a);
  383. bn_check_top(p);
  384. bn_check_top(m);
  385. if (!BN_is_odd(m)) {
  386. BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
  387. return (0);
  388. }
  389. bits = BN_num_bits(p);
  390. if (bits == 0) {
  391. ret = BN_one(rr);
  392. return ret;
  393. }
  394. BN_CTX_start(ctx);
  395. d = BN_CTX_get(ctx);
  396. r = BN_CTX_get(ctx);
  397. val[0] = BN_CTX_get(ctx);
  398. if (!d || !r || !val[0])
  399. goto err;
  400. /*
  401. * If this is not done, things will break in the montgomery part
  402. */
  403. if (in_mont != NULL)
  404. mont = in_mont;
  405. else {
  406. if ((mont = BN_MONT_CTX_new()) == NULL)
  407. goto err;
  408. if (!BN_MONT_CTX_set(mont, m, ctx))
  409. goto err;
  410. }
  411. if (a->neg || BN_ucmp(a, m) >= 0) {
  412. if (!BN_nnmod(val[0], a, m, ctx))
  413. goto err;
  414. aa = val[0];
  415. } else
  416. aa = a;
  417. if (BN_is_zero(aa)) {
  418. BN_zero(rr);
  419. ret = 1;
  420. goto err;
  421. }
  422. if (!BN_to_montgomery(val[0], aa, mont, ctx))
  423. goto err; /* 1 */
  424. window = BN_window_bits_for_exponent_size(bits);
  425. if (window > 1) {
  426. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
  427. goto err; /* 2 */
  428. j = 1 << (window - 1);
  429. for (i = 1; i < j; i++) {
  430. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  431. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
  432. goto err;
  433. }
  434. }
  435. start = 1; /* This is used to avoid multiplication etc
  436. * when there is only the value '1' in the
  437. * buffer. */
  438. wvalue = 0; /* The 'value' of the window */
  439. wstart = bits - 1; /* The top bit of the window */
  440. wend = 0; /* The bottom bit of the window */
  441. #if 1 /* by Shay Gueron's suggestion */
  442. j = m->top; /* borrow j */
  443. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  444. if (bn_wexpand(r, j) == NULL)
  445. goto err;
  446. /* 2^(top*BN_BITS2) - m */
  447. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  448. for (i = 1; i < j; i++)
  449. r->d[i] = (~m->d[i]) & BN_MASK2;
  450. r->top = j;
  451. /*
  452. * Upper words will be zero if the corresponding words of 'm' were
  453. * 0xfff[...], so decrement r->top accordingly.
  454. */
  455. bn_correct_top(r);
  456. } else
  457. #endif
  458. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
  459. goto err;
  460. for (;;) {
  461. if (BN_is_bit_set(p, wstart) == 0) {
  462. if (!start) {
  463. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  464. goto err;
  465. }
  466. if (wstart == 0)
  467. break;
  468. wstart--;
  469. continue;
  470. }
  471. /*
  472. * We now have wstart on a 'set' bit, we now need to work out how bit
  473. * a window to do. To do this we need to scan forward until the last
  474. * set bit before the end of the window
  475. */
  476. j = wstart;
  477. wvalue = 1;
  478. wend = 0;
  479. for (i = 1; i < window; i++) {
  480. if (wstart - i < 0)
  481. break;
  482. if (BN_is_bit_set(p, wstart - i)) {
  483. wvalue <<= (i - wend);
  484. wvalue |= 1;
  485. wend = i;
  486. }
  487. }
  488. /* wend is the size of the current window */
  489. j = wend + 1;
  490. /* add the 'bytes above' */
  491. if (!start)
  492. for (i = 0; i < j; i++) {
  493. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  494. goto err;
  495. }
  496. /* wvalue will be an odd number < 2^window */
  497. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
  498. goto err;
  499. /* move the 'window' down further */
  500. wstart -= wend + 1;
  501. wvalue = 0;
  502. start = 0;
  503. if (wstart < 0)
  504. break;
  505. }
  506. #if defined(SPARC_T4_MONT)
  507. if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
  508. j = mont->N.top; /* borrow j */
  509. val[0]->d[0] = 1; /* borrow val[0] */
  510. for (i = 1; i < j; i++)
  511. val[0]->d[i] = 0;
  512. val[0]->top = j;
  513. if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
  514. goto err;
  515. } else
  516. #endif
  517. if (!BN_from_montgomery(rr, r, mont, ctx))
  518. goto err;
  519. ret = 1;
  520. err:
  521. if (in_mont == NULL)
  522. BN_MONT_CTX_free(mont);
  523. BN_CTX_end(ctx);
  524. bn_check_top(rr);
  525. return (ret);
  526. }
  527. #if defined(SPARC_T4_MONT)
  528. static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
  529. {
  530. BN_ULONG ret = 0;
  531. int wordpos;
  532. wordpos = bitpos / BN_BITS2;
  533. bitpos %= BN_BITS2;
  534. if (wordpos >= 0 && wordpos < a->top) {
  535. ret = a->d[wordpos] & BN_MASK2;
  536. if (bitpos) {
  537. ret >>= bitpos;
  538. if (++wordpos < a->top)
  539. ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
  540. }
  541. }
  542. return ret & BN_MASK2;
  543. }
  544. #endif
  545. /*
  546. * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  547. * layout so that accessing any of these table values shows the same access
  548. * pattern as far as cache lines are concerned. The following functions are
  549. * used to transfer a BIGNUM from/to that table.
  550. */
  551. static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
  552. unsigned char *buf, int idx,
  553. int width)
  554. {
  555. size_t i, j;
  556. if (top > b->top)
  557. top = b->top; /* this works because 'buf' is explicitly
  558. * zeroed */
  559. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  560. buf[j] = ((unsigned char *)b->d)[i];
  561. }
  562. return 1;
  563. }
  564. static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
  565. unsigned char *buf, int idx,
  566. int width)
  567. {
  568. size_t i, j;
  569. if (bn_wexpand(b, top) == NULL)
  570. return 0;
  571. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  572. ((unsigned char *)b->d)[i] = buf[j];
  573. }
  574. b->top = top;
  575. bn_correct_top(b);
  576. return 1;
  577. }
  578. /*
  579. * Given a pointer value, compute the next address that is a cache line
  580. * multiple.
  581. */
  582. #define MOD_EXP_CTIME_ALIGN(x_) \
  583. ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  584. /*
  585. * This variant of BN_mod_exp_mont() uses fixed windows and the special
  586. * precomputation memory layout to limit data-dependency to a minimum to
  587. * protect secret exponents (cf. the hyper-threading timing attacks pointed
  588. * out by Colin Percival,
  589. * http://www.daemong-consideredperthreading-considered-harmful/)
  590. */
  591. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  592. const BIGNUM *m, BN_CTX *ctx,
  593. BN_MONT_CTX *in_mont)
  594. {
  595. int i, bits, ret = 0, window, wvalue;
  596. int top;
  597. BN_MONT_CTX *mont = NULL;
  598. int numPowers;
  599. unsigned char *powerbufFree = NULL;
  600. int powerbufLen = 0;
  601. unsigned char *powerbuf = NULL;
  602. BIGNUM tmp, am;
  603. #if defined(SPARC_T4_MONT)
  604. unsigned int t4 = 0;
  605. #endif
  606. bn_check_top(a);
  607. bn_check_top(p);
  608. bn_check_top(m);
  609. top = m->top;
  610. if (!(m->d[0] & 1)) {
  611. BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
  612. return (0);
  613. }
  614. bits = BN_num_bits(p);
  615. if (bits == 0) {
  616. ret = BN_one(rr);
  617. return ret;
  618. }
  619. BN_CTX_start(ctx);
  620. /*
  621. * Allocate a montgomery context if it was not supplied by the caller. If
  622. * this is not done, things will break in the montgomery part.
  623. */
  624. if (in_mont != NULL)
  625. mont = in_mont;
  626. else {
  627. if ((mont = BN_MONT_CTX_new()) == NULL)
  628. goto err;
  629. if (!BN_MONT_CTX_set(mont, m, ctx))
  630. goto err;
  631. }
  632. #ifdef RSAZ_ENABLED
  633. /*
  634. * If the size of the operands allow it, perform the optimized
  635. * RSAZ exponentiation. For further information see
  636. * crypto/bn/rsaz_exp.c and accompanying assembly modules.
  637. */
  638. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
  639. && rsaz_avx2_eligible()) {
  640. if (NULL == bn_wexpand(rr, 16))
  641. goto err;
  642. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
  643. mont->n0[0]);
  644. rr->top = 16;
  645. rr->neg = 0;
  646. bn_correct_top(rr);
  647. ret = 1;
  648. goto err;
  649. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  650. if (NULL == bn_wexpand(rr, 8))
  651. goto err;
  652. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  653. rr->top = 8;
  654. rr->neg = 0;
  655. bn_correct_top(rr);
  656. ret = 1;
  657. goto err;
  658. }
  659. #endif
  660. /* Get the window size to use with size of p. */
  661. window = BN_window_bits_for_ctime_exponent_size(bits);
  662. #if defined(SPARC_T4_MONT)
  663. if (window >= 5 && (top & 15) == 0 && top <= 64 &&
  664. (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
  665. (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
  666. window = 5;
  667. else
  668. #endif
  669. #if defined(OPENSSL_BN_ASM_MONT5)
  670. if (window >= 5) {
  671. window = 5; /* ~5% improvement for RSA2048 sign, and even
  672. * for RSA4096 */
  673. if ((top & 7) == 0)
  674. powerbufLen += 2 * top * sizeof(m->d[0]);
  675. }
  676. #endif
  677. (void)0;
  678. /*
  679. * Allocate a buffer large enough to hold all of the pre-computed powers
  680. * of am, am itself and tmp.
  681. */
  682. numPowers = 1 << window;
  683. powerbufLen += sizeof(m->d[0]) * (top * numPowers +
  684. ((2 * top) >
  685. numPowers ? (2 * top) : numPowers));
  686. #ifdef alloca
  687. if (powerbufLen < 3072)
  688. powerbufFree =
  689. alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  690. else
  691. #endif
  692. if ((powerbufFree =
  693. OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
  694. == NULL)
  695. goto err;
  696. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  697. memset(powerbuf, 0, powerbufLen);
  698. #ifdef alloca
  699. if (powerbufLen < 3072)
  700. powerbufFree = NULL;
  701. #endif
  702. /* lay down tmp and am right after powers table */
  703. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  704. am.d = tmp.d + top;
  705. tmp.top = am.top = 0;
  706. tmp.dmax = am.dmax = top;
  707. tmp.neg = am.neg = 0;
  708. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  709. /* prepare a^0 in Montgomery domain */
  710. #if 1 /* by Shay Gueron's suggestion */
  711. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  712. /* 2^(top*BN_BITS2) - m */
  713. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  714. for (i = 1; i < top; i++)
  715. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  716. tmp.top = top;
  717. } else
  718. #endif
  719. if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  720. goto err;
  721. /* prepare a^1 in Montgomery domain */
  722. if (a->neg || BN_ucmp(a, m) >= 0) {
  723. if (!BN_mod(&am, a, m, ctx))
  724. goto err;
  725. if (!BN_to_montgomery(&am, &am, mont, ctx))
  726. goto err;
  727. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  728. goto err;
  729. #if defined(SPARC_T4_MONT)
  730. if (t4) {
  731. typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np,
  732. const BN_ULONG *n0, const void *table,
  733. int power, int bits);
  734. int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np,
  735. const BN_ULONG *n0, const void *table,
  736. int power, int bits);
  737. int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np,
  738. const BN_ULONG *n0, const void *table,
  739. int power, int bits);
  740. int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np,
  741. const BN_ULONG *n0, const void *table,
  742. int power, int bits);
  743. int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np,
  744. const BN_ULONG *n0, const void *table,
  745. int power, int bits);
  746. static const bn_pwr5_mont_f pwr5_funcs[4] = {
  747. bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
  748. bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
  749. };
  750. bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
  751. typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
  752. const void *bp, const BN_ULONG *np,
  753. const BN_ULONG *n0);
  754. int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp,
  755. const BN_ULONG *np, const BN_ULONG *n0);
  756. int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
  757. const void *bp, const BN_ULONG *np,
  758. const BN_ULONG *n0);
  759. int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
  760. const void *bp, const BN_ULONG *np,
  761. const BN_ULONG *n0);
  762. int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
  763. const void *bp, const BN_ULONG *np,
  764. const BN_ULONG *n0);
  765. static const bn_mul_mont_f mul_funcs[4] = {
  766. bn_mul_mont_t4_8, bn_mul_mont_t4_16,
  767. bn_mul_mont_t4_24, bn_mul_mont_t4_32
  768. };
  769. bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
  770. void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap,
  771. const void *bp, const BN_ULONG *np,
  772. const BN_ULONG *n0, int num);
  773. void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap,
  774. const void *bp, const BN_ULONG *np,
  775. const BN_ULONG *n0, int num);
  776. void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap,
  777. const void *table, const BN_ULONG *np,
  778. const BN_ULONG *n0, int num, int power);
  779. void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
  780. void *table, size_t power);
  781. void bn_gather5_t4(BN_ULONG *out, size_t num,
  782. void *table, size_t power);
  783. void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num);
  784. BN_ULONG *np = mont->N.d, *n0 = mont->n0;
  785. int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
  786. * than 32 */
  787. /*
  788. * BN_to_montgomery can contaminate words above .top [in
  789. * BN_DEBUG[_DEBUG] build]...
  790. */
  791. for (i = am.top; i < top; i++)
  792. am.d[i] = 0;
  793. for (i = tmp.top; i < top; i++)
  794. tmp.d[i] = 0;
  795. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
  796. bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
  797. if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
  798. !(*mul_worker) (tmp.d, am.d, am.d, np, n0))
  799. bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
  800. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
  801. for (i = 3; i < 32; i++) {
  802. /* Calculate a^i = a^(i-1) * a */
  803. if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
  804. !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
  805. bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
  806. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
  807. }
  808. /* switch to 64-bit domain */
  809. np = alloca(top * sizeof(BN_ULONG));
  810. top /= 2;
  811. bn_flip_t4(np, mont->N.d, top);
  812. bits--;
  813. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  814. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  815. bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
  816. /*
  817. * Scan the exponent one window at a time starting from the most
  818. * significant bits.
  819. */
  820. while (bits >= 0) {
  821. if (bits < stride)
  822. stride = bits + 1;
  823. bits -= stride;
  824. wvalue = bn_get_bits(p, bits + 1);
  825. if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
  826. continue;
  827. /* retry once and fall back */
  828. if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
  829. continue;
  830. bits += stride - 5;
  831. wvalue >>= stride - 5;
  832. wvalue &= 31;
  833. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  834. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  835. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  836. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  837. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  838. bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
  839. wvalue);
  840. }
  841. bn_flip_t4(tmp.d, tmp.d, top);
  842. top *= 2;
  843. /* back to 32-bit domain */
  844. tmp.top = top;
  845. bn_correct_top(&tmp);
  846. OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
  847. } else
  848. #endif
  849. #if defined(OPENSSL_BN_ASM_MONT5)
  850. if (window == 5 && top > 1) {
  851. /*
  852. * This optimization uses ideas from http://eprint.iacr.org/2011/239,
  853. * specifically optimization of cache-timing attack countermeasures
  854. * and pre-computation optimization.
  855. */
  856. /*
  857. * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  858. * 512-bit RSA is hardly relevant, we omit it to spare size...
  859. */
  860. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
  861. const void *table, const BN_ULONG *np,
  862. const BN_ULONG *n0, int num, int power);
  863. void bn_scatter5(const BN_ULONG *inp, size_t num,
  864. void *table, size_t power);
  865. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  866. void bn_power5(BN_ULONG *rp, const BN_ULONG *ap,
  867. const void *table, const BN_ULONG *np,
  868. const BN_ULONG *n0, int num, int power);
  869. int bn_get_bits5(const BN_ULONG *ap, int off);
  870. int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
  871. const BN_ULONG *not_used, const BN_ULONG *np,
  872. const BN_ULONG *n0, int num);
  873. BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  874. /*
  875. * BN_to_montgomery can contaminate words above .top [in
  876. * BN_DEBUG[_DEBUG] build]...
  877. */
  878. for (i = am.top; i < top; i++)
  879. am.d[i] = 0;
  880. for (i = tmp.top; i < top; i++)
  881. tmp.d[i] = 0;
  882. if (top & 7)
  883. np2 = np;
  884. else
  885. for (np2 = am.d + top, i = 0; i < top; i++)
  886. np2[2 * i] = np[i];
  887. bn_scatter5(tmp.d, top, powerbuf, 0);
  888. bn_scatter5(am.d, am.top, powerbuf, 1);
  889. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  890. bn_scatter5(tmp.d, top, powerbuf, 2);
  891. # if 0
  892. for (i = 3; i < 32; i++) {
  893. /* Calculate a^i = a^(i-1) * a */
  894. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  895. bn_scatter5(tmp.d, top, powerbuf, i);
  896. }
  897. # else
  898. /* same as above, but uses squaring for 1/2 of operations */
  899. for (i = 4; i < 32; i *= 2) {
  900. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  901. bn_scatter5(tmp.d, top, powerbuf, i);
  902. }
  903. for (i = 3; i < 8; i += 2) {
  904. int j;
  905. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  906. bn_scatter5(tmp.d, top, powerbuf, i);
  907. for (j = 2 * i; j < 32; j *= 2) {
  908. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  909. bn_scatter5(tmp.d, top, powerbuf, j);
  910. }
  911. }
  912. for (; i < 16; i += 2) {
  913. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  914. bn_scatter5(tmp.d, top, powerbuf, i);
  915. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  916. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  917. }
  918. for (; i < 32; i += 2) {
  919. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  920. bn_scatter5(tmp.d, top, powerbuf, i);
  921. }
  922. # endif
  923. bits--;
  924. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  925. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  926. bn_gather5(tmp.d, top, powerbuf, wvalue);
  927. /*
  928. * Scan the exponent one window at a time starting from the most
  929. * significant bits.
  930. */
  931. if (top & 7)
  932. while (bits >= 0) {
  933. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  934. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  935. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  936. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  937. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  938. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  939. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  940. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
  941. wvalue);
  942. } else {
  943. while (bits >= 0) {
  944. wvalue = bn_get_bits5(p->d, bits - 4);
  945. bits -= 5;
  946. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  947. }
  948. }
  949. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  950. tmp.top = top;
  951. bn_correct_top(&tmp);
  952. if (ret) {
  953. if (!BN_copy(rr, &tmp))
  954. ret = 0;
  955. goto err; /* non-zero ret means it's not error */
  956. }
  957. } else
  958. #endif
  959. {
  960. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
  961. goto err;
  962. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
  963. goto err;
  964. /*
  965. * If the window size is greater than 1, then calculate
  966. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
  967. * powers could instead be computed as (a^(i/2))^2 to use the slight
  968. * performance advantage of sqr over mul).
  969. */
  970. if (window > 1) {
  971. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  972. goto err;
  973. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  974. (&tmp, top, powerbuf, 2, numPowers))
  975. goto err;
  976. for (i = 3; i < numPowers; i++) {
  977. /* Calculate a^i = a^(i-1) * a */
  978. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  979. goto err;
  980. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  981. (&tmp, top, powerbuf, i, numPowers))
  982. goto err;
  983. }
  984. }
  985. bits--;
  986. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  987. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  988. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  989. (&tmp, top, powerbuf, wvalue, numPowers))
  990. goto err;
  991. /*
  992. * Scan the exponent one window at a time starting from the most
  993. * significant bits.
  994. */
  995. while (bits >= 0) {
  996. wvalue = 0; /* The 'value' of the window */
  997. /* Scan the window, squaring the result as we go */
  998. for (i = 0; i < window; i++, bits--) {
  999. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  1000. goto err;
  1001. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1002. }
  1003. /*
  1004. * Fetch the appropriate pre-computed value from the pre-buf
  1005. */
  1006. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  1007. (&am, top, powerbuf, wvalue, numPowers))
  1008. goto err;
  1009. /* Multiply the result into the intermediate result */
  1010. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  1011. goto err;
  1012. }
  1013. }
  1014. /* Convert the final result from montgomery to standard format */
  1015. #if defined(SPARC_T4_MONT)
  1016. if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
  1017. am.d[0] = 1; /* borrow am */
  1018. for (i = 1; i < top; i++)
  1019. am.d[i] = 0;
  1020. if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
  1021. goto err;
  1022. } else
  1023. #endif
  1024. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  1025. goto err;
  1026. ret = 1;
  1027. err:
  1028. if (in_mont == NULL)
  1029. BN_MONT_CTX_free(mont);
  1030. if (powerbuf != NULL) {
  1031. OPENSSL_cleanse(powerbuf, powerbufLen);
  1032. OPENSSL_free(powerbufFree);
  1033. }
  1034. BN_CTX_end(ctx);
  1035. return (ret);
  1036. }
  1037. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1038. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  1039. {
  1040. BN_MONT_CTX *mont = NULL;
  1041. int b, bits, ret = 0;
  1042. int r_is_one;
  1043. BN_ULONG w, next_w;
  1044. BIGNUM *d, *r, *t;
  1045. BIGNUM *swap_tmp;
  1046. #define BN_MOD_MUL_WORD(r, w, m) \
  1047. (BN_mul_word(r, (w)) && \
  1048. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1049. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1050. /*
  1051. * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1052. * probably more overhead than always using BN_mod (which uses BN_copy if
  1053. * a similar test returns true).
  1054. */
  1055. /*
  1056. * We can use BN_mod and do not need BN_nnmod because our accumulator is
  1057. * never negative (the result of BN_mod does not depend on the sign of
  1058. * the modulus).
  1059. */
  1060. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1061. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1062. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1063. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1064. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1065. return -1;
  1066. }
  1067. bn_check_top(p);
  1068. bn_check_top(m);
  1069. if (!BN_is_odd(m)) {
  1070. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
  1071. return (0);
  1072. }
  1073. if (m->top == 1)
  1074. a %= m->d[0]; /* make sure that 'a' is reduced */
  1075. bits = BN_num_bits(p);
  1076. if (bits == 0) {
  1077. /* x**0 mod 1 is still zero. */
  1078. if (BN_is_one(m)) {
  1079. ret = 1;
  1080. BN_zero(rr);
  1081. } else
  1082. ret = BN_one(rr);
  1083. return ret;
  1084. }
  1085. if (a == 0) {
  1086. BN_zero(rr);
  1087. ret = 1;
  1088. return ret;
  1089. }
  1090. BN_CTX_start(ctx);
  1091. d = BN_CTX_get(ctx);
  1092. r = BN_CTX_get(ctx);
  1093. t = BN_CTX_get(ctx);
  1094. if (d == NULL || r == NULL || t == NULL)
  1095. goto err;
  1096. if (in_mont != NULL)
  1097. mont = in_mont;
  1098. else {
  1099. if ((mont = BN_MONT_CTX_new()) == NULL)
  1100. goto err;
  1101. if (!BN_MONT_CTX_set(mont, m, ctx))
  1102. goto err;
  1103. }
  1104. r_is_one = 1; /* except for Montgomery factor */
  1105. /* bits-1 >= 0 */
  1106. /* The result is accumulated in the product r*w. */
  1107. w = a; /* bit 'bits-1' of 'p' is always set */
  1108. for (b = bits - 2; b >= 0; b--) {
  1109. /* First, square r*w. */
  1110. next_w = w * w;
  1111. if ((next_w / w) != w) { /* overflow */
  1112. if (r_is_one) {
  1113. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1114. goto err;
  1115. r_is_one = 0;
  1116. } else {
  1117. if (!BN_MOD_MUL_WORD(r, w, m))
  1118. goto err;
  1119. }
  1120. next_w = 1;
  1121. }
  1122. w = next_w;
  1123. if (!r_is_one) {
  1124. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  1125. goto err;
  1126. }
  1127. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1128. if (BN_is_bit_set(p, b)) {
  1129. next_w = w * a;
  1130. if ((next_w / a) != w) { /* overflow */
  1131. if (r_is_one) {
  1132. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1133. goto err;
  1134. r_is_one = 0;
  1135. } else {
  1136. if (!BN_MOD_MUL_WORD(r, w, m))
  1137. goto err;
  1138. }
  1139. next_w = a;
  1140. }
  1141. w = next_w;
  1142. }
  1143. }
  1144. /* Finally, set r:=r*w. */
  1145. if (w != 1) {
  1146. if (r_is_one) {
  1147. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1148. goto err;
  1149. r_is_one = 0;
  1150. } else {
  1151. if (!BN_MOD_MUL_WORD(r, w, m))
  1152. goto err;
  1153. }
  1154. }
  1155. if (r_is_one) { /* can happen only if a == 1 */
  1156. if (!BN_one(rr))
  1157. goto err;
  1158. } else {
  1159. if (!BN_from_montgomery(rr, r, mont, ctx))
  1160. goto err;
  1161. }
  1162. ret = 1;
  1163. err:
  1164. if (in_mont == NULL)
  1165. BN_MONT_CTX_free(mont);
  1166. BN_CTX_end(ctx);
  1167. bn_check_top(rr);
  1168. return (ret);
  1169. }
  1170. /* The old fallback, simple version :-) */
  1171. int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  1172. const BIGNUM *m, BN_CTX *ctx)
  1173. {
  1174. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  1175. int start = 1;
  1176. BIGNUM *d;
  1177. /* Table of variables obtained from 'ctx' */
  1178. BIGNUM *val[TABLE_SIZE];
  1179. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1180. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1181. BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1182. return -1;
  1183. }
  1184. bits = BN_num_bits(p);
  1185. if (bits == 0) {
  1186. ret = BN_one(r);
  1187. return ret;
  1188. }
  1189. BN_CTX_start(ctx);
  1190. d = BN_CTX_get(ctx);
  1191. val[0] = BN_CTX_get(ctx);
  1192. if (!d || !val[0])
  1193. goto err;
  1194. if (!BN_nnmod(val[0], a, m, ctx))
  1195. goto err; /* 1 */
  1196. if (BN_is_zero(val[0])) {
  1197. BN_zero(r);
  1198. ret = 1;
  1199. goto err;
  1200. }
  1201. window = BN_window_bits_for_exponent_size(bits);
  1202. if (window > 1) {
  1203. if (!BN_mod_mul(d, val[0], val[0], m, ctx))
  1204. goto err; /* 2 */
  1205. j = 1 << (window - 1);
  1206. for (i = 1; i < j; i++) {
  1207. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  1208. !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
  1209. goto err;
  1210. }
  1211. }
  1212. start = 1; /* This is used to avoid multiplication etc
  1213. * when there is only the value '1' in the
  1214. * buffer. */
  1215. wvalue = 0; /* The 'value' of the window */
  1216. wstart = bits - 1; /* The top bit of the window */
  1217. wend = 0; /* The bottom bit of the window */
  1218. if (!BN_one(r))
  1219. goto err;
  1220. for (;;) {
  1221. if (BN_is_bit_set(p, wstart) == 0) {
  1222. if (!start)
  1223. if (!BN_mod_mul(r, r, r, m, ctx))
  1224. goto err;
  1225. if (wstart == 0)
  1226. break;
  1227. wstart--;
  1228. continue;
  1229. }
  1230. /*
  1231. * We now have wstart on a 'set' bit, we now need to work out how bit
  1232. * a window to do. To do this we need to scan forward until the last
  1233. * set bit before the end of the window
  1234. */
  1235. j = wstart;
  1236. wvalue = 1;
  1237. wend = 0;
  1238. for (i = 1; i < window; i++) {
  1239. if (wstart - i < 0)
  1240. break;
  1241. if (BN_is_bit_set(p, wstart - i)) {
  1242. wvalue <<= (i - wend);
  1243. wvalue |= 1;
  1244. wend = i;
  1245. }
  1246. }
  1247. /* wend is the size of the current window */
  1248. j = wend + 1;
  1249. /* add the 'bytes above' */
  1250. if (!start)
  1251. for (i = 0; i < j; i++) {
  1252. if (!BN_mod_mul(r, r, r, m, ctx))
  1253. goto err;
  1254. }
  1255. /* wvalue will be an odd number < 2^window */
  1256. if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
  1257. goto err;
  1258. /* move the 'window' down further */
  1259. wstart -= wend + 1;
  1260. wvalue = 0;
  1261. start = 0;
  1262. if (wstart < 0)
  1263. break;
  1264. }
  1265. ret = 1;
  1266. err:
  1267. BN_CTX_end(ctx);
  1268. bn_check_top(r);
  1269. return (ret);
  1270. }