bn_exp.c 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105
  1. /* crypto/bn/bn_exp.c */
  2. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  3. * All rights reserved.
  4. *
  5. * This package is an SSL implementation written
  6. * by Eric Young (eay@cryptsoft.com).
  7. * The implementation was written so as to conform with Netscapes SSL.
  8. *
  9. * This library is free for commercial and non-commercial use as long as
  10. * the following conditions are aheared to. The following conditions
  11. * apply to all code found in this distribution, be it the RC4, RSA,
  12. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  13. * included with this distribution is covered by the same copyright terms
  14. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  15. *
  16. * Copyright remains Eric Young's, and as such any Copyright notices in
  17. * the code are not to be removed.
  18. * If this package is used in a product, Eric Young should be given attribution
  19. * as the author of the parts of the library used.
  20. * This can be in the form of a textual message at program startup or
  21. * in documentation (online or textual) provided with the package.
  22. *
  23. * Redistribution and use in source and binary forms, with or without
  24. * modification, are permitted provided that the following conditions
  25. * are met:
  26. * 1. Redistributions of source code must retain the copyright
  27. * notice, this list of conditions and the following disclaimer.
  28. * 2. Redistributions in binary form must reproduce the above copyright
  29. * notice, this list of conditions and the following disclaimer in the
  30. * documentation and/or other materials provided with the distribution.
  31. * 3. All advertising materials mentioning features or use of this software
  32. * must display the following acknowledgement:
  33. * "This product includes cryptographic software written by
  34. * Eric Young (eay@cryptsoft.com)"
  35. * The word 'cryptographic' can be left out if the rouines from the library
  36. * being used are not cryptographic related :-).
  37. * 4. If you include any Windows specific code (or a derivative thereof) from
  38. * the apps directory (application code) you must include an acknowledgement:
  39. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  40. *
  41. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  42. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  45. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  46. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  47. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  48. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  49. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  50. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  51. * SUCH DAMAGE.
  52. *
  53. * The licence and distribution terms for any publically available version or
  54. * derivative of this code cannot be changed. i.e. this code cannot simply be
  55. * copied and put under another distribution licence
  56. * [including the GNU Public Licence.]
  57. */
  58. /* ====================================================================
  59. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  60. *
  61. * Redistribution and use in source and binary forms, with or without
  62. * modification, are permitted provided that the following conditions
  63. * are met:
  64. *
  65. * 1. Redistributions of source code must retain the above copyright
  66. * notice, this list of conditions and the following disclaimer.
  67. *
  68. * 2. Redistributions in binary form must reproduce the above copyright
  69. * notice, this list of conditions and the following disclaimer in
  70. * the documentation and/or other materials provided with the
  71. * distribution.
  72. *
  73. * 3. All advertising materials mentioning features or use of this
  74. * software must display the following acknowledgment:
  75. * "This product includes software developed by the OpenSSL Project
  76. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  77. *
  78. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  79. * endorse or promote products derived from this software without
  80. * prior written permission. For written permission, please contact
  81. * openssl-core@openssl.org.
  82. *
  83. * 5. Products derived from this software may not be called "OpenSSL"
  84. * nor may "OpenSSL" appear in their names without prior written
  85. * permission of the OpenSSL Project.
  86. *
  87. * 6. Redistributions of any form whatsoever must retain the following
  88. * acknowledgment:
  89. * "This product includes software developed by the OpenSSL Project
  90. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  91. *
  92. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  93. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  94. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  95. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  96. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  97. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  98. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  99. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  100. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  101. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  102. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  103. * OF THE POSSIBILITY OF SUCH DAMAGE.
  104. * ====================================================================
  105. *
  106. * This product includes cryptographic software written by Eric Young
  107. * (eay@cryptsoft.com). This product includes software written by Tim
  108. * Hudson (tjh@cryptsoft.com).
  109. *
  110. */
  111. #define OPENSSL_FIPSAPI
  112. #include "cryptlib.h"
  113. #include "bn_lcl.h"
  114. #include <stdlib.h>
  115. #ifdef _WIN32
  116. # include <malloc.h>
  117. # ifndef alloca
  118. # define alloca _alloca
  119. # endif
  120. #elif defined(__GNUC__)
  121. # ifndef alloca
  122. # define alloca(s) __builtin_alloca((s))
  123. # endif
  124. #endif
  125. /* maximum precomputation table size for *variable* sliding windows */
  126. #define TABLE_SIZE 32
  127. /* this one works - simple but works */
  128. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  129. {
  130. int i,bits,ret=0;
  131. BIGNUM *v,*rr;
  132. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
  133. {
  134. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  135. BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  136. return -1;
  137. }
  138. BN_CTX_start(ctx);
  139. if ((r == a) || (r == p))
  140. rr = BN_CTX_get(ctx);
  141. else
  142. rr = r;
  143. v = BN_CTX_get(ctx);
  144. if (rr == NULL || v == NULL) goto err;
  145. if (BN_copy(v,a) == NULL) goto err;
  146. bits=BN_num_bits(p);
  147. if (BN_is_odd(p))
  148. { if (BN_copy(rr,a) == NULL) goto err; }
  149. else { if (!BN_one(rr)) goto err; }
  150. for (i=1; i<bits; i++)
  151. {
  152. if (!BN_sqr(v,v,ctx)) goto err;
  153. if (BN_is_bit_set(p,i))
  154. {
  155. if (!BN_mul(rr,rr,v,ctx)) goto err;
  156. }
  157. }
  158. ret=1;
  159. err:
  160. if (r != rr) BN_copy(r,rr);
  161. BN_CTX_end(ctx);
  162. bn_check_top(r);
  163. return(ret);
  164. }
  165. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  166. BN_CTX *ctx)
  167. {
  168. int ret;
  169. bn_check_top(a);
  170. bn_check_top(p);
  171. bn_check_top(m);
  172. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  173. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  174. * exponentiation for the odd part), using appropriate exponent
  175. * reductions, and combine the results using the CRT.
  176. *
  177. * For now, we use Montgomery only if the modulus is odd; otherwise,
  178. * exponentiation using the reciprocal-based quick remaindering
  179. * algorithm is used.
  180. *
  181. * (Timing obtained with expspeed.c [computations a^p mod m
  182. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  183. * 4096, 8192 bits], compared to the running time of the
  184. * standard algorithm:
  185. *
  186. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  187. * 55 .. 77 % [UltraSparc processor, but
  188. * debug-solaris-sparcv8-gcc conf.]
  189. *
  190. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  191. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  192. *
  193. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  194. * at 2048 and more bits, but at 512 and 1024 bits, it was
  195. * slower even than the standard algorithm!
  196. *
  197. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  198. * should be obtained when the new Montgomery reduction code
  199. * has been integrated into OpenSSL.)
  200. */
  201. #define MONT_MUL_MOD
  202. #define MONT_EXP_WORD
  203. #define RECP_MUL_MOD
  204. #ifdef MONT_MUL_MOD
  205. /* I have finally been able to take out this pre-condition of
  206. * the top bit being set. It was caused by an error in BN_div
  207. * with negatives. There was also another problem when for a^b%m
  208. * a >= m. eay 07-May-97 */
  209. /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
  210. if (BN_is_odd(m))
  211. {
  212. # ifdef MONT_EXP_WORD
  213. if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
  214. {
  215. BN_ULONG A = a->d[0];
  216. ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
  217. }
  218. else
  219. # endif
  220. ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
  221. }
  222. else
  223. #endif
  224. #ifdef RECP_MUL_MOD
  225. { ret=BN_mod_exp_recp(r,a,p,m,ctx); }
  226. #else
  227. { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
  228. #endif
  229. bn_check_top(r);
  230. return(ret);
  231. }
  232. int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  233. const BIGNUM *m, BN_CTX *ctx)
  234. {
  235. int i,j,bits,ret=0,wstart,wend,window,wvalue;
  236. int start=1;
  237. BIGNUM *aa;
  238. /* Table of variables obtained from 'ctx' */
  239. BIGNUM *val[TABLE_SIZE];
  240. BN_RECP_CTX recp;
  241. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
  242. {
  243. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  244. BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  245. return -1;
  246. }
  247. bits=BN_num_bits(p);
  248. if (bits == 0)
  249. {
  250. ret = BN_one(r);
  251. return ret;
  252. }
  253. BN_CTX_start(ctx);
  254. aa = BN_CTX_get(ctx);
  255. val[0] = BN_CTX_get(ctx);
  256. if(!aa || !val[0]) goto err;
  257. BN_RECP_CTX_init(&recp);
  258. if (m->neg)
  259. {
  260. /* ignore sign of 'm' */
  261. if (!BN_copy(aa, m)) goto err;
  262. aa->neg = 0;
  263. if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
  264. }
  265. else
  266. {
  267. if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
  268. }
  269. if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
  270. if (BN_is_zero(val[0]))
  271. {
  272. BN_zero(r);
  273. ret = 1;
  274. goto err;
  275. }
  276. window = BN_window_bits_for_exponent_size(bits);
  277. if (window > 1)
  278. {
  279. if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
  280. goto err; /* 2 */
  281. j=1<<(window-1);
  282. for (i=1; i<j; i++)
  283. {
  284. if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
  285. !BN_mod_mul_reciprocal(val[i],val[i-1],
  286. aa,&recp,ctx))
  287. goto err;
  288. }
  289. }
  290. start=1; /* This is used to avoid multiplication etc
  291. * when there is only the value '1' in the
  292. * buffer. */
  293. wvalue=0; /* The 'value' of the window */
  294. wstart=bits-1; /* The top bit of the window */
  295. wend=0; /* The bottom bit of the window */
  296. if (!BN_one(r)) goto err;
  297. for (;;)
  298. {
  299. if (BN_is_bit_set(p,wstart) == 0)
  300. {
  301. if (!start)
  302. if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
  303. goto err;
  304. if (wstart == 0) break;
  305. wstart--;
  306. continue;
  307. }
  308. /* We now have wstart on a 'set' bit, we now need to work out
  309. * how bit a window to do. To do this we need to scan
  310. * forward until the last set bit before the end of the
  311. * window */
  312. j=wstart;
  313. wvalue=1;
  314. wend=0;
  315. for (i=1; i<window; i++)
  316. {
  317. if (wstart-i < 0) break;
  318. if (BN_is_bit_set(p,wstart-i))
  319. {
  320. wvalue<<=(i-wend);
  321. wvalue|=1;
  322. wend=i;
  323. }
  324. }
  325. /* wend is the size of the current window */
  326. j=wend+1;
  327. /* add the 'bytes above' */
  328. if (!start)
  329. for (i=0; i<j; i++)
  330. {
  331. if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
  332. goto err;
  333. }
  334. /* wvalue will be an odd number < 2^window */
  335. if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
  336. goto err;
  337. /* move the 'window' down further */
  338. wstart-=wend+1;
  339. wvalue=0;
  340. start=0;
  341. if (wstart < 0) break;
  342. }
  343. ret=1;
  344. err:
  345. BN_CTX_end(ctx);
  346. BN_RECP_CTX_free(&recp);
  347. bn_check_top(r);
  348. return(ret);
  349. }
  350. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  351. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  352. {
  353. int i,j,bits,ret=0,wstart,wend,window,wvalue;
  354. int start=1;
  355. BIGNUM *d,*r;
  356. const BIGNUM *aa;
  357. /* Table of variables obtained from 'ctx' */
  358. BIGNUM *val[TABLE_SIZE];
  359. BN_MONT_CTX *mont=NULL;
  360. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
  361. {
  362. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  363. }
  364. bn_check_top(a);
  365. bn_check_top(p);
  366. bn_check_top(m);
  367. if (!BN_is_odd(m))
  368. {
  369. BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
  370. return(0);
  371. }
  372. bits=BN_num_bits(p);
  373. if (bits == 0)
  374. {
  375. ret = BN_one(rr);
  376. return ret;
  377. }
  378. BN_CTX_start(ctx);
  379. d = BN_CTX_get(ctx);
  380. r = BN_CTX_get(ctx);
  381. val[0] = BN_CTX_get(ctx);
  382. if (!d || !r || !val[0]) goto err;
  383. /* If this is not done, things will break in the montgomery
  384. * part */
  385. if (in_mont != NULL)
  386. mont=in_mont;
  387. else
  388. {
  389. if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
  390. if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
  391. }
  392. if (a->neg || BN_ucmp(a,m) >= 0)
  393. {
  394. if (!BN_nnmod(val[0],a,m,ctx))
  395. goto err;
  396. aa= val[0];
  397. }
  398. else
  399. aa=a;
  400. if (BN_is_zero(aa))
  401. {
  402. BN_zero(rr);
  403. ret = 1;
  404. goto err;
  405. }
  406. if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
  407. window = BN_window_bits_for_exponent_size(bits);
  408. if (window > 1)
  409. {
  410. if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
  411. j=1<<(window-1);
  412. for (i=1; i<j; i++)
  413. {
  414. if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
  415. !BN_mod_mul_montgomery(val[i],val[i-1],
  416. d,mont,ctx))
  417. goto err;
  418. }
  419. }
  420. start=1; /* This is used to avoid multiplication etc
  421. * when there is only the value '1' in the
  422. * buffer. */
  423. wvalue=0; /* The 'value' of the window */
  424. wstart=bits-1; /* The top bit of the window */
  425. wend=0; /* The bottom bit of the window */
  426. if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
  427. for (;;)
  428. {
  429. if (BN_is_bit_set(p,wstart) == 0)
  430. {
  431. if (!start)
  432. {
  433. if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
  434. goto err;
  435. }
  436. if (wstart == 0) break;
  437. wstart--;
  438. continue;
  439. }
  440. /* We now have wstart on a 'set' bit, we now need to work out
  441. * how bit a window to do. To do this we need to scan
  442. * forward until the last set bit before the end of the
  443. * window */
  444. j=wstart;
  445. wvalue=1;
  446. wend=0;
  447. for (i=1; i<window; i++)
  448. {
  449. if (wstart-i < 0) break;
  450. if (BN_is_bit_set(p,wstart-i))
  451. {
  452. wvalue<<=(i-wend);
  453. wvalue|=1;
  454. wend=i;
  455. }
  456. }
  457. /* wend is the size of the current window */
  458. j=wend+1;
  459. /* add the 'bytes above' */
  460. if (!start)
  461. for (i=0; i<j; i++)
  462. {
  463. if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
  464. goto err;
  465. }
  466. /* wvalue will be an odd number < 2^window */
  467. if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
  468. goto err;
  469. /* move the 'window' down further */
  470. wstart-=wend+1;
  471. wvalue=0;
  472. start=0;
  473. if (wstart < 0) break;
  474. }
  475. if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
  476. ret=1;
  477. err:
  478. if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
  479. BN_CTX_end(ctx);
  480. bn_check_top(rr);
  481. return(ret);
  482. }
  483. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
  484. * so that accessing any of these table values shows the same access pattern as far
  485. * as cache lines are concerned. The following functions are used to transfer a BIGNUM
  486. * from/to that table. */
  487. static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
  488. {
  489. size_t i, j;
  490. if (bn_wexpand(b, top) == NULL)
  491. return 0;
  492. while (b->top < top)
  493. {
  494. b->d[b->top++] = 0;
  495. }
  496. for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
  497. {
  498. buf[j] = ((unsigned char*)b->d)[i];
  499. }
  500. bn_correct_top(b);
  501. return 1;
  502. }
  503. static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
  504. {
  505. size_t i, j;
  506. if (bn_wexpand(b, top) == NULL)
  507. return 0;
  508. for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
  509. {
  510. ((unsigned char*)b->d)[i] = buf[j];
  511. }
  512. b->top = top;
  513. bn_correct_top(b);
  514. return 1;
  515. }
  516. /* Given a pointer value, compute the next address that is a cache line multiple. */
  517. #define MOD_EXP_CTIME_ALIGN(x_) \
  518. ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  519. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  520. * precomputation memory layout to limit data-dependency to a minimum
  521. * to protect secret exponents (cf. the hyper-threading timing attacks
  522. * pointed out by Colin Percival,
  523. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  524. */
  525. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  526. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  527. {
  528. int i,bits,ret=0,window,wvalue;
  529. int top;
  530. BIGNUM *r;
  531. BN_MONT_CTX *mont=NULL;
  532. int numPowers;
  533. unsigned char *powerbufFree=NULL;
  534. int powerbufLen = 0;
  535. unsigned char *powerbuf=NULL;
  536. BIGNUM computeTemp, *am=NULL;
  537. bn_check_top(a);
  538. bn_check_top(p);
  539. bn_check_top(m);
  540. top = m->top;
  541. if (!(m->d[0] & 1))
  542. {
  543. BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
  544. return(0);
  545. }
  546. bits=BN_num_bits(p);
  547. if (bits == 0)
  548. {
  549. ret = BN_one(rr);
  550. return ret;
  551. }
  552. /* Initialize BIGNUM context and allocate intermediate result */
  553. BN_CTX_start(ctx);
  554. r = BN_CTX_get(ctx);
  555. if (r == NULL) goto err;
  556. /* Allocate a montgomery context if it was not supplied by the caller.
  557. * If this is not done, things will break in the montgomery part.
  558. */
  559. if (in_mont != NULL)
  560. mont=in_mont;
  561. else
  562. {
  563. if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
  564. if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
  565. }
  566. /* Get the window size to use with size of p. */
  567. window = BN_window_bits_for_ctime_exponent_size(bits);
  568. #if defined(OPENSSL_BN_ASM_MONT5)
  569. if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */
  570. #endif
  571. /* Adjust the number of bits up to a multiple of the window size.
  572. * If the exponent length is not a multiple of the window size, then
  573. * this pads the most significant bits with zeros to normalize the
  574. * scanning loop to there's no special cases.
  575. *
  576. * * NOTE: Making the window size a power of two less than the native
  577. * * word size ensures that the padded bits won't go past the last
  578. * * word in the internal BIGNUM structure. Going past the end will
  579. * * still produce the correct result, but causes a different branch
  580. * * to be taken in the BN_is_bit_set function.
  581. */
  582. bits = ((bits+window-1)/window)*window;
  583. /* Allocate a buffer large enough to hold all of the pre-computed
  584. * powers of a, plus computeTemp.
  585. */
  586. numPowers = 1 << window;
  587. powerbufLen = sizeof(m->d[0])*(top*numPowers +
  588. (top>numPowers?top:numPowers));
  589. #ifdef alloca
  590. if (powerbufLen < 3072)
  591. powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  592. else
  593. #endif
  594. if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
  595. goto err;
  596. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  597. memset(powerbuf, 0, powerbufLen);
  598. #ifdef alloca
  599. if (powerbufLen < 3072)
  600. powerbufFree = NULL;
  601. #endif
  602. computeTemp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
  603. computeTemp.top = computeTemp.dmax = top;
  604. computeTemp.neg = 0;
  605. computeTemp.flags = BN_FLG_STATIC_DATA;
  606. /* Initialize the intermediate result. Do this early to save double conversion,
  607. * once each for a^0 and intermediate result.
  608. */
  609. if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
  610. /* Initialize computeTemp as a^1 with montgomery precalcs */
  611. am = BN_CTX_get(ctx);
  612. if (am==NULL) goto err;
  613. if (a->neg || BN_ucmp(a,m) >= 0)
  614. {
  615. if (!BN_mod(am,a,m,ctx)) goto err;
  616. if (!BN_to_montgomery(am,am,mont,ctx)) goto err;
  617. }
  618. else if (!BN_to_montgomery(am,a,mont,ctx)) goto err;
  619. if (!BN_copy(&computeTemp, am)) goto err;
  620. if (bn_wexpand(am,top)==NULL || bn_wexpand(r,top)==NULL)
  621. goto err;
  622. #if defined(OPENSSL_BN_ASM_MONT5)
  623. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  624. * specifically optimization of cache-timing attack countermeasures
  625. * and pre-computation optimization. */
  626. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  627. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  628. if (window==5)
  629. {
  630. void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
  631. const void *table,const BN_ULONG *np,
  632. const BN_ULONG *n0,int num,int power);
  633. void bn_scatter5(const BN_ULONG *inp,size_t num,
  634. void *table,size_t power);
  635. BN_ULONG *acc, *np=mont->N.d, *n0=mont->n0;
  636. bn_scatter5(r->d,r->top,powerbuf,0);
  637. bn_scatter5(am->d,am->top,powerbuf,1);
  638. acc = computeTemp.d;
  639. #if 0
  640. for (i=2; i<32; i++)
  641. {
  642. bn_mul_mont_gather5(acc,am->d,powerbuf,np,n0,top,i-1);
  643. bn_scatter5(acc,top,powerbuf,i);
  644. }
  645. #else
  646. /* same as above, but uses squaring for 1/2 of operations */
  647. for (i=2; i<32; i*=2)
  648. {
  649. bn_mul_mont(acc,acc,acc,np,n0,top);
  650. bn_scatter5(acc,top,powerbuf,i);
  651. }
  652. for (i=3; i<8; i+=2)
  653. {
  654. int j;
  655. bn_mul_mont_gather5(acc,am->d,powerbuf,np,n0,top,i-1);
  656. bn_scatter5(acc,top,powerbuf,i);
  657. for (j=2*i; j<32; j*=2)
  658. {
  659. bn_mul_mont(acc,acc,acc,np,n0,top);
  660. bn_scatter5(acc,top,powerbuf,j);
  661. }
  662. }
  663. for (; i<16; i+=2)
  664. {
  665. bn_mul_mont_gather5(acc,am->d,powerbuf,np,n0,top,i-1);
  666. bn_scatter5(acc,top,powerbuf,i);
  667. bn_mul_mont(acc,acc,acc,np,n0,top);
  668. bn_scatter5(acc,top,powerbuf,2*i);
  669. }
  670. for (; i<32; i+=2)
  671. {
  672. bn_mul_mont_gather5(acc,am->d,powerbuf,np,n0,top,i-1);
  673. bn_scatter5(acc,top,powerbuf,i);
  674. }
  675. #endif
  676. acc = r->d;
  677. /* Scan the exponent one window at a time starting from the most
  678. * significant bits.
  679. */
  680. bits--;
  681. while (bits >= 0)
  682. {
  683. for (wvalue=0, i=0; i<5; i++,bits--)
  684. wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
  685. bn_mul_mont(acc,acc,acc,np,n0,top);
  686. bn_mul_mont(acc,acc,acc,np,n0,top);
  687. bn_mul_mont(acc,acc,acc,np,n0,top);
  688. bn_mul_mont(acc,acc,acc,np,n0,top);
  689. bn_mul_mont(acc,acc,acc,np,n0,top);
  690. bn_mul_mont_gather5(acc,acc,powerbuf,np,n0,top,wvalue);
  691. }
  692. r->top=top;
  693. bn_correct_top(r);
  694. }
  695. else
  696. #endif
  697. {
  698. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
  699. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
  700. /* If the window size is greater than 1, then calculate
  701. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  702. * (even powers could instead be computed as (a^(i/2))^2
  703. * to use the slight performance advantage of sqr over mul).
  704. */
  705. if (window > 1)
  706. {
  707. for (i=2; i<numPowers; i++)
  708. {
  709. /* Calculate a^i = a^(i-1) * a */
  710. if (!BN_mod_mul_montgomery(&computeTemp,am,&computeTemp,mont,ctx))
  711. goto err;
  712. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&computeTemp, top, powerbuf, i, numPowers)) goto err;
  713. }
  714. }
  715. /* Scan the exponent one window at a time starting from the most
  716. * significant bits.
  717. */
  718. bits--;
  719. while (bits >= 0)
  720. {
  721. wvalue=0; /* The 'value' of the window */
  722. /* Scan the window, squaring the result as we go */
  723. for (i=0; i<window; i++,bits--)
  724. {
  725. if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
  726. wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
  727. }
  728. /* Fetch the appropriate pre-computed value from the pre-buf */
  729. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
  730. /* Multiply the result into the intermediate result */
  731. if (!BN_mod_mul_montgomery(r,r,&computeTemp,mont,ctx)) goto err;
  732. }
  733. }
  734. /* Convert the final result from montgomery to standard format */
  735. if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
  736. ret=1;
  737. err:
  738. if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
  739. if (powerbuf!=NULL)
  740. {
  741. OPENSSL_cleanse(powerbuf,powerbufLen);
  742. if (powerbufFree) OPENSSL_free(powerbufFree);
  743. }
  744. if (am!=NULL) BN_clear(am);
  745. BN_CTX_end(ctx);
  746. return(ret);
  747. }
  748. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  749. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  750. {
  751. BN_MONT_CTX *mont = NULL;
  752. int b, bits, ret=0;
  753. int r_is_one;
  754. BN_ULONG w, next_w;
  755. BIGNUM *d, *r, *t;
  756. BIGNUM *swap_tmp;
  757. #define BN_MOD_MUL_WORD(r, w, m) \
  758. (BN_mul_word(r, (w)) && \
  759. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  760. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  761. /* BN_MOD_MUL_WORD is only used with 'w' large,
  762. * so the BN_ucmp test is probably more overhead
  763. * than always using BN_mod (which uses BN_copy if
  764. * a similar test returns true). */
  765. /* We can use BN_mod and do not need BN_nnmod because our
  766. * accumulator is never negative (the result of BN_mod does
  767. * not depend on the sign of the modulus).
  768. */
  769. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  770. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  771. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
  772. {
  773. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  774. BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  775. return -1;
  776. }
  777. bn_check_top(p);
  778. bn_check_top(m);
  779. if (!BN_is_odd(m))
  780. {
  781. BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
  782. return(0);
  783. }
  784. if (m->top == 1)
  785. a %= m->d[0]; /* make sure that 'a' is reduced */
  786. bits = BN_num_bits(p);
  787. if (bits == 0)
  788. {
  789. ret = BN_one(rr);
  790. return ret;
  791. }
  792. if (a == 0)
  793. {
  794. BN_zero(rr);
  795. ret = 1;
  796. return ret;
  797. }
  798. BN_CTX_start(ctx);
  799. d = BN_CTX_get(ctx);
  800. r = BN_CTX_get(ctx);
  801. t = BN_CTX_get(ctx);
  802. if (d == NULL || r == NULL || t == NULL) goto err;
  803. if (in_mont != NULL)
  804. mont=in_mont;
  805. else
  806. {
  807. if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
  808. if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
  809. }
  810. r_is_one = 1; /* except for Montgomery factor */
  811. /* bits-1 >= 0 */
  812. /* The result is accumulated in the product r*w. */
  813. w = a; /* bit 'bits-1' of 'p' is always set */
  814. for (b = bits-2; b >= 0; b--)
  815. {
  816. /* First, square r*w. */
  817. next_w = w*w;
  818. if ((next_w/w) != w) /* overflow */
  819. {
  820. if (r_is_one)
  821. {
  822. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
  823. r_is_one = 0;
  824. }
  825. else
  826. {
  827. if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
  828. }
  829. next_w = 1;
  830. }
  831. w = next_w;
  832. if (!r_is_one)
  833. {
  834. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
  835. }
  836. /* Second, multiply r*w by 'a' if exponent bit is set. */
  837. if (BN_is_bit_set(p, b))
  838. {
  839. next_w = w*a;
  840. if ((next_w/a) != w) /* overflow */
  841. {
  842. if (r_is_one)
  843. {
  844. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
  845. r_is_one = 0;
  846. }
  847. else
  848. {
  849. if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
  850. }
  851. next_w = a;
  852. }
  853. w = next_w;
  854. }
  855. }
  856. /* Finally, set r:=r*w. */
  857. if (w != 1)
  858. {
  859. if (r_is_one)
  860. {
  861. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
  862. r_is_one = 0;
  863. }
  864. else
  865. {
  866. if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
  867. }
  868. }
  869. if (r_is_one) /* can happen only if a == 1*/
  870. {
  871. if (!BN_one(rr)) goto err;
  872. }
  873. else
  874. {
  875. if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
  876. }
  877. ret = 1;
  878. err:
  879. if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
  880. BN_CTX_end(ctx);
  881. bn_check_top(rr);
  882. return(ret);
  883. }
  884. /* The old fallback, simple version :-) */
  885. int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  886. const BIGNUM *m, BN_CTX *ctx)
  887. {
  888. int i,j,bits,ret=0,wstart,wend,window,wvalue;
  889. int start=1;
  890. BIGNUM *d;
  891. /* Table of variables obtained from 'ctx' */
  892. BIGNUM *val[TABLE_SIZE];
  893. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
  894. {
  895. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  896. BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  897. return -1;
  898. }
  899. bits=BN_num_bits(p);
  900. if (bits == 0)
  901. {
  902. ret = BN_one(r);
  903. return ret;
  904. }
  905. BN_CTX_start(ctx);
  906. d = BN_CTX_get(ctx);
  907. val[0] = BN_CTX_get(ctx);
  908. if(!d || !val[0]) goto err;
  909. if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
  910. if (BN_is_zero(val[0]))
  911. {
  912. BN_zero(r);
  913. ret = 1;
  914. goto err;
  915. }
  916. window = BN_window_bits_for_exponent_size(bits);
  917. if (window > 1)
  918. {
  919. if (!BN_mod_mul(d,val[0],val[0],m,ctx))
  920. goto err; /* 2 */
  921. j=1<<(window-1);
  922. for (i=1; i<j; i++)
  923. {
  924. if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
  925. !BN_mod_mul(val[i],val[i-1],d,m,ctx))
  926. goto err;
  927. }
  928. }
  929. start=1; /* This is used to avoid multiplication etc
  930. * when there is only the value '1' in the
  931. * buffer. */
  932. wvalue=0; /* The 'value' of the window */
  933. wstart=bits-1; /* The top bit of the window */
  934. wend=0; /* The bottom bit of the window */
  935. if (!BN_one(r)) goto err;
  936. for (;;)
  937. {
  938. if (BN_is_bit_set(p,wstart) == 0)
  939. {
  940. if (!start)
  941. if (!BN_mod_mul(r,r,r,m,ctx))
  942. goto err;
  943. if (wstart == 0) break;
  944. wstart--;
  945. continue;
  946. }
  947. /* We now have wstart on a 'set' bit, we now need to work out
  948. * how bit a window to do. To do this we need to scan
  949. * forward until the last set bit before the end of the
  950. * window */
  951. j=wstart;
  952. wvalue=1;
  953. wend=0;
  954. for (i=1; i<window; i++)
  955. {
  956. if (wstart-i < 0) break;
  957. if (BN_is_bit_set(p,wstart-i))
  958. {
  959. wvalue<<=(i-wend);
  960. wvalue|=1;
  961. wend=i;
  962. }
  963. }
  964. /* wend is the size of the current window */
  965. j=wend+1;
  966. /* add the 'bytes above' */
  967. if (!start)
  968. for (i=0; i<j; i++)
  969. {
  970. if (!BN_mod_mul(r,r,r,m,ctx))
  971. goto err;
  972. }
  973. /* wvalue will be an odd number < 2^window */
  974. if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
  975. goto err;
  976. /* move the 'window' down further */
  977. wstart-=wend+1;
  978. wvalue=0;
  979. start=0;
  980. if (wstart < 0) break;
  981. }
  982. ret=1;
  983. err:
  984. BN_CTX_end(ctx);
  985. bn_check_top(r);
  986. return(ret);
  987. }