sp_cortexm.c 939 KB


  1. /* sp.c
  2. *
  3. * Copyright (C) 2006-2020 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Implementation by Sean Parkinson. */
  22. #ifdef HAVE_CONFIG_H
  23. #include <config.h>
  24. #endif
  25. #include <wolfssl/wolfcrypt/settings.h>
  26. #include <wolfssl/wolfcrypt/error-crypt.h>
  27. #include <wolfssl/wolfcrypt/cpuid.h>
  28. #ifdef NO_INLINE
  29. #include <wolfssl/wolfcrypt/misc.h>
  30. #else
  31. #define WOLFSSL_MISC_INCLUDED
  32. #include <wolfcrypt/src/misc.c>
  33. #endif
  34. #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
  35. defined(WOLFSSL_HAVE_SP_ECC)
  36. #ifdef RSA_LOW_MEM
  37. #ifndef WOLFSSL_SP_SMALL
  38. #define WOLFSSL_SP_SMALL
  39. #endif
  40. #endif
  41. #include <wolfssl/wolfcrypt/sp.h>
  42. #ifdef __IAR_SYSTEMS_ICC__
  43. #define __asm__ asm
  44. #define __volatile__ volatile
  45. #endif /* __IAR_SYSTEMS_ICC__ */
  46. #ifdef __KEIL__
  47. #define __asm__ __asm
  48. #define __volatile__ volatile
  49. #endif
  50. #ifdef WOLFSSL_SP_ARM_CORTEX_M_ASM
  51. #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
  52. #ifndef WOLFSSL_SP_NO_2048
  53. /* Read big endian unsigned byte array into r.
  54. *
  55. * r A single precision integer.
  56. * size Maximum number of bytes to convert
  57. * a Byte array.
  58. * n Number of bytes in array to read.
  59. */
  60. static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
  61. {
  62. int i, j = 0;
  63. word32 s = 0;
  64. r[0] = 0;
  65. for (i = n-1; i >= 0; i--) {
  66. r[j] |= (((sp_digit)a[i]) << s);
  67. if (s >= 24U) {
  68. r[j] &= 0xffffffff;
  69. s = 32U - s;
  70. if (j + 1 >= size) {
  71. break;
  72. }
  73. r[++j] = (sp_digit)a[i] >> s;
  74. s = 8U - s;
  75. }
  76. else {
  77. s += 8U;
  78. }
  79. }
  80. for (j++; j < size; j++) {
  81. r[j] = 0;
  82. }
  83. }
  84. /* Convert an mp_int to an array of sp_digit.
  85. *
  86. * r A single precision integer.
  87. * size Maximum number of bytes to convert
  88. * a A multi-precision integer.
  89. */
  90. static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
  91. {
  92. #if DIGIT_BIT == 32
  93. int j;
  94. XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
  95. for (j = a->used; j < size; j++) {
  96. r[j] = 0;
  97. }
  98. #elif DIGIT_BIT > 32
  99. int i, j = 0;
  100. word32 s = 0;
  101. r[0] = 0;
  102. for (i = 0; i < a->used && j < size; i++) {
  103. r[j] |= ((sp_digit)a->dp[i] << s);
  104. r[j] &= 0xffffffff;
  105. s = 32U - s;
  106. if (j + 1 >= size) {
  107. break;
  108. }
  109. /* lint allow cast of mismatch word32 and mp_digit */
  110. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  111. while ((s + 32U) <= (word32)DIGIT_BIT) {
  112. s += 32U;
  113. r[j] &= 0xffffffff;
  114. if (j + 1 >= size) {
  115. break;
  116. }
  117. if (s < (word32)DIGIT_BIT) {
  118. /* lint allow cast of mismatch word32 and mp_digit */
  119. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  120. }
  121. else {
  122. r[++j] = 0L;
  123. }
  124. }
  125. s = (word32)DIGIT_BIT - s;
  126. }
  127. for (j++; j < size; j++) {
  128. r[j] = 0;
  129. }
  130. #else
  131. int i, j = 0, s = 0;
  132. r[0] = 0;
  133. for (i = 0; i < a->used && j < size; i++) {
  134. r[j] |= ((sp_digit)a->dp[i]) << s;
  135. if (s + DIGIT_BIT >= 32) {
  136. r[j] &= 0xffffffff;
  137. if (j + 1 >= size) {
  138. break;
  139. }
  140. s = 32 - s;
  141. if (s == DIGIT_BIT) {
  142. r[++j] = 0;
  143. s = 0;
  144. }
  145. else {
  146. r[++j] = a->dp[i] >> s;
  147. s = DIGIT_BIT - s;
  148. }
  149. }
  150. else {
  151. s += DIGIT_BIT;
  152. }
  153. }
  154. for (j++; j < size; j++) {
  155. r[j] = 0;
  156. }
  157. #endif
  158. }
  159. /* Write r as big endian to byte array.
  160. * Fixed length number of bytes written: 256
  161. *
  162. * r A single precision integer.
  163. * a Byte array.
  164. */
  165. static void sp_2048_to_bin(sp_digit* r, byte* a)
  166. {
  167. int i, j, s = 0, b;
  168. j = 2048 / 8 - 1;
  169. a[j] = 0;
  170. for (i=0; i<64 && j>=0; i++) {
  171. b = 0;
  172. /* lint allow cast of mismatch sp_digit and int */
  173. a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
  174. b += 8 - s;
  175. if (j < 0) {
  176. break;
  177. }
  178. while (b < 32) {
  179. a[j--] = (byte)(r[i] >> b);
  180. b += 8;
  181. if (j < 0) {
  182. break;
  183. }
  184. }
  185. s = 8 - (b - 32);
  186. if (j >= 0) {
  187. a[j] = 0;
  188. }
  189. if (s != 0) {
  190. j++;
  191. }
  192. }
  193. }
  194. #ifndef WOLFSSL_SP_SMALL
  195. /* Multiply a and b into r. (r = a * b)
  196. * Fully unrolled 8 x 8 word multiply: each result word sums every A[i] * B[j] with the same i + j in r3/r4/r5 (their roles rotate after each word).
  197. * r A single precision integer. Receives 16 words: words 8..15 are stored directly, words 0..7 are staged in a local buffer and copied last.
  198. * a A single precision integer. Words 0..7 are read.
  199. * b A single precision integer. Words 0..7 are read.
  200. */
  201. SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
  202. const sp_digit* b)
  203. {
  204. sp_digit tmp_arr[8]; /* staging area for result words 0..7; presumably lets r alias a or b - TODO confirm with callers */
  205. sp_digit* tmp = tmp_arr;
  206. __asm__ __volatile__ (
  207. /* A[0] * B[0] - result word 0: low half to r3 (stored), high half to r4, r5 cleared */
  208. "ldr r6, [%[a], #0]\n\t"
  209. "ldr r8, [%[b], #0]\n\t"
  210. "umull r3, r4, r6, r8\n\t"
  211. "mov r5, #0\n\t"
  212. "str r3, [%[tmp], #0]\n\t"
  213. "mov r3, #0\n\t"
  214. /* A[0] * B[1] - result word 1 accumulates in r4, carries into r5 then r3 */
  215. "ldr r8, [%[b], #4]\n\t"
  216. "umull r6, r8, r6, r8\n\t"
  217. "adds r4, r4, r6\n\t"
  218. "adc r5, r5, r8\n\t" /* r5 was zero, so no carry out is possible here */
  219. /* A[1] * B[0] */
  220. "ldr r6, [%[a], #4]\n\t"
  221. "ldr r8, [%[b], #0]\n\t"
  222. "umull r6, r8, r6, r8\n\t"
  223. "adds r4, r4, r6\n\t"
  224. "adcs r5, r5, r8\n\t"
  225. "adc r3, r3, #0\n\t"
  226. "str r4, [%[tmp], #4]\n\t"
  227. "mov r4, #0\n\t"
  228. /* A[0] * B[2] - result word 2 accumulates in r5 */
  229. "ldr r6, [%[a], #0]\n\t"
  230. "ldr r8, [%[b], #8]\n\t"
  231. "umull r6, r8, r6, r8\n\t"
  232. "adds r5, r5, r6\n\t"
  233. "adcs r3, r3, r8\n\t"
  234. "adc r4, r4, #0\n\t"
  235. /* A[1] * B[1] */
  236. "ldr r6, [%[a], #4]\n\t"
  237. "ldr r8, [%[b], #4]\n\t"
  238. "umull r6, r8, r6, r8\n\t"
  239. "adds r5, r5, r6\n\t"
  240. "adcs r3, r3, r8\n\t"
  241. "adc r4, r4, #0\n\t"
  242. /* A[2] * B[0] */
  243. "ldr r6, [%[a], #8]\n\t"
  244. "ldr r8, [%[b], #0]\n\t"
  245. "umull r6, r8, r6, r8\n\t"
  246. "adds r5, r5, r6\n\t"
  247. "adcs r3, r3, r8\n\t"
  248. "adc r4, r4, #0\n\t"
  249. "str r5, [%[tmp], #8]\n\t"
  250. "mov r5, #0\n\t"
  251. /* A[0] * B[3] - result word 3 accumulates in r3 */
  252. "ldr r6, [%[a], #0]\n\t"
  253. "ldr r8, [%[b], #12]\n\t"
  254. "umull r6, r8, r6, r8\n\t"
  255. "adds r3, r3, r6\n\t"
  256. "adcs r4, r4, r8\n\t"
  257. "adc r5, r5, #0\n\t"
  258. /* A[1] * B[2] */
  259. "ldr r6, [%[a], #4]\n\t"
  260. "ldr r8, [%[b], #8]\n\t"
  261. "umull r6, r8, r6, r8\n\t"
  262. "adds r3, r3, r6\n\t"
  263. "adcs r4, r4, r8\n\t"
  264. "adc r5, r5, #0\n\t"
  265. /* A[2] * B[1] */
  266. "ldr r6, [%[a], #8]\n\t"
  267. "ldr r8, [%[b], #4]\n\t"
  268. "umull r6, r8, r6, r8\n\t"
  269. "adds r3, r3, r6\n\t"
  270. "adcs r4, r4, r8\n\t"
  271. "adc r5, r5, #0\n\t"
  272. /* A[3] * B[0] */
  273. "ldr r6, [%[a], #12]\n\t"
  274. "ldr r8, [%[b], #0]\n\t"
  275. "umull r6, r8, r6, r8\n\t"
  276. "adds r3, r3, r6\n\t"
  277. "adcs r4, r4, r8\n\t"
  278. "adc r5, r5, #0\n\t"
  279. "str r3, [%[tmp], #12]\n\t"
  280. "mov r3, #0\n\t"
  281. /* A[0] * B[4] - result word 4 accumulates in r4 */
  282. "ldr r6, [%[a], #0]\n\t"
  283. "ldr r8, [%[b], #16]\n\t"
  284. "umull r6, r8, r6, r8\n\t"
  285. "adds r4, r4, r6\n\t"
  286. "adcs r5, r5, r8\n\t"
  287. "adc r3, r3, #0\n\t"
  288. /* A[1] * B[3] */
  289. "ldr r6, [%[a], #4]\n\t"
  290. "ldr r8, [%[b], #12]\n\t"
  291. "umull r6, r8, r6, r8\n\t"
  292. "adds r4, r4, r6\n\t"
  293. "adcs r5, r5, r8\n\t"
  294. "adc r3, r3, #0\n\t"
  295. /* A[2] * B[2] */
  296. "ldr r6, [%[a], #8]\n\t"
  297. "ldr r8, [%[b], #8]\n\t"
  298. "umull r6, r8, r6, r8\n\t"
  299. "adds r4, r4, r6\n\t"
  300. "adcs r5, r5, r8\n\t"
  301. "adc r3, r3, #0\n\t"
  302. /* A[3] * B[1] */
  303. "ldr r6, [%[a], #12]\n\t"
  304. "ldr r8, [%[b], #4]\n\t"
  305. "umull r6, r8, r6, r8\n\t"
  306. "adds r4, r4, r6\n\t"
  307. "adcs r5, r5, r8\n\t"
  308. "adc r3, r3, #0\n\t"
  309. /* A[4] * B[0] */
  310. "ldr r6, [%[a], #16]\n\t"
  311. "ldr r8, [%[b], #0]\n\t"
  312. "umull r6, r8, r6, r8\n\t"
  313. "adds r4, r4, r6\n\t"
  314. "adcs r5, r5, r8\n\t"
  315. "adc r3, r3, #0\n\t"
  316. "str r4, [%[tmp], #16]\n\t"
  317. "mov r4, #0\n\t"
  318. /* A[0] * B[5] - result word 5 accumulates in r5 */
  319. "ldr r6, [%[a], #0]\n\t"
  320. "ldr r8, [%[b], #20]\n\t"
  321. "umull r6, r8, r6, r8\n\t"
  322. "adds r5, r5, r6\n\t"
  323. "adcs r3, r3, r8\n\t"
  324. "adc r4, r4, #0\n\t"
  325. /* A[1] * B[4] */
  326. "ldr r6, [%[a], #4]\n\t"
  327. "ldr r8, [%[b], #16]\n\t"
  328. "umull r6, r8, r6, r8\n\t"
  329. "adds r5, r5, r6\n\t"
  330. "adcs r3, r3, r8\n\t"
  331. "adc r4, r4, #0\n\t"
  332. /* A[2] * B[3] */
  333. "ldr r6, [%[a], #8]\n\t"
  334. "ldr r8, [%[b], #12]\n\t"
  335. "umull r6, r8, r6, r8\n\t"
  336. "adds r5, r5, r6\n\t"
  337. "adcs r3, r3, r8\n\t"
  338. "adc r4, r4, #0\n\t"
  339. /* A[3] * B[2] */
  340. "ldr r6, [%[a], #12]\n\t"
  341. "ldr r8, [%[b], #8]\n\t"
  342. "umull r6, r8, r6, r8\n\t"
  343. "adds r5, r5, r6\n\t"
  344. "adcs r3, r3, r8\n\t"
  345. "adc r4, r4, #0\n\t"
  346. /* A[4] * B[1] */
  347. "ldr r6, [%[a], #16]\n\t"
  348. "ldr r8, [%[b], #4]\n\t"
  349. "umull r6, r8, r6, r8\n\t"
  350. "adds r5, r5, r6\n\t"
  351. "adcs r3, r3, r8\n\t"
  352. "adc r4, r4, #0\n\t"
  353. /* A[5] * B[0] */
  354. "ldr r6, [%[a], #20]\n\t"
  355. "ldr r8, [%[b], #0]\n\t"
  356. "umull r6, r8, r6, r8\n\t"
  357. "adds r5, r5, r6\n\t"
  358. "adcs r3, r3, r8\n\t"
  359. "adc r4, r4, #0\n\t"
  360. "str r5, [%[tmp], #20]\n\t"
  361. "mov r5, #0\n\t"
  362. /* A[0] * B[6] - result word 6 accumulates in r3 */
  363. "ldr r6, [%[a], #0]\n\t"
  364. "ldr r8, [%[b], #24]\n\t"
  365. "umull r6, r8, r6, r8\n\t"
  366. "adds r3, r3, r6\n\t"
  367. "adcs r4, r4, r8\n\t"
  368. "adc r5, r5, #0\n\t"
  369. /* A[1] * B[5] */
  370. "ldr r6, [%[a], #4]\n\t"
  371. "ldr r8, [%[b], #20]\n\t"
  372. "umull r6, r8, r6, r8\n\t"
  373. "adds r3, r3, r6\n\t"
  374. "adcs r4, r4, r8\n\t"
  375. "adc r5, r5, #0\n\t"
  376. /* A[2] * B[4] */
  377. "ldr r6, [%[a], #8]\n\t"
  378. "ldr r8, [%[b], #16]\n\t"
  379. "umull r6, r8, r6, r8\n\t"
  380. "adds r3, r3, r6\n\t"
  381. "adcs r4, r4, r8\n\t"
  382. "adc r5, r5, #0\n\t"
  383. /* A[3] * B[3] */
  384. "ldr r6, [%[a], #12]\n\t"
  385. "ldr r8, [%[b], #12]\n\t"
  386. "umull r6, r8, r6, r8\n\t"
  387. "adds r3, r3, r6\n\t"
  388. "adcs r4, r4, r8\n\t"
  389. "adc r5, r5, #0\n\t"
  390. /* A[4] * B[2] */
  391. "ldr r6, [%[a], #16]\n\t"
  392. "ldr r8, [%[b], #8]\n\t"
  393. "umull r6, r8, r6, r8\n\t"
  394. "adds r3, r3, r6\n\t"
  395. "adcs r4, r4, r8\n\t"
  396. "adc r5, r5, #0\n\t"
  397. /* A[5] * B[1] */
  398. "ldr r6, [%[a], #20]\n\t"
  399. "ldr r8, [%[b], #4]\n\t"
  400. "umull r6, r8, r6, r8\n\t"
  401. "adds r3, r3, r6\n\t"
  402. "adcs r4, r4, r8\n\t"
  403. "adc r5, r5, #0\n\t"
  404. /* A[6] * B[0] */
  405. "ldr r6, [%[a], #24]\n\t"
  406. "ldr r8, [%[b], #0]\n\t"
  407. "umull r6, r8, r6, r8\n\t"
  408. "adds r3, r3, r6\n\t"
  409. "adcs r4, r4, r8\n\t"
  410. "adc r5, r5, #0\n\t"
  411. "str r3, [%[tmp], #24]\n\t"
  412. "mov r3, #0\n\t"
  413. /* A[0] * B[7] - result word 7 accumulates in r4 (last word staged in tmp) */
  414. "ldr r6, [%[a], #0]\n\t"
  415. "ldr r8, [%[b], #28]\n\t"
  416. "umull r6, r8, r6, r8\n\t"
  417. "adds r4, r4, r6\n\t"
  418. "adcs r5, r5, r8\n\t"
  419. "adc r3, r3, #0\n\t"
  420. /* A[1] * B[6] */
  421. "ldr r6, [%[a], #4]\n\t"
  422. "ldr r8, [%[b], #24]\n\t"
  423. "umull r6, r8, r6, r8\n\t"
  424. "adds r4, r4, r6\n\t"
  425. "adcs r5, r5, r8\n\t"
  426. "adc r3, r3, #0\n\t"
  427. /* A[2] * B[5] */
  428. "ldr r6, [%[a], #8]\n\t"
  429. "ldr r8, [%[b], #20]\n\t"
  430. "umull r6, r8, r6, r8\n\t"
  431. "adds r4, r4, r6\n\t"
  432. "adcs r5, r5, r8\n\t"
  433. "adc r3, r3, #0\n\t"
  434. /* A[3] * B[4] */
  435. "ldr r6, [%[a], #12]\n\t"
  436. "ldr r8, [%[b], #16]\n\t"
  437. "umull r6, r8, r6, r8\n\t"
  438. "adds r4, r4, r6\n\t"
  439. "adcs r5, r5, r8\n\t"
  440. "adc r3, r3, #0\n\t"
  441. /* A[4] * B[3] */
  442. "ldr r6, [%[a], #16]\n\t"
  443. "ldr r8, [%[b], #12]\n\t"
  444. "umull r6, r8, r6, r8\n\t"
  445. "adds r4, r4, r6\n\t"
  446. "adcs r5, r5, r8\n\t"
  447. "adc r3, r3, #0\n\t"
  448. /* A[5] * B[2] */
  449. "ldr r6, [%[a], #20]\n\t"
  450. "ldr r8, [%[b], #8]\n\t"
  451. "umull r6, r8, r6, r8\n\t"
  452. "adds r4, r4, r6\n\t"
  453. "adcs r5, r5, r8\n\t"
  454. "adc r3, r3, #0\n\t"
  455. /* A[6] * B[1] */
  456. "ldr r6, [%[a], #24]\n\t"
  457. "ldr r8, [%[b], #4]\n\t"
  458. "umull r6, r8, r6, r8\n\t"
  459. "adds r4, r4, r6\n\t"
  460. "adcs r5, r5, r8\n\t"
  461. "adc r3, r3, #0\n\t"
  462. /* A[7] * B[0] */
  463. "ldr r6, [%[a], #28]\n\t"
  464. "ldr r8, [%[b], #0]\n\t"
  465. "umull r6, r8, r6, r8\n\t"
  466. "adds r4, r4, r6\n\t"
  467. "adcs r5, r5, r8\n\t"
  468. "adc r3, r3, #0\n\t"
  469. "str r4, [%[tmp], #28]\n\t"
  470. "mov r4, #0\n\t"
  471. /* A[1] * B[7] - result word 8 accumulates in r5; from here on words are stored straight to r */
  472. "ldr r6, [%[a], #4]\n\t"
  473. "ldr r8, [%[b], #28]\n\t"
  474. "umull r6, r8, r6, r8\n\t"
  475. "adds r5, r5, r6\n\t"
  476. "adcs r3, r3, r8\n\t"
  477. "adc r4, r4, #0\n\t"
  478. /* A[2] * B[6] */
  479. "ldr r6, [%[a], #8]\n\t"
  480. "ldr r8, [%[b], #24]\n\t"
  481. "umull r6, r8, r6, r8\n\t"
  482. "adds r5, r5, r6\n\t"
  483. "adcs r3, r3, r8\n\t"
  484. "adc r4, r4, #0\n\t"
  485. /* A[3] * B[5] */
  486. "ldr r6, [%[a], #12]\n\t"
  487. "ldr r8, [%[b], #20]\n\t"
  488. "umull r6, r8, r6, r8\n\t"
  489. "adds r5, r5, r6\n\t"
  490. "adcs r3, r3, r8\n\t"
  491. "adc r4, r4, #0\n\t"
  492. /* A[4] * B[4] */
  493. "ldr r6, [%[a], #16]\n\t"
  494. "ldr r8, [%[b], #16]\n\t"
  495. "umull r6, r8, r6, r8\n\t"
  496. "adds r5, r5, r6\n\t"
  497. "adcs r3, r3, r8\n\t"
  498. "adc r4, r4, #0\n\t"
  499. /* A[5] * B[3] */
  500. "ldr r6, [%[a], #20]\n\t"
  501. "ldr r8, [%[b], #12]\n\t"
  502. "umull r6, r8, r6, r8\n\t"
  503. "adds r5, r5, r6\n\t"
  504. "adcs r3, r3, r8\n\t"
  505. "adc r4, r4, #0\n\t"
  506. /* A[6] * B[2] */
  507. "ldr r6, [%[a], #24]\n\t"
  508. "ldr r8, [%[b], #8]\n\t"
  509. "umull r6, r8, r6, r8\n\t"
  510. "adds r5, r5, r6\n\t"
  511. "adcs r3, r3, r8\n\t"
  512. "adc r4, r4, #0\n\t"
  513. /* A[7] * B[1] */
  514. "ldr r6, [%[a], #28]\n\t"
  515. "ldr r8, [%[b], #4]\n\t"
  516. "umull r6, r8, r6, r8\n\t"
  517. "adds r5, r5, r6\n\t"
  518. "adcs r3, r3, r8\n\t"
  519. "adc r4, r4, #0\n\t"
  520. "str r5, [%[r], #32]\n\t"
  521. "mov r5, #0\n\t"
  522. /* A[2] * B[7] - result word 9 accumulates in r3 */
  523. "ldr r6, [%[a], #8]\n\t"
  524. "ldr r8, [%[b], #28]\n\t"
  525. "umull r6, r8, r6, r8\n\t"
  526. "adds r3, r3, r6\n\t"
  527. "adcs r4, r4, r8\n\t"
  528. "adc r5, r5, #0\n\t"
  529. /* A[3] * B[6] */
  530. "ldr r6, [%[a], #12]\n\t"
  531. "ldr r8, [%[b], #24]\n\t"
  532. "umull r6, r8, r6, r8\n\t"
  533. "adds r3, r3, r6\n\t"
  534. "adcs r4, r4, r8\n\t"
  535. "adc r5, r5, #0\n\t"
  536. /* A[4] * B[5] */
  537. "ldr r6, [%[a], #16]\n\t"
  538. "ldr r8, [%[b], #20]\n\t"
  539. "umull r6, r8, r6, r8\n\t"
  540. "adds r3, r3, r6\n\t"
  541. "adcs r4, r4, r8\n\t"
  542. "adc r5, r5, #0\n\t"
  543. /* A[5] * B[4] */
  544. "ldr r6, [%[a], #20]\n\t"
  545. "ldr r8, [%[b], #16]\n\t"
  546. "umull r6, r8, r6, r8\n\t"
  547. "adds r3, r3, r6\n\t"
  548. "adcs r4, r4, r8\n\t"
  549. "adc r5, r5, #0\n\t"
  550. /* A[6] * B[3] */
  551. "ldr r6, [%[a], #24]\n\t"
  552. "ldr r8, [%[b], #12]\n\t"
  553. "umull r6, r8, r6, r8\n\t"
  554. "adds r3, r3, r6\n\t"
  555. "adcs r4, r4, r8\n\t"
  556. "adc r5, r5, #0\n\t"
  557. /* A[7] * B[2] */
  558. "ldr r6, [%[a], #28]\n\t"
  559. "ldr r8, [%[b], #8]\n\t"
  560. "umull r6, r8, r6, r8\n\t"
  561. "adds r3, r3, r6\n\t"
  562. "adcs r4, r4, r8\n\t"
  563. "adc r5, r5, #0\n\t"
  564. "str r3, [%[r], #36]\n\t"
  565. "mov r3, #0\n\t"
  566. /* A[3] * B[7] - result word 10 accumulates in r4 */
  567. "ldr r6, [%[a], #12]\n\t"
  568. "ldr r8, [%[b], #28]\n\t"
  569. "umull r6, r8, r6, r8\n\t"
  570. "adds r4, r4, r6\n\t"
  571. "adcs r5, r5, r8\n\t"
  572. "adc r3, r3, #0\n\t"
  573. /* A[4] * B[6] */
  574. "ldr r6, [%[a], #16]\n\t"
  575. "ldr r8, [%[b], #24]\n\t"
  576. "umull r6, r8, r6, r8\n\t"
  577. "adds r4, r4, r6\n\t"
  578. "adcs r5, r5, r8\n\t"
  579. "adc r3, r3, #0\n\t"
  580. /* A[5] * B[5] */
  581. "ldr r6, [%[a], #20]\n\t"
  582. "ldr r8, [%[b], #20]\n\t"
  583. "umull r6, r8, r6, r8\n\t"
  584. "adds r4, r4, r6\n\t"
  585. "adcs r5, r5, r8\n\t"
  586. "adc r3, r3, #0\n\t"
  587. /* A[6] * B[4] */
  588. "ldr r6, [%[a], #24]\n\t"
  589. "ldr r8, [%[b], #16]\n\t"
  590. "umull r6, r8, r6, r8\n\t"
  591. "adds r4, r4, r6\n\t"
  592. "adcs r5, r5, r8\n\t"
  593. "adc r3, r3, #0\n\t"
  594. /* A[7] * B[3] */
  595. "ldr r6, [%[a], #28]\n\t"
  596. "ldr r8, [%[b], #12]\n\t"
  597. "umull r6, r8, r6, r8\n\t"
  598. "adds r4, r4, r6\n\t"
  599. "adcs r5, r5, r8\n\t"
  600. "adc r3, r3, #0\n\t"
  601. "str r4, [%[r], #40]\n\t"
  602. "mov r4, #0\n\t"
  603. /* A[4] * B[7] - result word 11 accumulates in r5 */
  604. "ldr r6, [%[a], #16]\n\t"
  605. "ldr r8, [%[b], #28]\n\t"
  606. "umull r6, r8, r6, r8\n\t"
  607. "adds r5, r5, r6\n\t"
  608. "adcs r3, r3, r8\n\t"
  609. "adc r4, r4, #0\n\t"
  610. /* A[5] * B[6] */
  611. "ldr r6, [%[a], #20]\n\t"
  612. "ldr r8, [%[b], #24]\n\t"
  613. "umull r6, r8, r6, r8\n\t"
  614. "adds r5, r5, r6\n\t"
  615. "adcs r3, r3, r8\n\t"
  616. "adc r4, r4, #0\n\t"
  617. /* A[6] * B[5] */
  618. "ldr r6, [%[a], #24]\n\t"
  619. "ldr r8, [%[b], #20]\n\t"
  620. "umull r6, r8, r6, r8\n\t"
  621. "adds r5, r5, r6\n\t"
  622. "adcs r3, r3, r8\n\t"
  623. "adc r4, r4, #0\n\t"
  624. /* A[7] * B[4] */
  625. "ldr r6, [%[a], #28]\n\t"
  626. "ldr r8, [%[b], #16]\n\t"
  627. "umull r6, r8, r6, r8\n\t"
  628. "adds r5, r5, r6\n\t"
  629. "adcs r3, r3, r8\n\t"
  630. "adc r4, r4, #0\n\t"
  631. "str r5, [%[r], #44]\n\t"
  632. "mov r5, #0\n\t"
  633. /* A[5] * B[7] - result word 12 accumulates in r3 */
  634. "ldr r6, [%[a], #20]\n\t"
  635. "ldr r8, [%[b], #28]\n\t"
  636. "umull r6, r8, r6, r8\n\t"
  637. "adds r3, r3, r6\n\t"
  638. "adcs r4, r4, r8\n\t"
  639. "adc r5, r5, #0\n\t"
  640. /* A[6] * B[6] */
  641. "ldr r6, [%[a], #24]\n\t"
  642. "ldr r8, [%[b], #24]\n\t"
  643. "umull r6, r8, r6, r8\n\t"
  644. "adds r3, r3, r6\n\t"
  645. "adcs r4, r4, r8\n\t"
  646. "adc r5, r5, #0\n\t"
  647. /* A[7] * B[5] */
  648. "ldr r6, [%[a], #28]\n\t"
  649. "ldr r8, [%[b], #20]\n\t"
  650. "umull r6, r8, r6, r8\n\t"
  651. "adds r3, r3, r6\n\t"
  652. "adcs r4, r4, r8\n\t"
  653. "adc r5, r5, #0\n\t"
  654. "str r3, [%[r], #48]\n\t"
  655. "mov r3, #0\n\t"
  656. /* A[6] * B[7] - result word 13 accumulates in r4 */
  657. "ldr r6, [%[a], #24]\n\t"
  658. "ldr r8, [%[b], #28]\n\t"
  659. "umull r6, r8, r6, r8\n\t"
  660. "adds r4, r4, r6\n\t"
  661. "adcs r5, r5, r8\n\t"
  662. "adc r3, r3, #0\n\t"
  663. /* A[7] * B[6] */
  664. "ldr r6, [%[a], #28]\n\t"
  665. "ldr r8, [%[b], #24]\n\t"
  666. "umull r6, r8, r6, r8\n\t"
  667. "adds r4, r4, r6\n\t"
  668. "adcs r5, r5, r8\n\t"
  669. "adc r3, r3, #0\n\t"
  670. "str r4, [%[r], #52]\n\t"
  671. "mov r4, #0\n\t"
  672. /* A[7] * B[7] - result words 14 (r5) and 15 (r3); nothing above to carry into */
  673. "ldr r6, [%[a], #28]\n\t"
  674. "ldr r8, [%[b], #28]\n\t"
  675. "umull r6, r8, r6, r8\n\t"
  676. "adds r5, r5, r6\n\t"
  677. "adc r3, r3, r8\n\t"
  678. "str r5, [%[r], #56]\n\t"
  679. "str r3, [%[r], #60]\n\t"
  680. /* Transfer tmp to r: copy the staged result words 0..7 into r, four words at a time */
  681. "ldr r3, [%[tmp], #0]\n\t"
  682. "ldr r4, [%[tmp], #4]\n\t"
  683. "ldr r5, [%[tmp], #8]\n\t"
  684. "ldr r6, [%[tmp], #12]\n\t"
  685. "str r3, [%[r], #0]\n\t"
  686. "str r4, [%[r], #4]\n\t"
  687. "str r5, [%[r], #8]\n\t"
  688. "str r6, [%[r], #12]\n\t"
  689. "ldr r3, [%[tmp], #16]\n\t"
  690. "ldr r4, [%[tmp], #20]\n\t"
  691. "ldr r5, [%[tmp], #24]\n\t"
  692. "ldr r6, [%[tmp], #28]\n\t"
  693. "str r3, [%[r], #16]\n\t"
  694. "str r4, [%[r], #20]\n\t"
  695. "str r5, [%[r], #24]\n\t"
  696. "str r6, [%[r], #28]\n\t"
  697. :
  698. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
  699. : "memory", "r3", "r4", "r5", "r6", "r8" /* scratch registers used above; "memory" because r and tmp are written through pointers */
  700. );
  701. }
  702. /* Square a and put result in r. (r = a * a)
  703. * Fully unrolled 8-word square: each A[i] * A[j] with i != j is computed once and counted twice (added twice, or summed in r9/r10/r11 and doubled); A[i] * A[i] is added once.
  704. * r A single precision integer. Receives 16 words: words 8..15 are stored directly, words 0..7 are staged in a local buffer and copied last.
  705. * a A single precision integer. Words 0..7 are read.
  706. */
  707. SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
  708. {
  709. sp_digit tmp_arr[8]; /* staging area for result words 0..7; presumably lets r alias a - TODO confirm with callers */
  710. sp_digit* tmp = tmp_arr;
  711. __asm__ __volatile__ (
  712. /* A[0] * A[0] - result word 0: low half to r3 (stored), high half to r4, r5 cleared */
  713. "ldr r6, [%[a], #0]\n\t"
  714. "umull r3, r4, r6, r6\n\t"
  715. "mov r5, #0\n\t"
  716. "str r3, [%[tmp], #0]\n\t"
  717. "mov r3, #0\n\t"
  718. /* A[0] * A[1] - result word 1; the product is added twice */
  719. "ldr r8, [%[a], #4]\n\t"
  720. "umull r6, r8, r6, r8\n\t"
  721. "adds r4, r4, r6\n\t"
  722. "adc r5, r5, r8\n\t" /* r5 was zero, so no carry out is possible here */
  723. "adds r4, r4, r6\n\t"
  724. "adcs r5, r5, r8\n\t"
  725. "adc r3, r3, #0\n\t"
  726. "str r4, [%[tmp], #4]\n\t"
  727. "mov r4, #0\n\t"
  728. /* A[0] * A[2] - result word 2; the product is added twice */
  729. "ldr r6, [%[a], #0]\n\t"
  730. "ldr r8, [%[a], #8]\n\t"
  731. "umull r6, r8, r6, r8\n\t"
  732. "adds r5, r5, r6\n\t"
  733. "adc r3, r3, r8\n\t"
  734. "adds r5, r5, r6\n\t"
  735. "adcs r3, r3, r8\n\t"
  736. "adc r4, r4, #0\n\t"
  737. /* A[1] * A[1] - added once */
  738. "ldr r6, [%[a], #4]\n\t"
  739. "umull r6, r8, r6, r6\n\t"
  740. "adds r5, r5, r6\n\t"
  741. "adcs r3, r3, r8\n\t"
  742. "adc r4, r4, #0\n\t"
  743. "str r5, [%[tmp], #8]\n\t"
  744. "mov r5, #0\n\t"
  745. /* A[0] * A[3] - result word 3; cross products are summed in r9/r10/r11 and doubled below */
  746. "ldr r6, [%[a], #0]\n\t"
  747. "ldr r8, [%[a], #12]\n\t"
  748. "umull r9, r10, r6, r8\n\t"
  749. "mov r11, #0\n\t"
  750. /* A[1] * A[2] */
  751. "ldr r6, [%[a], #4]\n\t"
  752. "ldr r8, [%[a], #8]\n\t"
  753. "umull r6, r8, r6, r8\n\t"
  754. "adds r9, r9, r6\n\t"
  755. "adcs r10, r10, r8\n\t"
  756. "adc r11, r11, #0\n\t"
  757. "adds r9, r9, r9\n\t" /* double the cross-product sum r11:r10:r9 */
  758. "adcs r10, r10, r10\n\t"
  759. "adc r11, r11, r11\n\t"
  760. "adds r3, r3, r9\n\t" /* fold the doubled sum into the running result */
  761. "adcs r4, r4, r10\n\t"
  762. "adc r5, r5, r11\n\t"
  763. "str r3, [%[tmp], #12]\n\t"
  764. "mov r3, #0\n\t"
  765. /* A[0] * A[4] - result word 4 */
  766. "ldr r6, [%[a], #0]\n\t"
  767. "ldr r8, [%[a], #16]\n\t"
  768. "umull r9, r10, r6, r8\n\t"
  769. "mov r11, #0\n\t"
  770. /* A[1] * A[3] */
  771. "ldr r6, [%[a], #4]\n\t"
  772. "ldr r8, [%[a], #12]\n\t"
  773. "umull r6, r8, r6, r8\n\t"
  774. "adds r9, r9, r6\n\t"
  775. "adcs r10, r10, r8\n\t"
  776. "adc r11, r11, #0\n\t"
  777. /* A[2] * A[2] - added once, directly to the running result (not doubled) */
  778. "ldr r6, [%[a], #8]\n\t"
  779. "umull r6, r8, r6, r6\n\t"
  780. "adds r4, r4, r6\n\t"
  781. "adcs r5, r5, r8\n\t"
  782. "adc r3, r3, #0\n\t"
  783. "adds r9, r9, r9\n\t"
  784. "adcs r10, r10, r10\n\t"
  785. "adc r11, r11, r11\n\t"
  786. "adds r4, r4, r9\n\t"
  787. "adcs r5, r5, r10\n\t"
  788. "adc r3, r3, r11\n\t"
  789. "str r4, [%[tmp], #16]\n\t"
  790. "mov r4, #0\n\t"
  791. /* A[0] * A[5] - result word 5 */
  792. "ldr r6, [%[a], #0]\n\t"
  793. "ldr r8, [%[a], #20]\n\t"
  794. "umull r9, r10, r6, r8\n\t"
  795. "mov r11, #0\n\t"
  796. /* A[1] * A[4] */
  797. "ldr r6, [%[a], #4]\n\t"
  798. "ldr r8, [%[a], #16]\n\t"
  799. "umull r6, r8, r6, r8\n\t"
  800. "adds r9, r9, r6\n\t"
  801. "adcs r10, r10, r8\n\t"
  802. "adc r11, r11, #0\n\t"
  803. /* A[2] * A[3] */
  804. "ldr r6, [%[a], #8]\n\t"
  805. "ldr r8, [%[a], #12]\n\t"
  806. "umull r6, r8, r6, r8\n\t"
  807. "adds r9, r9, r6\n\t"
  808. "adcs r10, r10, r8\n\t"
  809. "adc r11, r11, #0\n\t"
  810. "adds r9, r9, r9\n\t"
  811. "adcs r10, r10, r10\n\t"
  812. "adc r11, r11, r11\n\t"
  813. "adds r5, r5, r9\n\t"
  814. "adcs r3, r3, r10\n\t"
  815. "adc r4, r4, r11\n\t"
  816. "str r5, [%[tmp], #20]\n\t"
  817. "mov r5, #0\n\t"
  818. /* A[0] * A[6] - result word 6 */
  819. "ldr r6, [%[a], #0]\n\t"
  820. "ldr r8, [%[a], #24]\n\t"
  821. "umull r9, r10, r6, r8\n\t"
  822. "mov r11, #0\n\t"
  823. /* A[1] * A[5] */
  824. "ldr r6, [%[a], #4]\n\t"
  825. "ldr r8, [%[a], #20]\n\t"
  826. "umull r6, r8, r6, r8\n\t"
  827. "adds r9, r9, r6\n\t"
  828. "adcs r10, r10, r8\n\t"
  829. "adc r11, r11, #0\n\t"
  830. /* A[2] * A[4] */
  831. "ldr r6, [%[a], #8]\n\t"
  832. "ldr r8, [%[a], #16]\n\t"
  833. "umull r6, r8, r6, r8\n\t"
  834. "adds r9, r9, r6\n\t"
  835. "adcs r10, r10, r8\n\t"
  836. "adc r11, r11, #0\n\t"
  837. /* A[3] * A[3] - added once, directly to the running result */
  838. "ldr r6, [%[a], #12]\n\t"
  839. "umull r6, r8, r6, r6\n\t"
  840. "adds r3, r3, r6\n\t"
  841. "adcs r4, r4, r8\n\t"
  842. "adc r5, r5, #0\n\t"
  843. "adds r9, r9, r9\n\t"
  844. "adcs r10, r10, r10\n\t"
  845. "adc r11, r11, r11\n\t"
  846. "adds r3, r3, r9\n\t"
  847. "adcs r4, r4, r10\n\t"
  848. "adc r5, r5, r11\n\t"
  849. "str r3, [%[tmp], #24]\n\t"
  850. "mov r3, #0\n\t"
  851. /* A[0] * A[7] - result word 7 (last word staged in tmp) */
  852. "ldr r6, [%[a], #0]\n\t"
  853. "ldr r8, [%[a], #28]\n\t"
  854. "umull r9, r10, r6, r8\n\t"
  855. "mov r11, #0\n\t"
  856. /* A[1] * A[6] */
  857. "ldr r6, [%[a], #4]\n\t"
  858. "ldr r8, [%[a], #24]\n\t"
  859. "umull r6, r8, r6, r8\n\t"
  860. "adds r9, r9, r6\n\t"
  861. "adcs r10, r10, r8\n\t"
  862. "adc r11, r11, #0\n\t"
  863. /* A[2] * A[5] */
  864. "ldr r6, [%[a], #8]\n\t"
  865. "ldr r8, [%[a], #20]\n\t"
  866. "umull r6, r8, r6, r8\n\t"
  867. "adds r9, r9, r6\n\t"
  868. "adcs r10, r10, r8\n\t"
  869. "adc r11, r11, #0\n\t"
  870. /* A[3] * A[4] */
  871. "ldr r6, [%[a], #12]\n\t"
  872. "ldr r8, [%[a], #16]\n\t"
  873. "umull r6, r8, r6, r8\n\t"
  874. "adds r9, r9, r6\n\t"
  875. "adcs r10, r10, r8\n\t"
  876. "adc r11, r11, #0\n\t"
  877. "adds r9, r9, r9\n\t"
  878. "adcs r10, r10, r10\n\t"
  879. "adc r11, r11, r11\n\t"
  880. "adds r4, r4, r9\n\t"
  881. "adcs r5, r5, r10\n\t"
  882. "adc r3, r3, r11\n\t"
  883. "str r4, [%[tmp], #28]\n\t"
  884. "mov r4, #0\n\t"
  885. /* A[1] * A[7] - result word 8; from here on words are stored straight to r */
  886. "ldr r6, [%[a], #4]\n\t"
  887. "ldr r8, [%[a], #28]\n\t"
  888. "umull r9, r10, r6, r8\n\t"
  889. "mov r11, #0\n\t"
  890. /* A[2] * A[6] */
  891. "ldr r6, [%[a], #8]\n\t"
  892. "ldr r8, [%[a], #24]\n\t"
  893. "umull r6, r8, r6, r8\n\t"
  894. "adds r9, r9, r6\n\t"
  895. "adcs r10, r10, r8\n\t"
  896. "adc r11, r11, #0\n\t"
  897. /* A[3] * A[5] */
  898. "ldr r6, [%[a], #12]\n\t"
  899. "ldr r8, [%[a], #20]\n\t"
  900. "umull r6, r8, r6, r8\n\t"
  901. "adds r9, r9, r6\n\t"
  902. "adcs r10, r10, r8\n\t"
  903. "adc r11, r11, #0\n\t"
  904. /* A[4] * A[4] - added once, directly to the running result */
  905. "ldr r6, [%[a], #16]\n\t"
  906. "umull r6, r8, r6, r6\n\t"
  907. "adds r5, r5, r6\n\t"
  908. "adcs r3, r3, r8\n\t"
  909. "adc r4, r4, #0\n\t"
  910. "adds r9, r9, r9\n\t"
  911. "adcs r10, r10, r10\n\t"
  912. "adc r11, r11, r11\n\t"
  913. "adds r5, r5, r9\n\t"
  914. "adcs r3, r3, r10\n\t"
  915. "adc r4, r4, r11\n\t"
  916. "str r5, [%[r], #32]\n\t"
  917. "mov r5, #0\n\t"
  918. /* A[2] * A[7] - result word 9 */
  919. "ldr r6, [%[a], #8]\n\t"
  920. "ldr r8, [%[a], #28]\n\t"
  921. "umull r9, r10, r6, r8\n\t"
  922. "mov r11, #0\n\t"
  923. /* A[3] * A[6] */
  924. "ldr r6, [%[a], #12]\n\t"
  925. "ldr r8, [%[a], #24]\n\t"
  926. "umull r6, r8, r6, r8\n\t"
  927. "adds r9, r9, r6\n\t"
  928. "adcs r10, r10, r8\n\t"
  929. "adc r11, r11, #0\n\t"
  930. /* A[4] * A[5] */
  931. "ldr r6, [%[a], #16]\n\t"
  932. "ldr r8, [%[a], #20]\n\t"
  933. "umull r6, r8, r6, r8\n\t"
  934. "adds r9, r9, r6\n\t"
  935. "adcs r10, r10, r8\n\t"
  936. "adc r11, r11, #0\n\t"
  937. "adds r9, r9, r9\n\t"
  938. "adcs r10, r10, r10\n\t"
  939. "adc r11, r11, r11\n\t"
  940. "adds r3, r3, r9\n\t"
  941. "adcs r4, r4, r10\n\t"
  942. "adc r5, r5, r11\n\t"
  943. "str r3, [%[r], #36]\n\t"
  944. "mov r3, #0\n\t"
  945. /* A[3] * A[7] - result word 10 */
  946. "ldr r6, [%[a], #12]\n\t"
  947. "ldr r8, [%[a], #28]\n\t"
  948. "umull r9, r10, r6, r8\n\t"
  949. "mov r11, #0\n\t"
  950. /* A[4] * A[6] */
  951. "ldr r6, [%[a], #16]\n\t"
  952. "ldr r8, [%[a], #24]\n\t"
  953. "umull r6, r8, r6, r8\n\t"
  954. "adds r9, r9, r6\n\t"
  955. "adcs r10, r10, r8\n\t"
  956. "adc r11, r11, #0\n\t"
  957. /* A[5] * A[5] - added once, directly to the running result */
  958. "ldr r6, [%[a], #20]\n\t"
  959. "umull r6, r8, r6, r6\n\t"
  960. "adds r4, r4, r6\n\t"
  961. "adcs r5, r5, r8\n\t"
  962. "adc r3, r3, #0\n\t"
  963. "adds r9, r9, r9\n\t"
  964. "adcs r10, r10, r10\n\t"
  965. "adc r11, r11, r11\n\t"
  966. "adds r4, r4, r9\n\t"
  967. "adcs r5, r5, r10\n\t"
  968. "adc r3, r3, r11\n\t"
  969. "str r4, [%[r], #40]\n\t"
  970. "mov r4, #0\n\t"
  971. /* A[4] * A[7] - result word 11; each cross product here is added twice instead of using r9..r11 */
  972. "ldr r6, [%[a], #16]\n\t"
  973. "ldr r8, [%[a], #28]\n\t"
  974. "umull r6, r8, r6, r8\n\t"
  975. "adds r5, r5, r6\n\t"
  976. "adcs r3, r3, r8\n\t"
  977. "adc r4, r4, #0\n\t"
  978. "adds r5, r5, r6\n\t"
  979. "adcs r3, r3, r8\n\t"
  980. "adc r4, r4, #0\n\t"
  981. /* A[5] * A[6] */
  982. "ldr r6, [%[a], #20]\n\t"
  983. "ldr r8, [%[a], #24]\n\t"
  984. "umull r6, r8, r6, r8\n\t"
  985. "adds r5, r5, r6\n\t"
  986. "adcs r3, r3, r8\n\t"
  987. "adc r4, r4, #0\n\t"
  988. "adds r5, r5, r6\n\t"
  989. "adcs r3, r3, r8\n\t"
  990. "adc r4, r4, #0\n\t"
  991. "str r5, [%[r], #44]\n\t"
  992. "mov r5, #0\n\t"
  993. /* A[5] * A[7] - result word 12; added twice */
  994. "ldr r6, [%[a], #20]\n\t"
  995. "ldr r8, [%[a], #28]\n\t"
  996. "umull r6, r8, r6, r8\n\t"
  997. "adds r3, r3, r6\n\t"
  998. "adcs r4, r4, r8\n\t"
  999. "adc r5, r5, #0\n\t"
  1000. "adds r3, r3, r6\n\t"
  1001. "adcs r4, r4, r8\n\t"
  1002. "adc r5, r5, #0\n\t"
  1003. /* A[6] * A[6] - added once */
  1004. "ldr r6, [%[a], #24]\n\t"
  1005. "umull r6, r8, r6, r6\n\t"
  1006. "adds r3, r3, r6\n\t"
  1007. "adcs r4, r4, r8\n\t"
  1008. "adc r5, r5, #0\n\t"
  1009. "str r3, [%[r], #48]\n\t"
  1010. "mov r3, #0\n\t"
  1011. /* A[6] * A[7] - result word 13; added twice */
  1012. "ldr r6, [%[a], #24]\n\t"
  1013. "ldr r8, [%[a], #28]\n\t"
  1014. "umull r6, r8, r6, r8\n\t"
  1015. "adds r4, r4, r6\n\t"
  1016. "adcs r5, r5, r8\n\t"
  1017. "adc r3, r3, #0\n\t"
  1018. "adds r4, r4, r6\n\t"
  1019. "adcs r5, r5, r8\n\t"
  1020. "adc r3, r3, #0\n\t"
  1021. "str r4, [%[r], #52]\n\t"
  1022. "mov r4, #0\n\t"
  1023. /* A[7] * A[7] - result words 14 (r5) and 15 (r3); nothing above to carry into */
  1024. "ldr r6, [%[a], #28]\n\t"
  1025. "umull r6, r8, r6, r6\n\t"
  1026. "adds r5, r5, r6\n\t"
  1027. "adc r3, r3, r8\n\t"
  1028. "str r5, [%[r], #56]\n\t"
  1029. "str r3, [%[r], #60]\n\t"
  1030. /* Transfer tmp to r: copy the staged result words 0..7 into r, four words at a time */
  1031. "ldr r3, [%[tmp], #0]\n\t"
  1032. "ldr r4, [%[tmp], #4]\n\t"
  1033. "ldr r5, [%[tmp], #8]\n\t"
  1034. "ldr r6, [%[tmp], #12]\n\t"
  1035. "str r3, [%[r], #0]\n\t"
  1036. "str r4, [%[r], #4]\n\t"
  1037. "str r5, [%[r], #8]\n\t"
  1038. "str r6, [%[r], #12]\n\t"
  1039. "ldr r3, [%[tmp], #16]\n\t"
  1040. "ldr r4, [%[tmp], #20]\n\t"
  1041. "ldr r5, [%[tmp], #24]\n\t"
  1042. "ldr r6, [%[tmp], #28]\n\t"
  1043. "str r3, [%[r], #16]\n\t"
  1044. "str r4, [%[r], #20]\n\t"
  1045. "str r5, [%[r], #24]\n\t"
  1046. "str r6, [%[r], #28]\n\t"
  1047. :
  1048. : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
  1049. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11" /* scratch registers used above; "memory" because r and tmp are written through pointers */
  1050. );
  1051. }
  1052. /* Add b to a into r. (r = a + b)
  1053. * Adds 8 words, two per step, with the carry flag chained from each step to the next; returns the final carry (0 or 1).
  1054. * r A single precision integer. Receives the 8-word sum.
  1055. * a A single precision integer. 8 words are read.
  1056. * b A single precision integer. 8 words are read.
  1057. */
  1058. SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
  1059. const sp_digit* b)
  1060. {
  1061. sp_digit c = 0;
  1062. __asm__ __volatile__ (
  1063. "ldm %[a]!, {r4, r5}\n\t" /* load two words of a and advance the pointer */
  1064. "ldm %[b]!, {r6, r8}\n\t"
  1065. "adds r4, r4, r6\n\t" /* first add starts the carry chain */
  1066. "adcs r5, r5, r8\n\t"
  1067. "stm %[r]!, {r4, r5}\n\t"
  1068. "ldm %[a]!, {r4, r5}\n\t"
  1069. "ldm %[b]!, {r6, r8}\n\t"
  1070. "adcs r4, r4, r6\n\t" /* all later adds consume and produce the carry */
  1071. "adcs r5, r5, r8\n\t"
  1072. "stm %[r]!, {r4, r5}\n\t"
  1073. "ldm %[a]!, {r4, r5}\n\t"
  1074. "ldm %[b]!, {r6, r8}\n\t"
  1075. "adcs r4, r4, r6\n\t"
  1076. "adcs r5, r5, r8\n\t"
  1077. "stm %[r]!, {r4, r5}\n\t"
  1078. "ldm %[a]!, {r4, r5}\n\t"
  1079. "ldm %[b]!, {r6, r8}\n\t"
  1080. "adcs r4, r4, r6\n\t"
  1081. "adcs r5, r5, r8\n\t"
  1082. "stm %[r]!, {r4, r5}\n\t"
  1083. "mov %[c], #0\n\t" /* c = 0, then c = 0 + 0 + carry: the carry out of the top word */
  1084. "adc %[c], %[c], %[c]\n\t"
  1085. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) /* pointers are read-write because ldm/stm with '!' advance them */
  1086. :
  1087. : "memory", "r4", "r5", "r6", "r8"
  1088. );
  1089. return c;
  1090. }
  1091. /* Sub b from a into r. (r = a - b)
  1092. *
  1093. * r A single precision integer.
  1094. * a A single precision integer.
  1095. * b A single precision integer.
  1096. */
  1097. SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
  1098. const sp_digit* b)
  1099. {
  1100. sp_digit c = 0;
  1101. __asm__ __volatile__ (
  1102. "ldm %[a], {r3, r4}\n\t"
  1103. "ldm %[b]!, {r5, r6}\n\t"
  1104. "subs r3, r3, r5\n\t"
  1105. "sbcs r4, r4, r6\n\t"
  1106. "stm %[a]!, {r3, r4}\n\t"
  1107. "ldm %[a], {r3, r4}\n\t"
  1108. "ldm %[b]!, {r5, r6}\n\t"
  1109. "sbcs r3, r3, r5\n\t"
  1110. "sbcs r4, r4, r6\n\t"
  1111. "stm %[a]!, {r3, r4}\n\t"
  1112. "ldm %[a], {r3, r4}\n\t"
  1113. "ldm %[b]!, {r5, r6}\n\t"
  1114. "sbcs r3, r3, r5\n\t"
  1115. "sbcs r4, r4, r6\n\t"
  1116. "stm %[a]!, {r3, r4}\n\t"
  1117. "ldm %[a], {r3, r4}\n\t"
  1118. "ldm %[b]!, {r5, r6}\n\t"
  1119. "sbcs r3, r3, r5\n\t"
  1120. "sbcs r4, r4, r6\n\t"
  1121. "stm %[a]!, {r3, r4}\n\t"
  1122. "ldm %[a], {r3, r4}\n\t"
  1123. "ldm %[b]!, {r5, r6}\n\t"
  1124. "sbcs r3, r3, r5\n\t"
  1125. "sbcs r4, r4, r6\n\t"
  1126. "stm %[a]!, {r3, r4}\n\t"
  1127. "ldm %[a], {r3, r4}\n\t"
  1128. "ldm %[b]!, {r5, r6}\n\t"
  1129. "sbcs r3, r3, r5\n\t"
  1130. "sbcs r4, r4, r6\n\t"
  1131. "stm %[a]!, {r3, r4}\n\t"
  1132. "ldm %[a], {r3, r4}\n\t"
  1133. "ldm %[b]!, {r5, r6}\n\t"
  1134. "sbcs r3, r3, r5\n\t"
  1135. "sbcs r4, r4, r6\n\t"
  1136. "stm %[a]!, {r3, r4}\n\t"
  1137. "ldm %[a], {r3, r4}\n\t"
  1138. "ldm %[b]!, {r5, r6}\n\t"
  1139. "sbcs r3, r3, r5\n\t"
  1140. "sbcs r4, r4, r6\n\t"
  1141. "stm %[a]!, {r3, r4}\n\t"
  1142. "sbc %[c], %[c], %[c]\n\t"
  1143. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  1144. :
  1145. : "memory", "r3", "r4", "r5", "r6"
  1146. );
  1147. return c;
  1148. }
  1149. /* Add b to a into r. (r = a + b)
  1150. *
  1151. * r A single precision integer.
  1152. * a A single precision integer.
  1153. * b A single precision integer.
  1154. */
  1155. SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
  1156. const sp_digit* b)
  1157. {
  1158. sp_digit c = 0;
  1159. __asm__ __volatile__ (
  1160. "ldm %[a]!, {r4, r5}\n\t"
  1161. "ldm %[b]!, {r6, r8}\n\t"
  1162. "adds r4, r4, r6\n\t"
  1163. "adcs r5, r5, r8\n\t"
  1164. "stm %[r]!, {r4, r5}\n\t"
  1165. "ldm %[a]!, {r4, r5}\n\t"
  1166. "ldm %[b]!, {r6, r8}\n\t"
  1167. "adcs r4, r4, r6\n\t"
  1168. "adcs r5, r5, r8\n\t"
  1169. "stm %[r]!, {r4, r5}\n\t"
  1170. "ldm %[a]!, {r4, r5}\n\t"
  1171. "ldm %[b]!, {r6, r8}\n\t"
  1172. "adcs r4, r4, r6\n\t"
  1173. "adcs r5, r5, r8\n\t"
  1174. "stm %[r]!, {r4, r5}\n\t"
  1175. "ldm %[a]!, {r4, r5}\n\t"
  1176. "ldm %[b]!, {r6, r8}\n\t"
  1177. "adcs r4, r4, r6\n\t"
  1178. "adcs r5, r5, r8\n\t"
  1179. "stm %[r]!, {r4, r5}\n\t"
  1180. "ldm %[a]!, {r4, r5}\n\t"
  1181. "ldm %[b]!, {r6, r8}\n\t"
  1182. "adcs r4, r4, r6\n\t"
  1183. "adcs r5, r5, r8\n\t"
  1184. "stm %[r]!, {r4, r5}\n\t"
  1185. "ldm %[a]!, {r4, r5}\n\t"
  1186. "ldm %[b]!, {r6, r8}\n\t"
  1187. "adcs r4, r4, r6\n\t"
  1188. "adcs r5, r5, r8\n\t"
  1189. "stm %[r]!, {r4, r5}\n\t"
  1190. "ldm %[a]!, {r4, r5}\n\t"
  1191. "ldm %[b]!, {r6, r8}\n\t"
  1192. "adcs r4, r4, r6\n\t"
  1193. "adcs r5, r5, r8\n\t"
  1194. "stm %[r]!, {r4, r5}\n\t"
  1195. "ldm %[a]!, {r4, r5}\n\t"
  1196. "ldm %[b]!, {r6, r8}\n\t"
  1197. "adcs r4, r4, r6\n\t"
  1198. "adcs r5, r5, r8\n\t"
  1199. "stm %[r]!, {r4, r5}\n\t"
  1200. "mov %[c], #0\n\t"
  1201. "adc %[c], %[c], %[c]\n\t"
  1202. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  1203. :
  1204. : "memory", "r4", "r5", "r6", "r8"
  1205. );
  1206. return c;
  1207. }
  1208. /* AND m into each word of a and store in r.
  1209. *
  1210. * r A single precision integer.
  1211. * a A single precision integer.
  1212. * m Mask to AND against each digit.
  1213. */
  1214. static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
  1215. {
  1216. #ifdef WOLFSSL_SP_SMALL
  1217. int i;
  1218. for (i=0; i<8; i++) {
  1219. r[i] = a[i] & m;
  1220. }
  1221. #else
  1222. r[0] = a[0] & m;
  1223. r[1] = a[1] & m;
  1224. r[2] = a[2] & m;
  1225. r[3] = a[3] & m;
  1226. r[4] = a[4] & m;
  1227. r[5] = a[5] & m;
  1228. r[6] = a[6] & m;
  1229. r[7] = a[7] & m;
  1230. #endif
  1231. }
  1232. /* Multiply a and b into r. (r = a * b)
  1233. *
  1234. * r A single precision integer.
  1235. * a A single precision integer.
  1236. * b A single precision integer.
  1237. */
  1238. SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
  1239. const sp_digit* b)
  1240. {
  1241. sp_digit* z0 = r;
  1242. sp_digit z1[16];
  1243. sp_digit a1[8];
  1244. sp_digit b1[8];
  1245. sp_digit z2[16];
  1246. sp_digit u, ca, cb;
  1247. ca = sp_2048_add_8(a1, a, &a[8]);
  1248. cb = sp_2048_add_8(b1, b, &b[8]);
  1249. u = ca & cb;
  1250. sp_2048_mul_8(z1, a1, b1);
  1251. sp_2048_mul_8(z2, &a[8], &b[8]);
  1252. sp_2048_mul_8(z0, a, b);
  1253. sp_2048_mask_8(r + 16, a1, 0 - cb);
  1254. sp_2048_mask_8(b1, b1, 0 - ca);
  1255. u += sp_2048_add_8(r + 16, r + 16, b1);
  1256. u += sp_2048_sub_in_place_16(z1, z2);
  1257. u += sp_2048_sub_in_place_16(z1, z0);
  1258. u += sp_2048_add_16(r + 8, r + 8, z1);
  1259. r[24] = u;
  1260. XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
  1261. (void)sp_2048_add_16(r + 16, r + 16, z2);
  1262. }
  1263. /* Square a and put result in r. (r = a * a)
  1264. *
  1265. * r A single precision integer.
  1266. * a A single precision integer.
  1267. */
  1268. SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
  1269. {
  1270. sp_digit* z0 = r;
  1271. sp_digit z2[16];
  1272. sp_digit z1[16];
  1273. sp_digit a1[8];
  1274. sp_digit u;
  1275. u = sp_2048_add_8(a1, a, &a[8]);
  1276. sp_2048_sqr_8(z1, a1);
  1277. sp_2048_sqr_8(z2, &a[8]);
  1278. sp_2048_sqr_8(z0, a);
  1279. sp_2048_mask_8(r + 16, a1, 0 - u);
  1280. u += sp_2048_add_8(r + 16, r + 16, r + 16);
  1281. u += sp_2048_sub_in_place_16(z1, z2);
  1282. u += sp_2048_sub_in_place_16(z1, z0);
  1283. u += sp_2048_add_16(r + 8, r + 8, z1);
  1284. r[24] = u;
  1285. XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
  1286. (void)sp_2048_add_16(r + 16, r + 16, z2);
  1287. }
  1288. /* Sub b from a into r. (r = a - b)
  1289. *
  1290. * r A single precision integer.
  1291. * a A single precision integer.
  1292. * b A single precision integer.
  1293. */
  1294. SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
  1295. const sp_digit* b)
  1296. {
  1297. sp_digit c = 0;
  1298. __asm__ __volatile__ (
  1299. "ldm %[a], {r3, r4}\n\t"
  1300. "ldm %[b]!, {r5, r6}\n\t"
  1301. "subs r3, r3, r5\n\t"
  1302. "sbcs r4, r4, r6\n\t"
  1303. "stm %[a]!, {r3, r4}\n\t"
  1304. "ldm %[a], {r3, r4}\n\t"
  1305. "ldm %[b]!, {r5, r6}\n\t"
  1306. "sbcs r3, r3, r5\n\t"
  1307. "sbcs r4, r4, r6\n\t"
  1308. "stm %[a]!, {r3, r4}\n\t"
  1309. "ldm %[a], {r3, r4}\n\t"
  1310. "ldm %[b]!, {r5, r6}\n\t"
  1311. "sbcs r3, r3, r5\n\t"
  1312. "sbcs r4, r4, r6\n\t"
  1313. "stm %[a]!, {r3, r4}\n\t"
  1314. "ldm %[a], {r3, r4}\n\t"
  1315. "ldm %[b]!, {r5, r6}\n\t"
  1316. "sbcs r3, r3, r5\n\t"
  1317. "sbcs r4, r4, r6\n\t"
  1318. "stm %[a]!, {r3, r4}\n\t"
  1319. "ldm %[a], {r3, r4}\n\t"
  1320. "ldm %[b]!, {r5, r6}\n\t"
  1321. "sbcs r3, r3, r5\n\t"
  1322. "sbcs r4, r4, r6\n\t"
  1323. "stm %[a]!, {r3, r4}\n\t"
  1324. "ldm %[a], {r3, r4}\n\t"
  1325. "ldm %[b]!, {r5, r6}\n\t"
  1326. "sbcs r3, r3, r5\n\t"
  1327. "sbcs r4, r4, r6\n\t"
  1328. "stm %[a]!, {r3, r4}\n\t"
  1329. "ldm %[a], {r3, r4}\n\t"
  1330. "ldm %[b]!, {r5, r6}\n\t"
  1331. "sbcs r3, r3, r5\n\t"
  1332. "sbcs r4, r4, r6\n\t"
  1333. "stm %[a]!, {r3, r4}\n\t"
  1334. "ldm %[a], {r3, r4}\n\t"
  1335. "ldm %[b]!, {r5, r6}\n\t"
  1336. "sbcs r3, r3, r5\n\t"
  1337. "sbcs r4, r4, r6\n\t"
  1338. "stm %[a]!, {r3, r4}\n\t"
  1339. "ldm %[a], {r3, r4}\n\t"
  1340. "ldm %[b]!, {r5, r6}\n\t"
  1341. "sbcs r3, r3, r5\n\t"
  1342. "sbcs r4, r4, r6\n\t"
  1343. "stm %[a]!, {r3, r4}\n\t"
  1344. "ldm %[a], {r3, r4}\n\t"
  1345. "ldm %[b]!, {r5, r6}\n\t"
  1346. "sbcs r3, r3, r5\n\t"
  1347. "sbcs r4, r4, r6\n\t"
  1348. "stm %[a]!, {r3, r4}\n\t"
  1349. "ldm %[a], {r3, r4}\n\t"
  1350. "ldm %[b]!, {r5, r6}\n\t"
  1351. "sbcs r3, r3, r5\n\t"
  1352. "sbcs r4, r4, r6\n\t"
  1353. "stm %[a]!, {r3, r4}\n\t"
  1354. "ldm %[a], {r3, r4}\n\t"
  1355. "ldm %[b]!, {r5, r6}\n\t"
  1356. "sbcs r3, r3, r5\n\t"
  1357. "sbcs r4, r4, r6\n\t"
  1358. "stm %[a]!, {r3, r4}\n\t"
  1359. "ldm %[a], {r3, r4}\n\t"
  1360. "ldm %[b]!, {r5, r6}\n\t"
  1361. "sbcs r3, r3, r5\n\t"
  1362. "sbcs r4, r4, r6\n\t"
  1363. "stm %[a]!, {r3, r4}\n\t"
  1364. "ldm %[a], {r3, r4}\n\t"
  1365. "ldm %[b]!, {r5, r6}\n\t"
  1366. "sbcs r3, r3, r5\n\t"
  1367. "sbcs r4, r4, r6\n\t"
  1368. "stm %[a]!, {r3, r4}\n\t"
  1369. "ldm %[a], {r3, r4}\n\t"
  1370. "ldm %[b]!, {r5, r6}\n\t"
  1371. "sbcs r3, r3, r5\n\t"
  1372. "sbcs r4, r4, r6\n\t"
  1373. "stm %[a]!, {r3, r4}\n\t"
  1374. "ldm %[a], {r3, r4}\n\t"
  1375. "ldm %[b]!, {r5, r6}\n\t"
  1376. "sbcs r3, r3, r5\n\t"
  1377. "sbcs r4, r4, r6\n\t"
  1378. "stm %[a]!, {r3, r4}\n\t"
  1379. "sbc %[c], %[c], %[c]\n\t"
  1380. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  1381. :
  1382. : "memory", "r3", "r4", "r5", "r6"
  1383. );
  1384. return c;
  1385. }
  1386. /* Add b to a into r. (r = a + b)
  1387. *
  1388. * r A single precision integer.
  1389. * a A single precision integer.
  1390. * b A single precision integer.
  1391. */
  1392. SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
  1393. const sp_digit* b)
  1394. {
  1395. sp_digit c = 0;
  1396. __asm__ __volatile__ (
  1397. "ldm %[a]!, {r4, r5}\n\t"
  1398. "ldm %[b]!, {r6, r8}\n\t"
  1399. "adds r4, r4, r6\n\t"
  1400. "adcs r5, r5, r8\n\t"
  1401. "stm %[r]!, {r4, r5}\n\t"
  1402. "ldm %[a]!, {r4, r5}\n\t"
  1403. "ldm %[b]!, {r6, r8}\n\t"
  1404. "adcs r4, r4, r6\n\t"
  1405. "adcs r5, r5, r8\n\t"
  1406. "stm %[r]!, {r4, r5}\n\t"
  1407. "ldm %[a]!, {r4, r5}\n\t"
  1408. "ldm %[b]!, {r6, r8}\n\t"
  1409. "adcs r4, r4, r6\n\t"
  1410. "adcs r5, r5, r8\n\t"
  1411. "stm %[r]!, {r4, r5}\n\t"
  1412. "ldm %[a]!, {r4, r5}\n\t"
  1413. "ldm %[b]!, {r6, r8}\n\t"
  1414. "adcs r4, r4, r6\n\t"
  1415. "adcs r5, r5, r8\n\t"
  1416. "stm %[r]!, {r4, r5}\n\t"
  1417. "ldm %[a]!, {r4, r5}\n\t"
  1418. "ldm %[b]!, {r6, r8}\n\t"
  1419. "adcs r4, r4, r6\n\t"
  1420. "adcs r5, r5, r8\n\t"
  1421. "stm %[r]!, {r4, r5}\n\t"
  1422. "ldm %[a]!, {r4, r5}\n\t"
  1423. "ldm %[b]!, {r6, r8}\n\t"
  1424. "adcs r4, r4, r6\n\t"
  1425. "adcs r5, r5, r8\n\t"
  1426. "stm %[r]!, {r4, r5}\n\t"
  1427. "ldm %[a]!, {r4, r5}\n\t"
  1428. "ldm %[b]!, {r6, r8}\n\t"
  1429. "adcs r4, r4, r6\n\t"
  1430. "adcs r5, r5, r8\n\t"
  1431. "stm %[r]!, {r4, r5}\n\t"
  1432. "ldm %[a]!, {r4, r5}\n\t"
  1433. "ldm %[b]!, {r6, r8}\n\t"
  1434. "adcs r4, r4, r6\n\t"
  1435. "adcs r5, r5, r8\n\t"
  1436. "stm %[r]!, {r4, r5}\n\t"
  1437. "ldm %[a]!, {r4, r5}\n\t"
  1438. "ldm %[b]!, {r6, r8}\n\t"
  1439. "adcs r4, r4, r6\n\t"
  1440. "adcs r5, r5, r8\n\t"
  1441. "stm %[r]!, {r4, r5}\n\t"
  1442. "ldm %[a]!, {r4, r5}\n\t"
  1443. "ldm %[b]!, {r6, r8}\n\t"
  1444. "adcs r4, r4, r6\n\t"
  1445. "adcs r5, r5, r8\n\t"
  1446. "stm %[r]!, {r4, r5}\n\t"
  1447. "ldm %[a]!, {r4, r5}\n\t"
  1448. "ldm %[b]!, {r6, r8}\n\t"
  1449. "adcs r4, r4, r6\n\t"
  1450. "adcs r5, r5, r8\n\t"
  1451. "stm %[r]!, {r4, r5}\n\t"
  1452. "ldm %[a]!, {r4, r5}\n\t"
  1453. "ldm %[b]!, {r6, r8}\n\t"
  1454. "adcs r4, r4, r6\n\t"
  1455. "adcs r5, r5, r8\n\t"
  1456. "stm %[r]!, {r4, r5}\n\t"
  1457. "ldm %[a]!, {r4, r5}\n\t"
  1458. "ldm %[b]!, {r6, r8}\n\t"
  1459. "adcs r4, r4, r6\n\t"
  1460. "adcs r5, r5, r8\n\t"
  1461. "stm %[r]!, {r4, r5}\n\t"
  1462. "ldm %[a]!, {r4, r5}\n\t"
  1463. "ldm %[b]!, {r6, r8}\n\t"
  1464. "adcs r4, r4, r6\n\t"
  1465. "adcs r5, r5, r8\n\t"
  1466. "stm %[r]!, {r4, r5}\n\t"
  1467. "ldm %[a]!, {r4, r5}\n\t"
  1468. "ldm %[b]!, {r6, r8}\n\t"
  1469. "adcs r4, r4, r6\n\t"
  1470. "adcs r5, r5, r8\n\t"
  1471. "stm %[r]!, {r4, r5}\n\t"
  1472. "ldm %[a]!, {r4, r5}\n\t"
  1473. "ldm %[b]!, {r6, r8}\n\t"
  1474. "adcs r4, r4, r6\n\t"
  1475. "adcs r5, r5, r8\n\t"
  1476. "stm %[r]!, {r4, r5}\n\t"
  1477. "mov %[c], #0\n\t"
  1478. "adc %[c], %[c], %[c]\n\t"
  1479. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  1480. :
  1481. : "memory", "r4", "r5", "r6", "r8"
  1482. );
  1483. return c;
  1484. }
  1485. /* AND m into each word of a and store in r.
  1486. *
  1487. * r A single precision integer.
  1488. * a A single precision integer.
  1489. * m Mask to AND against each digit.
  1490. */
  1491. static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
  1492. {
  1493. #ifdef WOLFSSL_SP_SMALL
  1494. int i;
  1495. for (i=0; i<16; i++) {
  1496. r[i] = a[i] & m;
  1497. }
  1498. #else
  1499. int i;
  1500. for (i = 0; i < 16; i += 8) {
  1501. r[i+0] = a[i+0] & m;
  1502. r[i+1] = a[i+1] & m;
  1503. r[i+2] = a[i+2] & m;
  1504. r[i+3] = a[i+3] & m;
  1505. r[i+4] = a[i+4] & m;
  1506. r[i+5] = a[i+5] & m;
  1507. r[i+6] = a[i+6] & m;
  1508. r[i+7] = a[i+7] & m;
  1509. }
  1510. #endif
  1511. }
  1512. /* Multiply a and b into r. (r = a * b)
  1513. *
  1514. * r A single precision integer.
  1515. * a A single precision integer.
  1516. * b A single precision integer.
  1517. */
  1518. SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
  1519. const sp_digit* b)
  1520. {
  1521. sp_digit* z0 = r;
  1522. sp_digit z1[32];
  1523. sp_digit a1[16];
  1524. sp_digit b1[16];
  1525. sp_digit z2[32];
  1526. sp_digit u, ca, cb;
  1527. ca = sp_2048_add_16(a1, a, &a[16]);
  1528. cb = sp_2048_add_16(b1, b, &b[16]);
  1529. u = ca & cb;
  1530. sp_2048_mul_16(z1, a1, b1);
  1531. sp_2048_mul_16(z2, &a[16], &b[16]);
  1532. sp_2048_mul_16(z0, a, b);
  1533. sp_2048_mask_16(r + 32, a1, 0 - cb);
  1534. sp_2048_mask_16(b1, b1, 0 - ca);
  1535. u += sp_2048_add_16(r + 32, r + 32, b1);
  1536. u += sp_2048_sub_in_place_32(z1, z2);
  1537. u += sp_2048_sub_in_place_32(z1, z0);
  1538. u += sp_2048_add_32(r + 16, r + 16, z1);
  1539. r[48] = u;
  1540. XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
  1541. (void)sp_2048_add_32(r + 32, r + 32, z2);
  1542. }
  1543. /* Square a and put result in r. (r = a * a)
  1544. *
  1545. * r A single precision integer.
  1546. * a A single precision integer.
  1547. */
  1548. SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
  1549. {
  1550. sp_digit* z0 = r;
  1551. sp_digit z2[32];
  1552. sp_digit z1[32];
  1553. sp_digit a1[16];
  1554. sp_digit u;
  1555. u = sp_2048_add_16(a1, a, &a[16]);
  1556. sp_2048_sqr_16(z1, a1);
  1557. sp_2048_sqr_16(z2, &a[16]);
  1558. sp_2048_sqr_16(z0, a);
  1559. sp_2048_mask_16(r + 32, a1, 0 - u);
  1560. u += sp_2048_add_16(r + 32, r + 32, r + 32);
  1561. u += sp_2048_sub_in_place_32(z1, z2);
  1562. u += sp_2048_sub_in_place_32(z1, z0);
  1563. u += sp_2048_add_32(r + 16, r + 16, z1);
  1564. r[48] = u;
  1565. XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
  1566. (void)sp_2048_add_32(r + 32, r + 32, z2);
  1567. }
  1568. /* Sub b from a into r. (r = a - b)
  1569. *
  1570. * r A single precision integer.
  1571. * a A single precision integer.
  1572. * b A single precision integer.
  1573. */
  1574. SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
  1575. const sp_digit* b)
  1576. {
  1577. sp_digit c = 0;
  1578. __asm__ __volatile__ (
  1579. "ldm %[a], {r3, r4}\n\t"
  1580. "ldm %[b]!, {r5, r6}\n\t"
  1581. "subs r3, r3, r5\n\t"
  1582. "sbcs r4, r4, r6\n\t"
  1583. "stm %[a]!, {r3, r4}\n\t"
  1584. "ldm %[a], {r3, r4}\n\t"
  1585. "ldm %[b]!, {r5, r6}\n\t"
  1586. "sbcs r3, r3, r5\n\t"
  1587. "sbcs r4, r4, r6\n\t"
  1588. "stm %[a]!, {r3, r4}\n\t"
  1589. "ldm %[a], {r3, r4}\n\t"
  1590. "ldm %[b]!, {r5, r6}\n\t"
  1591. "sbcs r3, r3, r5\n\t"
  1592. "sbcs r4, r4, r6\n\t"
  1593. "stm %[a]!, {r3, r4}\n\t"
  1594. "ldm %[a], {r3, r4}\n\t"
  1595. "ldm %[b]!, {r5, r6}\n\t"
  1596. "sbcs r3, r3, r5\n\t"
  1597. "sbcs r4, r4, r6\n\t"
  1598. "stm %[a]!, {r3, r4}\n\t"
  1599. "ldm %[a], {r3, r4}\n\t"
  1600. "ldm %[b]!, {r5, r6}\n\t"
  1601. "sbcs r3, r3, r5\n\t"
  1602. "sbcs r4, r4, r6\n\t"
  1603. "stm %[a]!, {r3, r4}\n\t"
  1604. "ldm %[a], {r3, r4}\n\t"
  1605. "ldm %[b]!, {r5, r6}\n\t"
  1606. "sbcs r3, r3, r5\n\t"
  1607. "sbcs r4, r4, r6\n\t"
  1608. "stm %[a]!, {r3, r4}\n\t"
  1609. "ldm %[a], {r3, r4}\n\t"
  1610. "ldm %[b]!, {r5, r6}\n\t"
  1611. "sbcs r3, r3, r5\n\t"
  1612. "sbcs r4, r4, r6\n\t"
  1613. "stm %[a]!, {r3, r4}\n\t"
  1614. "ldm %[a], {r3, r4}\n\t"
  1615. "ldm %[b]!, {r5, r6}\n\t"
  1616. "sbcs r3, r3, r5\n\t"
  1617. "sbcs r4, r4, r6\n\t"
  1618. "stm %[a]!, {r3, r4}\n\t"
  1619. "ldm %[a], {r3, r4}\n\t"
  1620. "ldm %[b]!, {r5, r6}\n\t"
  1621. "sbcs r3, r3, r5\n\t"
  1622. "sbcs r4, r4, r6\n\t"
  1623. "stm %[a]!, {r3, r4}\n\t"
  1624. "ldm %[a], {r3, r4}\n\t"
  1625. "ldm %[b]!, {r5, r6}\n\t"
  1626. "sbcs r3, r3, r5\n\t"
  1627. "sbcs r4, r4, r6\n\t"
  1628. "stm %[a]!, {r3, r4}\n\t"
  1629. "ldm %[a], {r3, r4}\n\t"
  1630. "ldm %[b]!, {r5, r6}\n\t"
  1631. "sbcs r3, r3, r5\n\t"
  1632. "sbcs r4, r4, r6\n\t"
  1633. "stm %[a]!, {r3, r4}\n\t"
  1634. "ldm %[a], {r3, r4}\n\t"
  1635. "ldm %[b]!, {r5, r6}\n\t"
  1636. "sbcs r3, r3, r5\n\t"
  1637. "sbcs r4, r4, r6\n\t"
  1638. "stm %[a]!, {r3, r4}\n\t"
  1639. "ldm %[a], {r3, r4}\n\t"
  1640. "ldm %[b]!, {r5, r6}\n\t"
  1641. "sbcs r3, r3, r5\n\t"
  1642. "sbcs r4, r4, r6\n\t"
  1643. "stm %[a]!, {r3, r4}\n\t"
  1644. "ldm %[a], {r3, r4}\n\t"
  1645. "ldm %[b]!, {r5, r6}\n\t"
  1646. "sbcs r3, r3, r5\n\t"
  1647. "sbcs r4, r4, r6\n\t"
  1648. "stm %[a]!, {r3, r4}\n\t"
  1649. "ldm %[a], {r3, r4}\n\t"
  1650. "ldm %[b]!, {r5, r6}\n\t"
  1651. "sbcs r3, r3, r5\n\t"
  1652. "sbcs r4, r4, r6\n\t"
  1653. "stm %[a]!, {r3, r4}\n\t"
  1654. "ldm %[a], {r3, r4}\n\t"
  1655. "ldm %[b]!, {r5, r6}\n\t"
  1656. "sbcs r3, r3, r5\n\t"
  1657. "sbcs r4, r4, r6\n\t"
  1658. "stm %[a]!, {r3, r4}\n\t"
  1659. "ldm %[a], {r3, r4}\n\t"
  1660. "ldm %[b]!, {r5, r6}\n\t"
  1661. "sbcs r3, r3, r5\n\t"
  1662. "sbcs r4, r4, r6\n\t"
  1663. "stm %[a]!, {r3, r4}\n\t"
  1664. "ldm %[a], {r3, r4}\n\t"
  1665. "ldm %[b]!, {r5, r6}\n\t"
  1666. "sbcs r3, r3, r5\n\t"
  1667. "sbcs r4, r4, r6\n\t"
  1668. "stm %[a]!, {r3, r4}\n\t"
  1669. "ldm %[a], {r3, r4}\n\t"
  1670. "ldm %[b]!, {r5, r6}\n\t"
  1671. "sbcs r3, r3, r5\n\t"
  1672. "sbcs r4, r4, r6\n\t"
  1673. "stm %[a]!, {r3, r4}\n\t"
  1674. "ldm %[a], {r3, r4}\n\t"
  1675. "ldm %[b]!, {r5, r6}\n\t"
  1676. "sbcs r3, r3, r5\n\t"
  1677. "sbcs r4, r4, r6\n\t"
  1678. "stm %[a]!, {r3, r4}\n\t"
  1679. "ldm %[a], {r3, r4}\n\t"
  1680. "ldm %[b]!, {r5, r6}\n\t"
  1681. "sbcs r3, r3, r5\n\t"
  1682. "sbcs r4, r4, r6\n\t"
  1683. "stm %[a]!, {r3, r4}\n\t"
  1684. "ldm %[a], {r3, r4}\n\t"
  1685. "ldm %[b]!, {r5, r6}\n\t"
  1686. "sbcs r3, r3, r5\n\t"
  1687. "sbcs r4, r4, r6\n\t"
  1688. "stm %[a]!, {r3, r4}\n\t"
  1689. "ldm %[a], {r3, r4}\n\t"
  1690. "ldm %[b]!, {r5, r6}\n\t"
  1691. "sbcs r3, r3, r5\n\t"
  1692. "sbcs r4, r4, r6\n\t"
  1693. "stm %[a]!, {r3, r4}\n\t"
  1694. "ldm %[a], {r3, r4}\n\t"
  1695. "ldm %[b]!, {r5, r6}\n\t"
  1696. "sbcs r3, r3, r5\n\t"
  1697. "sbcs r4, r4, r6\n\t"
  1698. "stm %[a]!, {r3, r4}\n\t"
  1699. "ldm %[a], {r3, r4}\n\t"
  1700. "ldm %[b]!, {r5, r6}\n\t"
  1701. "sbcs r3, r3, r5\n\t"
  1702. "sbcs r4, r4, r6\n\t"
  1703. "stm %[a]!, {r3, r4}\n\t"
  1704. "ldm %[a], {r3, r4}\n\t"
  1705. "ldm %[b]!, {r5, r6}\n\t"
  1706. "sbcs r3, r3, r5\n\t"
  1707. "sbcs r4, r4, r6\n\t"
  1708. "stm %[a]!, {r3, r4}\n\t"
  1709. "ldm %[a], {r3, r4}\n\t"
  1710. "ldm %[b]!, {r5, r6}\n\t"
  1711. "sbcs r3, r3, r5\n\t"
  1712. "sbcs r4, r4, r6\n\t"
  1713. "stm %[a]!, {r3, r4}\n\t"
  1714. "ldm %[a], {r3, r4}\n\t"
  1715. "ldm %[b]!, {r5, r6}\n\t"
  1716. "sbcs r3, r3, r5\n\t"
  1717. "sbcs r4, r4, r6\n\t"
  1718. "stm %[a]!, {r3, r4}\n\t"
  1719. "ldm %[a], {r3, r4}\n\t"
  1720. "ldm %[b]!, {r5, r6}\n\t"
  1721. "sbcs r3, r3, r5\n\t"
  1722. "sbcs r4, r4, r6\n\t"
  1723. "stm %[a]!, {r3, r4}\n\t"
  1724. "ldm %[a], {r3, r4}\n\t"
  1725. "ldm %[b]!, {r5, r6}\n\t"
  1726. "sbcs r3, r3, r5\n\t"
  1727. "sbcs r4, r4, r6\n\t"
  1728. "stm %[a]!, {r3, r4}\n\t"
  1729. "ldm %[a], {r3, r4}\n\t"
  1730. "ldm %[b]!, {r5, r6}\n\t"
  1731. "sbcs r3, r3, r5\n\t"
  1732. "sbcs r4, r4, r6\n\t"
  1733. "stm %[a]!, {r3, r4}\n\t"
  1734. "ldm %[a], {r3, r4}\n\t"
  1735. "ldm %[b]!, {r5, r6}\n\t"
  1736. "sbcs r3, r3, r5\n\t"
  1737. "sbcs r4, r4, r6\n\t"
  1738. "stm %[a]!, {r3, r4}\n\t"
  1739. "sbc %[c], %[c], %[c]\n\t"
  1740. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  1741. :
  1742. : "memory", "r3", "r4", "r5", "r6"
  1743. );
  1744. return c;
  1745. }
  1746. /* Add b to a into r. (r = a + b)
  1747. *
  1748. * r A single precision integer.
  1749. * a A single precision integer.
  1750. * b A single precision integer.
  1751. */
  1752. SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
  1753. const sp_digit* b)
  1754. {
  1755. sp_digit c = 0;
  1756. __asm__ __volatile__ (
  1757. "ldm %[a]!, {r4, r5}\n\t"
  1758. "ldm %[b]!, {r6, r8}\n\t"
  1759. "adds r4, r4, r6\n\t"
  1760. "adcs r5, r5, r8\n\t"
  1761. "stm %[r]!, {r4, r5}\n\t"
  1762. "ldm %[a]!, {r4, r5}\n\t"
  1763. "ldm %[b]!, {r6, r8}\n\t"
  1764. "adcs r4, r4, r6\n\t"
  1765. "adcs r5, r5, r8\n\t"
  1766. "stm %[r]!, {r4, r5}\n\t"
  1767. "ldm %[a]!, {r4, r5}\n\t"
  1768. "ldm %[b]!, {r6, r8}\n\t"
  1769. "adcs r4, r4, r6\n\t"
  1770. "adcs r5, r5, r8\n\t"
  1771. "stm %[r]!, {r4, r5}\n\t"
  1772. "ldm %[a]!, {r4, r5}\n\t"
  1773. "ldm %[b]!, {r6, r8}\n\t"
  1774. "adcs r4, r4, r6\n\t"
  1775. "adcs r5, r5, r8\n\t"
  1776. "stm %[r]!, {r4, r5}\n\t"
  1777. "ldm %[a]!, {r4, r5}\n\t"
  1778. "ldm %[b]!, {r6, r8}\n\t"
  1779. "adcs r4, r4, r6\n\t"
  1780. "adcs r5, r5, r8\n\t"
  1781. "stm %[r]!, {r4, r5}\n\t"
  1782. "ldm %[a]!, {r4, r5}\n\t"
  1783. "ldm %[b]!, {r6, r8}\n\t"
  1784. "adcs r4, r4, r6\n\t"
  1785. "adcs r5, r5, r8\n\t"
  1786. "stm %[r]!, {r4, r5}\n\t"
  1787. "ldm %[a]!, {r4, r5}\n\t"
  1788. "ldm %[b]!, {r6, r8}\n\t"
  1789. "adcs r4, r4, r6\n\t"
  1790. "adcs r5, r5, r8\n\t"
  1791. "stm %[r]!, {r4, r5}\n\t"
  1792. "ldm %[a]!, {r4, r5}\n\t"
  1793. "ldm %[b]!, {r6, r8}\n\t"
  1794. "adcs r4, r4, r6\n\t"
  1795. "adcs r5, r5, r8\n\t"
  1796. "stm %[r]!, {r4, r5}\n\t"
  1797. "ldm %[a]!, {r4, r5}\n\t"
  1798. "ldm %[b]!, {r6, r8}\n\t"
  1799. "adcs r4, r4, r6\n\t"
  1800. "adcs r5, r5, r8\n\t"
  1801. "stm %[r]!, {r4, r5}\n\t"
  1802. "ldm %[a]!, {r4, r5}\n\t"
  1803. "ldm %[b]!, {r6, r8}\n\t"
  1804. "adcs r4, r4, r6\n\t"
  1805. "adcs r5, r5, r8\n\t"
  1806. "stm %[r]!, {r4, r5}\n\t"
  1807. "ldm %[a]!, {r4, r5}\n\t"
  1808. "ldm %[b]!, {r6, r8}\n\t"
  1809. "adcs r4, r4, r6\n\t"
  1810. "adcs r5, r5, r8\n\t"
  1811. "stm %[r]!, {r4, r5}\n\t"
  1812. "ldm %[a]!, {r4, r5}\n\t"
  1813. "ldm %[b]!, {r6, r8}\n\t"
  1814. "adcs r4, r4, r6\n\t"
  1815. "adcs r5, r5, r8\n\t"
  1816. "stm %[r]!, {r4, r5}\n\t"
  1817. "ldm %[a]!, {r4, r5}\n\t"
  1818. "ldm %[b]!, {r6, r8}\n\t"
  1819. "adcs r4, r4, r6\n\t"
  1820. "adcs r5, r5, r8\n\t"
  1821. "stm %[r]!, {r4, r5}\n\t"
  1822. "ldm %[a]!, {r4, r5}\n\t"
  1823. "ldm %[b]!, {r6, r8}\n\t"
  1824. "adcs r4, r4, r6\n\t"
  1825. "adcs r5, r5, r8\n\t"
  1826. "stm %[r]!, {r4, r5}\n\t"
  1827. "ldm %[a]!, {r4, r5}\n\t"
  1828. "ldm %[b]!, {r6, r8}\n\t"
  1829. "adcs r4, r4, r6\n\t"
  1830. "adcs r5, r5, r8\n\t"
  1831. "stm %[r]!, {r4, r5}\n\t"
  1832. "ldm %[a]!, {r4, r5}\n\t"
  1833. "ldm %[b]!, {r6, r8}\n\t"
  1834. "adcs r4, r4, r6\n\t"
  1835. "adcs r5, r5, r8\n\t"
  1836. "stm %[r]!, {r4, r5}\n\t"
  1837. "ldm %[a]!, {r4, r5}\n\t"
  1838. "ldm %[b]!, {r6, r8}\n\t"
  1839. "adcs r4, r4, r6\n\t"
  1840. "adcs r5, r5, r8\n\t"
  1841. "stm %[r]!, {r4, r5}\n\t"
  1842. "ldm %[a]!, {r4, r5}\n\t"
  1843. "ldm %[b]!, {r6, r8}\n\t"
  1844. "adcs r4, r4, r6\n\t"
  1845. "adcs r5, r5, r8\n\t"
  1846. "stm %[r]!, {r4, r5}\n\t"
  1847. "ldm %[a]!, {r4, r5}\n\t"
  1848. "ldm %[b]!, {r6, r8}\n\t"
  1849. "adcs r4, r4, r6\n\t"
  1850. "adcs r5, r5, r8\n\t"
  1851. "stm %[r]!, {r4, r5}\n\t"
  1852. "ldm %[a]!, {r4, r5}\n\t"
  1853. "ldm %[b]!, {r6, r8}\n\t"
  1854. "adcs r4, r4, r6\n\t"
  1855. "adcs r5, r5, r8\n\t"
  1856. "stm %[r]!, {r4, r5}\n\t"
  1857. "ldm %[a]!, {r4, r5}\n\t"
  1858. "ldm %[b]!, {r6, r8}\n\t"
  1859. "adcs r4, r4, r6\n\t"
  1860. "adcs r5, r5, r8\n\t"
  1861. "stm %[r]!, {r4, r5}\n\t"
  1862. "ldm %[a]!, {r4, r5}\n\t"
  1863. "ldm %[b]!, {r6, r8}\n\t"
  1864. "adcs r4, r4, r6\n\t"
  1865. "adcs r5, r5, r8\n\t"
  1866. "stm %[r]!, {r4, r5}\n\t"
  1867. "ldm %[a]!, {r4, r5}\n\t"
  1868. "ldm %[b]!, {r6, r8}\n\t"
  1869. "adcs r4, r4, r6\n\t"
  1870. "adcs r5, r5, r8\n\t"
  1871. "stm %[r]!, {r4, r5}\n\t"
  1872. "ldm %[a]!, {r4, r5}\n\t"
  1873. "ldm %[b]!, {r6, r8}\n\t"
  1874. "adcs r4, r4, r6\n\t"
  1875. "adcs r5, r5, r8\n\t"
  1876. "stm %[r]!, {r4, r5}\n\t"
  1877. "ldm %[a]!, {r4, r5}\n\t"
  1878. "ldm %[b]!, {r6, r8}\n\t"
  1879. "adcs r4, r4, r6\n\t"
  1880. "adcs r5, r5, r8\n\t"
  1881. "stm %[r]!, {r4, r5}\n\t"
  1882. "ldm %[a]!, {r4, r5}\n\t"
  1883. "ldm %[b]!, {r6, r8}\n\t"
  1884. "adcs r4, r4, r6\n\t"
  1885. "adcs r5, r5, r8\n\t"
  1886. "stm %[r]!, {r4, r5}\n\t"
  1887. "ldm %[a]!, {r4, r5}\n\t"
  1888. "ldm %[b]!, {r6, r8}\n\t"
  1889. "adcs r4, r4, r6\n\t"
  1890. "adcs r5, r5, r8\n\t"
  1891. "stm %[r]!, {r4, r5}\n\t"
  1892. "ldm %[a]!, {r4, r5}\n\t"
  1893. "ldm %[b]!, {r6, r8}\n\t"
  1894. "adcs r4, r4, r6\n\t"
  1895. "adcs r5, r5, r8\n\t"
  1896. "stm %[r]!, {r4, r5}\n\t"
  1897. "ldm %[a]!, {r4, r5}\n\t"
  1898. "ldm %[b]!, {r6, r8}\n\t"
  1899. "adcs r4, r4, r6\n\t"
  1900. "adcs r5, r5, r8\n\t"
  1901. "stm %[r]!, {r4, r5}\n\t"
  1902. "ldm %[a]!, {r4, r5}\n\t"
  1903. "ldm %[b]!, {r6, r8}\n\t"
  1904. "adcs r4, r4, r6\n\t"
  1905. "adcs r5, r5, r8\n\t"
  1906. "stm %[r]!, {r4, r5}\n\t"
  1907. "ldm %[a]!, {r4, r5}\n\t"
  1908. "ldm %[b]!, {r6, r8}\n\t"
  1909. "adcs r4, r4, r6\n\t"
  1910. "adcs r5, r5, r8\n\t"
  1911. "stm %[r]!, {r4, r5}\n\t"
  1912. "ldm %[a]!, {r4, r5}\n\t"
  1913. "ldm %[b]!, {r6, r8}\n\t"
  1914. "adcs r4, r4, r6\n\t"
  1915. "adcs r5, r5, r8\n\t"
  1916. "stm %[r]!, {r4, r5}\n\t"
  1917. "mov %[c], #0\n\t"
  1918. "adc %[c], %[c], %[c]\n\t"
  1919. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  1920. :
  1921. : "memory", "r4", "r5", "r6", "r8"
  1922. );
  1923. return c;
  1924. }
  1925. /* AND m into each word of a and store in r.
  1926. *
  1927. * r A single precision integer.
  1928. * a A single precision integer.
  1929. * m Mask to AND against each digit.
  1930. */
  1931. static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
  1932. {
  1933. #ifdef WOLFSSL_SP_SMALL
  1934. int i;
  1935. for (i=0; i<32; i++) {
  1936. r[i] = a[i] & m;
  1937. }
  1938. #else
  1939. int i;
  1940. for (i = 0; i < 32; i += 8) {
  1941. r[i+0] = a[i+0] & m;
  1942. r[i+1] = a[i+1] & m;
  1943. r[i+2] = a[i+2] & m;
  1944. r[i+3] = a[i+3] & m;
  1945. r[i+4] = a[i+4] & m;
  1946. r[i+5] = a[i+5] & m;
  1947. r[i+6] = a[i+6] & m;
  1948. r[i+7] = a[i+7] & m;
  1949. }
  1950. #endif
  1951. }
  1952. /* Multiply a and b into r. (r = a * b)
  1953. *
  1954. * r A single precision integer.
  1955. * a A single precision integer.
  1956. * b A single precision integer.
  1957. */
  1958. SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
  1959. const sp_digit* b)
  1960. {
  1961. sp_digit* z0 = r;
  1962. sp_digit z1[64];
  1963. sp_digit a1[32];
  1964. sp_digit b1[32];
  1965. sp_digit z2[64];
  1966. sp_digit u, ca, cb;
  1967. ca = sp_2048_add_32(a1, a, &a[32]);
  1968. cb = sp_2048_add_32(b1, b, &b[32]);
  1969. u = ca & cb;
  1970. sp_2048_mul_32(z1, a1, b1);
  1971. sp_2048_mul_32(z2, &a[32], &b[32]);
  1972. sp_2048_mul_32(z0, a, b);
  1973. sp_2048_mask_32(r + 64, a1, 0 - cb);
  1974. sp_2048_mask_32(b1, b1, 0 - ca);
  1975. u += sp_2048_add_32(r + 64, r + 64, b1);
  1976. u += sp_2048_sub_in_place_64(z1, z2);
  1977. u += sp_2048_sub_in_place_64(z1, z0);
  1978. u += sp_2048_add_64(r + 32, r + 32, z1);
  1979. r[96] = u;
  1980. XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
  1981. (void)sp_2048_add_64(r + 64, r + 64, z2);
  1982. }
  1983. /* Square a and put result in r. (r = a * a)
  1984. *
  1985. * r A single precision integer.
  1986. * a A single precision integer.
  1987. */
  1988. SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
  1989. {
  1990. sp_digit* z0 = r;
  1991. sp_digit z2[64];
  1992. sp_digit z1[64];
  1993. sp_digit a1[32];
  1994. sp_digit u;
  1995. u = sp_2048_add_32(a1, a, &a[32]);
  1996. sp_2048_sqr_32(z1, a1);
  1997. sp_2048_sqr_32(z2, &a[32]);
  1998. sp_2048_sqr_32(z0, a);
  1999. sp_2048_mask_32(r + 64, a1, 0 - u);
  2000. u += sp_2048_add_32(r + 64, r + 64, r + 64);
  2001. u += sp_2048_sub_in_place_64(z1, z2);
  2002. u += sp_2048_sub_in_place_64(z1, z0);
  2003. u += sp_2048_add_64(r + 32, r + 32, z1);
  2004. r[96] = u;
  2005. XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
  2006. (void)sp_2048_add_64(r + 64, r + 64, z2);
  2007. }
  2008. #endif /* !WOLFSSL_SP_SMALL */
  2009. #ifdef WOLFSSL_SP_SMALL
  2010. /* Add b to a into r. (r = a + b)
  2011. *
  2012. * r A single precision integer.
  2013. * a A single precision integer.
  2014. * b A single precision integer.
  2015. */
  2016. SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
  2017. const sp_digit* b)
  2018. {
  2019. sp_digit c = 0;
  2020. __asm__ __volatile__ (
  2021. "mov r6, %[a]\n\t"
  2022. "mov r8, #0\n\t"
  2023. "add r6, r6, #256\n\t"
  2024. "sub r8, r8, #1\n\t"
  2025. "\n1:\n\t"
  2026. "adds %[c], %[c], r8\n\t"
  2027. "ldr r4, [%[a]]\n\t"
  2028. "ldr r5, [%[b]]\n\t"
  2029. "adcs r4, r4, r5\n\t"
  2030. "str r4, [%[r]]\n\t"
  2031. "mov %[c], #0\n\t"
  2032. "adc %[c], %[c], %[c]\n\t"
  2033. "add %[a], %[a], #4\n\t"
  2034. "add %[b], %[b], #4\n\t"
  2035. "add %[r], %[r], #4\n\t"
  2036. "cmp %[a], r6\n\t"
  2037. #ifdef __GNUC__
  2038. "bne 1b\n\t"
  2039. #else
  2040. "bne.n 1b\n\t"
  2041. #endif /* __GNUC__ */
  2042. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  2043. :
  2044. : "memory", "r4", "r5", "r6", "r8"
  2045. );
  2046. return c;
  2047. }
  2048. #endif /* WOLFSSL_SP_SMALL */
  2049. #ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Subtracts 64 words (256 bytes), two words per loop iteration. The borrow
 * is kept in c between iterations because the pointer compare at the bottom
 * of the loop overwrites the condition flags.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 * returns 0 when there is no borrow out of the top word and all ones
 *         (0xffffffff) when there is.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r8 = a + 256 (end marker) */
        "mov r8, %[a]\n\t"
        "add r8, r8, #256\n\t"
        "\n1:\n\t"
        /* 0 - c borrows exactly when c is non-zero: this puts the saved
         * borrow back into the carry flag. r5 is only scratch here. */
        "mov r5, #0\n\t"
        "subs r5, r5, %[c]\n\t"
        "ldr r3, [%[a]]\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "ldr r5, [%[b]]\n\t"
        "ldr r6, [%[b], #4]\n\t"
        "sbcs r3, r3, r5\n\t"
        "sbcs r4, r4, r6\n\t"
        "str r3, [%[a]]\n\t"
        "str r4, [%[a], #4]\n\t"
        /* c = c - c - borrow: 0 when no borrow, 0xffffffff on borrow */
        "sbc %[c], %[c], %[c]\n\t"
        "add %[a], %[a], #8\n\t"
        "add %[b], %[b], #8\n\t"
        "cmp %[a], r8\n\t"
#ifdef __GNUC__
        "bne 1b\n\t"
#else
        "bne.n 1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r8"
    );

    return c;
}
  2088. #endif /* WOLFSSL_SP_SMALL */
  2089. #ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Computes the 128 word product one output column at a time, accumulating
 * every a[i] * b[j] with i + j == column into a three word accumulator
 * (r5:r4:r3). The product is built in a local buffer and copied to r at the
 * end, so r is only written after all reads of a and b are done.
 *
 * r  A single precision integer. Receives 128 words.
 * a  A single precision integer (64 words).
 * b  A single precision integer (64 words).
 *
 * NOTE(review): %[r], %[a] and %[b] are declared as input operands but are
 * used as scratch inside the asm; they hold their entry values again when
 * the asm ends. Confirm this is acceptable for the toolchains in use.
 */
SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp_arr[64 * 2];
    sp_digit* tmp = tmp_arr;

    __asm__ __volatile__ (
        /* r3, r4: accumulator. r9: byte offset of the current column.
         * r12, r10, r11: saved result, a and b base pointers.
         * r14 = a + 256: one past the last word of a. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        "mov r10, %[a]\n\t"
        "mov r11, %[b]\n\t"
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, r10\n\t"
        "mov r14, r6\n\t"
        "\n1:\n\t"
        /* %[r] is used as a zero operand for the adc below */
        "mov %[r], #0\n\t"
        "mov r5, #0\n\t"
        /* Branch-free: a offset = (column < 252) ? 0 : column - 252 */
        "mov r6, #252\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        /* b offset = column - a offset; then turn both into pointers */
        "mov %[b], r9\n\t"
        "sub %[b], %[b], %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add %[b], %[b], r11\n\t"
        "\n2:\n\t"
        /* Multiply Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [%[b]]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Walk a upwards and b downwards along the column */
        "add %[a], %[a], #4\n\t"
        "sub %[b], %[b], #4\n\t"
        /* Column is finished when a runs off its end ... */
        "cmp %[a], r14\n\t"
#ifdef __GNUC__
        "beq 3f\n\t"
#else
        "beq.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when a has passed a base + column offset */
        "mov r6, r9\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
#ifdef __GNUC__
        "ble 2b\n\t"
#else
        "ble.n 2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word as this column's result and shift
         * the accumulator down one word */
        "mov %[r], r12\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Loop while the column offset is <= 256 + 248 = 504 */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, #248\n\t"
        "cmp r8, r6\n\t"
#ifdef __GNUC__
        "ble 1b\n\t"
#else
        "ble.n 1b\n\t"
#endif /* __GNUC__ */
        /* Remaining accumulator word is the top result word (offset 508) */
        "str r3, [%[r], r8]\n\t"
        /* Put the operand registers back to their entry values */
        "mov %[a], r10\n\t"
        "mov %[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
/* Square a and put result in r. (r = a * a)
 *
 * Computes the 128 word square one output column at a time. Within a column
 * two pointers walk towards each other: products of two different words are
 * added to the accumulator twice, the product of a word with itself once.
 * The result is built in 512 bytes reserved below the stack pointer inside
 * the asm and copied to r at the end.
 *
 * r  A single precision integer. Receives 128 words.
 * a  A single precision integer (64 words).
 *
 * NOTE(review): the asm moves sp itself, and %[r] and %[a] are declared as
 * input operands but are overwritten (%[a] is left pointing at the stack
 * temporary when the asm ends). Confirm this is safe for the toolchains and
 * calling contexts in use.
 */
SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r5:r4:r3: accumulator. r9: byte offset of the current column.
         * r12: saved result pointer. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r5, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        /* sp -= 512 (2 << 8); r11 = start of the temporary result */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "neg r6, r6\n\t"
        "add sp, sp, r6\n\t"
        "mov r11, sp\n\t"
        "mov r10, %[a]\n\t"
        "\n1:\n\t"
        /* %[r] is used as a zero operand for the adc instructions below */
        "mov %[r], #0\n\t"
        /* Branch-free: low offset = (column < 252) ? 0 : column - 252 */
        "mov r6, #252\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        /* r2 = pointer at (column - low offset), %[a] = pointer at low
         * offset; both index into a */
        "mov r2, r9\n\t"
        "sub r2, r2, %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add r2, r2, r10\n\t"
        "\n2:\n\t"
        /* Both pointers on the same word: take the square path */
        "cmp r2, %[a]\n\t"
#ifdef __GNUC__
        "beq 4f\n\t"
#else
        "beq.n 4f\n\t"
#endif /* __GNUC__ */
        /* Multiply * 2: Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [r2]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply * 2: Done */
#ifdef __GNUC__
        "bal 5f\n\t"
#else
        "bal.n 5f\n\t"
#endif /* __GNUC__ */
        "\n4:\n\t"
        /* Square: Start */
        "ldr r6, [%[a]]\n\t"
        "umull r6, r8, r6, r6\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Square: Done */
        "\n5:\n\t"
        /* Move the two pointers towards each other */
        "add %[a], %[a], #4\n\t"
        "sub r2, r2, #4\n\t"
        /* Column is finished when %[a] reaches a + 256 ... */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
#ifdef __GNUC__
        "beq 3f\n\t"
#else
        "beq.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when the pointers have crossed ... */
        "cmp %[a], r2\n\t"
#ifdef __GNUC__
        "bgt 3f\n\t"
#else
        "bgt.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when %[a] has passed a base + column offset */
        "mov r8, r9\n\t"
        "add r8, r8, r10\n\t"
        "cmp %[a], r8\n\t"
#ifdef __GNUC__
        "ble 2b\n\t"
#else
        "ble.n 2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word into the temporary and shift the
         * accumulator down one word */
        "mov %[r], r11\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "mov r5, #0\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Loop while the column offset is <= 256 + 248 = 504 */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, #248\n\t"
        "cmp r8, r6\n\t"
#ifdef __GNUC__
        "ble 1b\n\t"
#else
        "ble.n 1b\n\t"
#endif /* __GNUC__ */
        "mov %[a], r10\n\t"
        /* Remaining accumulator word is the top result word (offset 508) */
        "str r3, [%[r], r8]\n\t"
        /* Copy the temporary to the caller's r, from offset 508 down to 0 */
        "mov %[r], r12\n\t"
        "mov %[a], r11\n\t"
        "mov r3, #1\n\t"
        "lsl r3, r3, #8\n\t"
        "add r3, r3, #252\n\t"
        "\n4:\n\t"
        "ldr r6, [%[a], r3]\n\t"
        "str r6, [%[r], r3]\n\t"
        "subs r3, r3, #4\n\t"
#ifdef __GNUC__
        "bge 4b\n\t"
#else
        "bge.n 4b\n\t"
#endif /* __GNUC__ */
        /* sp += 512: release the temporary */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "add sp, sp, r6\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
    );
}
  2306. #endif /* WOLFSSL_SP_SMALL */
  2307. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  2308. #ifdef WOLFSSL_SP_SMALL
  2309. /* AND m into each word of a and store in r.
  2310. *
  2311. * r A single precision integer.
  2312. * a A single precision integer.
  2313. * m Mask to AND against each digit.
  2314. */
  2315. static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
  2316. {
  2317. int i;
  2318. for (i=0; i<32; i++) {
  2319. r[i] = a[i] & m;
  2320. }
  2321. }
  2322. #endif /* WOLFSSL_SP_SMALL */
  2323. #ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Adds 32 words (128 bytes) one word per loop iteration. The carry is kept
 * in c between iterations because the pointer compare at the bottom of the
 * loop overwrites the condition flags.
 *
 * r  A single precision integer. Receives the 32 word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r6 = a + 128 (end marker), r8 = 0 - 1 = 0xffffffff */
        "mov r6, %[a]\n\t"
        "mov r8, #0\n\t"
        "add r6, r6, #128\n\t"
        "sub r8, r8, #1\n\t"
        "\n1:\n\t"
        /* c + 0xffffffff carries out exactly when c is non-zero: this puts
         * the saved carry back into the carry flag. */
        "adds %[c], %[c], r8\n\t"
        "ldr r4, [%[a]]\n\t"
        "ldr r5, [%[b]]\n\t"
        "adcs r4, r4, r5\n\t"
        "str r4, [%[r]]\n\t"
        /* c = 0 + 0 + carry: save the carry flag as 0 or 1 */
        "mov %[c], #0\n\t"
        "adc %[c], %[c], %[c]\n\t"
        "add %[a], %[a], #4\n\t"
        "add %[b], %[b], #4\n\t"
        "add %[r], %[r], #4\n\t"
        "cmp %[a], r6\n\t"
#ifdef __GNUC__
        "bne 1b\n\t"
#else
        "bne.n 1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
  2362. #endif /* WOLFSSL_SP_SMALL */
  2363. #ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Subtracts 32 words (128 bytes), two words per loop iteration. The borrow
 * is kept in c between iterations because the pointer compare at the bottom
 * of the loop overwrites the condition flags.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 * returns 0 when there is no borrow out of the top word and all ones
 *         (0xffffffff) when there is.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r8 = a + 128 (end marker) */
        "mov r8, %[a]\n\t"
        "add r8, r8, #128\n\t"
        "\n1:\n\t"
        /* 0 - c borrows exactly when c is non-zero: this puts the saved
         * borrow back into the carry flag. r5 is only scratch here. */
        "mov r5, #0\n\t"
        "subs r5, r5, %[c]\n\t"
        "ldr r3, [%[a]]\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "ldr r5, [%[b]]\n\t"
        "ldr r6, [%[b], #4]\n\t"
        "sbcs r3, r3, r5\n\t"
        "sbcs r4, r4, r6\n\t"
        "str r3, [%[a]]\n\t"
        "str r4, [%[a], #4]\n\t"
        /* c = c - c - borrow: 0 when no borrow, 0xffffffff on borrow */
        "sbc %[c], %[c], %[c]\n\t"
        "add %[a], %[a], #8\n\t"
        "add %[b], %[b], #8\n\t"
        "cmp %[a], r8\n\t"
#ifdef __GNUC__
        "bne 1b\n\t"
#else
        "bne.n 1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r8"
    );

    return c;
}
  2402. #endif /* WOLFSSL_SP_SMALL */
  2403. #ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Computes the 64 word product one output column at a time, accumulating
 * every a[i] * b[j] with i + j == column into a three word accumulator
 * (r5:r4:r3). The product is built in a local buffer and copied to r at the
 * end, so r is only written after all reads of a and b are done.
 *
 * r  A single precision integer. Receives 64 words.
 * a  A single precision integer (32 words).
 * b  A single precision integer (32 words).
 *
 * NOTE(review): %[r], %[a] and %[b] are declared as input operands but are
 * used as scratch inside the asm; they hold their entry values again when
 * the asm ends. Confirm this is acceptable for the toolchains in use.
 */
SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp_arr[32 * 2];
    sp_digit* tmp = tmp_arr;

    __asm__ __volatile__ (
        /* r3, r4: accumulator. r9: byte offset of the current column.
         * r12, r10, r11: saved result, a and b base pointers.
         * r14 = a + 128: one past the last word of a. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        "mov r10, %[a]\n\t"
        "mov r11, %[b]\n\t"
        "mov r6, #128\n\t"
        "add r6, r6, r10\n\t"
        "mov r14, r6\n\t"
        "\n1:\n\t"
        /* %[r] is used as a zero operand for the adc below */
        "mov %[r], #0\n\t"
        "mov r5, #0\n\t"
        /* Branch-free: a offset = (column < 124) ? 0 : column - 124 */
        "mov r6, #124\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        /* b offset = column - a offset; then turn both into pointers */
        "mov %[b], r9\n\t"
        "sub %[b], %[b], %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add %[b], %[b], r11\n\t"
        "\n2:\n\t"
        /* Multiply Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [%[b]]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Walk a upwards and b downwards along the column */
        "add %[a], %[a], #4\n\t"
        "sub %[b], %[b], #4\n\t"
        /* Column is finished when a runs off its end ... */
        "cmp %[a], r14\n\t"
#ifdef __GNUC__
        "beq 3f\n\t"
#else
        "beq.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when a has passed a base + column offset */
        "mov r6, r9\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
#ifdef __GNUC__
        "ble 2b\n\t"
#else
        "ble.n 2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word as this column's result and shift
         * the accumulator down one word */
        "mov %[r], r12\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Loop while the column offset is <= 248 */
        "mov r6, #248\n\t"
        "cmp r8, r6\n\t"
#ifdef __GNUC__
        "ble 1b\n\t"
#else
        "ble.n 1b\n\t"
#endif /* __GNUC__ */
        /* Remaining accumulator word is the top result word (offset 252) */
        "str r3, [%[r], r8]\n\t"
        /* Put the operand registers back to their entry values */
        "mov %[a], r10\n\t"
        "mov %[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
/* Square a and put result in r. (r = a * a)
 *
 * Computes the 64 word square one output column at a time. Within a column
 * two pointers walk towards each other: products of two different words are
 * added to the accumulator twice, the product of a word with itself once.
 * The result is built in 256 bytes reserved below the stack pointer inside
 * the asm and copied to r at the end.
 *
 * r  A single precision integer. Receives 64 words.
 * a  A single precision integer (32 words).
 *
 * NOTE(review): the asm moves sp itself, and %[r] and %[a] are declared as
 * input operands but are overwritten (%[a] is left pointing at the stack
 * temporary when the asm ends). Confirm this is safe for the toolchains and
 * calling contexts in use.
 */
SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r5:r4:r3: accumulator. r9: byte offset of the current column.
         * r12: saved result pointer. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r5, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        /* sp -= 256 (1 << 8); r11 = start of the temporary result */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "neg r6, r6\n\t"
        "add sp, sp, r6\n\t"
        "mov r11, sp\n\t"
        "mov r10, %[a]\n\t"
        "\n1:\n\t"
        /* %[r] is used as a zero operand for the adc instructions below */
        "mov %[r], #0\n\t"
        /* Branch-free: low offset = (column < 124) ? 0 : column - 124 */
        "mov r6, #124\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        /* r2 = pointer at (column - low offset), %[a] = pointer at low
         * offset; both index into a */
        "mov r2, r9\n\t"
        "sub r2, r2, %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add r2, r2, r10\n\t"
        "\n2:\n\t"
        /* Both pointers on the same word: take the square path */
        "cmp r2, %[a]\n\t"
#ifdef __GNUC__
        "beq 4f\n\t"
#else
        "beq.n 4f\n\t"
#endif /* __GNUC__ */
        /* Multiply * 2: Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [r2]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply * 2: Done */
#ifdef __GNUC__
        "bal 5f\n\t"
#else
        "bal.n 5f\n\t"
#endif /* __GNUC__ */
        "\n4:\n\t"
        /* Square: Start */
        "ldr r6, [%[a]]\n\t"
        "umull r6, r8, r6, r6\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Square: Done */
        "\n5:\n\t"
        /* Move the two pointers towards each other */
        "add %[a], %[a], #4\n\t"
        "sub r2, r2, #4\n\t"
        /* Column is finished when %[a] reaches a + 128 ... */
        "mov r6, #128\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
#ifdef __GNUC__
        "beq 3f\n\t"
#else
        "beq.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when the pointers have crossed ... */
        "cmp %[a], r2\n\t"
#ifdef __GNUC__
        "bgt 3f\n\t"
#else
        "bgt.n 3f\n\t"
#endif /* __GNUC__ */
        /* ... or when %[a] has passed a base + column offset */
        "mov r8, r9\n\t"
        "add r8, r8, r10\n\t"
        "cmp %[a], r8\n\t"
#ifdef __GNUC__
        "ble 2b\n\t"
#else
        "ble.n 2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word into the temporary and shift the
         * accumulator down one word */
        "mov %[r], r11\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "mov r5, #0\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Loop while the column offset is <= 248 */
        "mov r6, #248\n\t"
        "cmp r8, r6\n\t"
#ifdef __GNUC__
        "ble 1b\n\t"
#else
        "ble.n 1b\n\t"
#endif /* __GNUC__ */
        "mov %[a], r10\n\t"
        /* Remaining accumulator word is the top result word (offset 252) */
        "str r3, [%[r], r8]\n\t"
        /* Copy the temporary to the caller's r, from offset 252 down to 0 */
        "mov %[r], r12\n\t"
        "mov %[a], r11\n\t"
        "mov r3, #252\n\t"
        "\n4:\n\t"
        "ldr r6, [%[a], r3]\n\t"
        "str r6, [%[r], r3]\n\t"
        "subs r3, r3, #4\n\t"
#ifdef __GNUC__
        "bge 4b\n\t"
#else
        "bge.n 4b\n\t"
#endif /* __GNUC__ */
        /* sp += 256: release the temporary */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add sp, sp, r6\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
    );
}
  2612. #endif /* WOLFSSL_SP_SMALL */
  2613. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  2614. /* Caclulate the bottom digit of -1/a mod 2^n.
  2615. *
  2616. * a A single precision number.
  2617. * rho Bottom word of inverse.
  2618. */
  2619. static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
  2620. {
  2621. sp_digit x, b;
  2622. b = a[0];
  2623. x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  2624. x *= 2 - b * x; /* here x*a==1 mod 2**8 */
  2625. x *= 2 - b * x; /* here x*a==1 mod 2**16 */
  2626. x *= 2 - b * x; /* here x*a==1 mod 2**32 */
  2627. /* rho = -1/m mod b */
  2628. *rho = -x;
  2629. }
/* Mul a by digit b into r. (r = a * b)
 *
 * Multiplies the 64 words of a by the single word b. 65 words are written
 * to r: the 64 low words inside the loop and the final carry word after it.
 *
 * r  A single precision integer. Receives 65 words.
 * a  A single precision integer (64 words).
 * b  A single precision digit.
 */
SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    __asm__ __volatile__ (
        /* r9 = a + 256: one past the last word of a */
        "add r9, %[a], #256\n\t"
        /* A[0] * B */
        "ldr r6, [%[a]], #4\n\t"
        "umull r5, r3, r6, %[b]\n\t"
        "mov r4, #0\n\t"
        "str r5, [%[r]], #4\n\t"
        /* A[0] * B - Done */
        "\n1:\n\t"
        /* r5:r4:r3 holds the running carry; r3 is the next word to store */
        "mov r5, #0\n\t"
        /* A[] * B */
        "ldr r6, [%[a]], #4\n\t"
        "umull r6, r8, r6, %[b]\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, #0\n\t"
        /* A[] * B - Done */
        "str r3, [%[r]], #4\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "cmp %[a], r9\n\t"
#ifdef __GNUC__
        "blt 1b\n\t"
#else
        "blt.n 1b\n\t"
#endif /* __GNUC__ */
        /* Final carry word goes into r[64] */
        "str r3, [%[r]]\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        : [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
    );
}
  2671. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  2672. /* r = 2^n mod m where n is the number of bits to reduce by.
  2673. * Given m must be 2048 bits, just need to subtract.
  2674. *
  2675. * r A single precision number.
  2676. * m A single precision number.
  2677. */
  2678. static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
  2679. {
  2680. XMEMSET(r, 0, sizeof(sp_digit) * 32);
  2681. /* r = 2^n mod m */
  2682. sp_2048_sub_in_place_32(r, m);
  2683. }
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not subtracting (r is then a copy of a).
 *
 * Every word of b is ANDed with m before being subtracted, so the same
 * instructions run for both values of m. 32 words are processed.
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 * returns 0 when there is no borrow out of the top word and all ones
 *         (0xffffffff) when there is.
 */
SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b, sp_digit m)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r9 = 128: byte length of the operands. r8 = current byte offset */
        "mov r5, #128\n\t"
        "mov r9, r5\n\t"
        "mov r8, #0\n\t"
        "\n1:\n\t"
        "ldr r6, [%[b], r8]\n\t"
        "and r6, r6, %[m]\n\t"
        /* 0 - c borrows exactly when c is non-zero: this puts the saved
         * borrow back into the carry flag. r5 is only scratch here. */
        "mov r5, #0\n\t"
        "subs r5, r5, %[c]\n\t"
        "ldr r5, [%[a], r8]\n\t"
        "sbcs r5, r5, r6\n\t"
        /* c = c - c - borrow: 0 when no borrow, 0xffffffff on borrow */
        "sbcs %[c], %[c], %[c]\n\t"
        "str r5, [%[r], r8]\n\t"
        "add r8, r8, #4\n\t"
        "cmp r8, r9\n\t"
#ifdef __GNUC__
        "blt 1b\n\t"
#else
        "blt.n 1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r5", "r6", "r8", "r9"
    );

    return c;
}
/* Reduce the number back to 32 digits using Montgomery reduction.
 *
 * For each of the 32 low words a[i], mu = a[i] * mp is computed and
 * m * mu is added into a[i..i+32]. After the loop the reduced value is in
 * the upper 32 words; m is subtracted from it when the additions carried
 * out (ca != 0) and the result is written to the lower 32 words.
 *
 * a   A single precision number to reduce in place (64 words on entry,
 *     result in the low 32 words).
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 *
 * NOTE(review): %[m] and %[mp] are declared as input operands but are
 * overwritten inside the asm (%[m] is restored at the end, %[mp] is left
 * holding the last mu). Confirm this is acceptable for the toolchains in
 * use.
 */
SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_digit ca = 0;

    __asm__ __volatile__ (
        /* r9 = mp, r12 = m, r10 = current position in a,
         * r11 = a + 128: end of the low half */
        "mov r9, %[mp]\n\t"
        "mov r12, %[m]\n\t"
        "mov r10, %[a]\n\t"
        "mov r4, #0\n\t"
        "add r11, r10, #128\n\t"
        "\n1:\n\t"
        /* mu = a[i] * mp */
        "mov %[mp], r9\n\t"
        "ldr %[a], [r10]\n\t"
        "mul %[mp], %[mp], %[a]\n\t"
        "mov %[m], r12\n\t"
        /* r14 = r10 + 120: the inner loop covers words 0..29, two per pass,
         * with r4 and r5 alternating as the carry word */
        "add r14, r10, #120\n\t"
        "\n2:\n\t"
        /* a[i+j] += m[j] * mu */
        "ldr %[a], [r10]\n\t"
        "mov r5, #0\n\t"
        /* Multiply m[j] and mu - Start */
        "ldr r8, [%[m]], #4\n\t"
        "umull r6, r8, %[mp], r8\n\t"
        "adds %[a], %[a], r6\n\t"
        "adc r5, r5, r8\n\t"
        /* Multiply m[j] and mu - Done */
        "adds r4, r4, %[a]\n\t"
        "adc r5, r5, #0\n\t"
        "str r4, [r10], #4\n\t"
        /* a[i+j+1] += m[j+1] * mu */
        "ldr %[a], [r10]\n\t"
        "mov r4, #0\n\t"
        /* Multiply m[j] and mu - Start */
        "ldr r8, [%[m]], #4\n\t"
        "umull r6, r8, %[mp], r8\n\t"
        "adds %[a], %[a], r6\n\t"
        "adc r4, r4, r8\n\t"
        /* Multiply m[j] and mu - Done */
        "adds r5, r5, %[a]\n\t"
        "adc r4, r4, #0\n\t"
        "str r5, [r10], #4\n\t"
        "cmp r10, r14\n\t"
#ifdef __GNUC__
        "blt 2b\n\t"
#else
        "blt.n 2b\n\t"
#endif /* __GNUC__ */
        /* a[i+30] += m[30] * mu */
        "ldr %[a], [r10]\n\t"
        "mov r5, #0\n\t"
        /* Multiply m[j] and mu - Start */
        "ldr r8, [%[m]], #4\n\t"
        "umull r6, r8, %[mp], r8\n\t"
        "adds %[a], %[a], r6\n\t"
        "adc r5, r5, r8\n\t"
        /* Multiply m[j] and mu - Done */
        "adds r4, r4, %[a]\n\t"
        "adc r5, r5, #0\n\t"
        "str r4, [r10], #4\n\t"
        /* a[i+31] += m[31] * mu */
        /* The carry left over from the previous outer iteration (ca) is
         * folded in here and a new one is collected */
        "mov r4, %[ca]\n\t"
        "mov %[ca], #0\n\t"
        /* Multiply m[31] and mu - Start */
        "ldr r8, [%[m]]\n\t"
        "umull r6, r8, %[mp], r8\n\t"
        "adds r5, r5, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc %[ca], %[ca], #0\n\t"
        /* Multiply m[31] and mu - Done */
        "ldr r6, [r10]\n\t"
        "ldr r8, [r10, #4]\n\t"
        "adds r6, r6, r5\n\t"
        "adcs r8, r8, r4\n\t"
        "adc %[ca], %[ca], #0\n\t"
        "str r6, [r10]\n\t"
        "str r8, [r10, #4]\n\t"
        /* Next word in a */
        /* r10 was advanced by 31 words (124 bytes); step back to i + 1 */
        "sub r10, r10, #120\n\t"
        "cmp r10, r11\n\t"
#ifdef __GNUC__
        "blt 1b\n\t"
#else
        "blt.n 1b\n\t"
#endif /* __GNUC__ */
        /* On exit %[a] points at the upper half (entry a + 32 words) */
        "mov %[a], r10\n\t"
        "mov %[m], r12\n\t"
        : [ca] "+r" (ca), [a] "+r" (a)
        : [m] "r" (m), [mp] "r" (mp)
        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    /* a now points at the upper half: write (upper - (ca ? m : 0)) to the
     * lower half. */
    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
}
  2821. /* Multiply two Montogmery form numbers mod the modulus (prime).
  2822. * (r = a * b mod m)
  2823. *
  2824. * r Result of multiplication.
  2825. * a First number to multiply in Montogmery form.
  2826. * b Second number to multiply in Montogmery form.
  2827. * m Modulus (prime).
  2828. * mp Montogmery mulitplier.
  2829. */
  2830. static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
  2831. const sp_digit* m, sp_digit mp)
  2832. {
  2833. sp_2048_mul_32(r, a, b);
  2834. sp_2048_mont_reduce_32(r, m, mp);
  2835. }
  2836. /* Square the Montgomery form number. (r = a * a mod m)
  2837. *
  2838. * r Result of squaring.
  2839. * a Number to square in Montogmery form.
  2840. * m Modulus (prime).
  2841. * mp Montogmery mulitplier.
  2842. */
  2843. static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
  2844. sp_digit mp)
  2845. {
  2846. sp_2048_sqr_32(r, a);
  2847. sp_2048_mont_reduce_32(r, m, mp);
  2848. }
/* Mul a by digit b into r. (r = a * b)
 *
 * Multiplies the 32 words of a by the single word b. 33 words are written
 * to r: the 32 low words inside the loop and the final carry word after it.
 *
 * r  A single precision integer. Receives 33 words.
 * a  A single precision integer (32 words).
 * b  A single precision digit.
 */
SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    __asm__ __volatile__ (
        /* r9 = a + 128: one past the last word of a */
        "add r9, %[a], #128\n\t"
        /* A[0] * B */
        "ldr r6, [%[a]], #4\n\t"
        "umull r5, r3, r6, %[b]\n\t"
        "mov r4, #0\n\t"
        "str r5, [%[r]], #4\n\t"
        /* A[0] * B - Done */
        "\n1:\n\t"
        /* r5:r4:r3 holds the running carry; r3 is the next word to store */
        "mov r5, #0\n\t"
        /* A[] * B */
        "ldr r6, [%[a]], #4\n\t"
        "umull r6, r8, r6, %[b]\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, #0\n\t"
        /* A[] * B - Done */
        "str r3, [%[r]], #4\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "cmp %[a], r9\n\t"
#ifdef __GNUC__
        "blt 1b\n\t"
#else
        "blt.n 1b\n\t"
#endif /* __GNUC__ */
        /* Final carry word goes into r[32] */
        "str r3, [%[r]]\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        : [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
    );
}
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * The quotient is built up in r8 from several partial quotients. Each
 * partial quotient is estimated by dividing by (div >> 16) + 1, then
 * div * estimate is subtracted from the running remainder held in d1|d0.
 * A last division of the low remainder word by div finishes the quotient.
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 *
 * NOTE(review): %[d1] and %[d0] are declared as input operands but are
 * overwritten inside the asm. Confirm this is acceptable for the toolchains
 * in use.
 */
SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
        sp_digit div)
{
    sp_digit r = 0;

    __asm__ __volatile__ (
        /* r6 = (div >> 16) + 1: trial divisor */
        "lsr r6, %[div], #16\n\t"
        "add r6, r6, #1\n\t"
        /* Estimate 1: (d1 / r6) << 16 */
        "udiv r4, %[d1], r6\n\t"
        "lsl r8, r4, #16\n\t"
        "umull r4, r5, %[div], r8\n\t"
        "subs %[d0], %[d0], r4\n\t"
        "sbc %[d1], %[d1], r5\n\t"
        /* Estimate 2: (d1 / r6) << 16 on the updated remainder */
        "udiv r5, %[d1], r6\n\t"
        "lsl r4, r5, #16\n\t"
        "add r8, r8, r4\n\t"
        "umull r4, r5, %[div], r4\n\t"
        "subs %[d0], %[d0], r4\n\t"
        "sbc %[d1], %[d1], r5\n\t"
        /* Estimate 3: ((d1 << 16) | (d0 >> 16)) / r6 */
        "lsl r4, %[d1], #16\n\t"
        "orr r4, r4, %[d0], lsr #16\n\t"
        "udiv r4, r4, r6\n\t"
        "add r8, r8, r4\n\t"
        "umull r4, r5, %[div], r4\n\t"
        "subs %[d0], %[d0], r4\n\t"
        "sbc %[d1], %[d1], r5\n\t"
        /* Estimate 4: same form as estimate 3 on the updated remainder */
        "lsl r4, %[d1], #16\n\t"
        "orr r4, r4, %[d0], lsr #16\n\t"
        "udiv r4, r4, r6\n\t"
        "add r8, r8, r4\n\t"
        "umull r4, r5, %[div], r4\n\t"
        "subs %[d0], %[d0], r4\n\t"
        "sbc %[d1], %[d1], r5\n\t"
        /* Last step: low remainder word divided by the real divisor */
        "udiv r4, %[d0], %[div]\n\t"
        "add r8, r8, r4\n\t"
        "mov %[r], r8\n\t"
        : [r] "+r" (r)
        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
        : "r4", "r5", "r6", "r8"
    );

    return r;
}
/* Compare a with b in constant time.
 *
 * All 32 words are visited, from the most significant (byte offset 124)
 * down to the least, with no early exit. A mask (r3) starts as all ones and
 * is cleared at the first word where a and b differ, so later words are
 * compared as 0 against 0 and no longer change the result.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = 0;

    __asm__ __volatile__ (
        /* r3 = 0xffffffff: "still equal" mask. r6 = 124: top word offset */
        "mov r3, #0\n\t"
        "mvn r3, r3\n\t"
        "mov r6, #124\n\t"
        "\n1:\n\t"
        "ldr r8, [%[a], r6]\n\t"
        "ldr r5, [%[b], r6]\n\t"
        "and r8, r8, r3\n\t"
        "and r5, r5, r3\n\t"
        "mov r4, r8\n\t"
        /* r8 = 0xffffffff when a word < b word, else 0; r += r8 */
        "subs r8, r8, r5\n\t"
        "sbc r8, r8, r8\n\t"
        "add %[r], %[r], r8\n\t"
        /* Clear the mask when a word < b word */
        "mvn r8, r8\n\t"
        "and r3, r3, r8\n\t"
        /* r8 = 0xffffffff when b word < a word, else 0; r -= r8 */
        "subs r5, r5, r4\n\t"
        "sbc r8, r8, r8\n\t"
        "sub %[r], %[r], r8\n\t"
        /* Clear the mask when b word < a word */
        "mvn r8, r8\n\t"
        "and r3, r3, r8\n\t"
        "sub r6, r6, #4\n\t"
        "cmp r6, #0\n\t"
#ifdef __GNUC__
        "bge 1b\n\t"
#else
        "bge.n 1b\n\t"
#endif /* __GNUC__ */
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "r3", "r4", "r5", "r6", "r8"
    );

    return r;
}
  2983. /* Divide d in a and put remainder into r (m*d + r = a)
  2984. * m is not calculated as it is not needed at this time.
  2985. *
  2986. * a Number to be divided.
  2987. * d Number to divide with.
  2988. * m Multiplier result.
  2989. * r Remainder from the division.
  2990. * returns MP_OKAY indicating success.
  2991. */
  2992. static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
  2993. sp_digit* r)
  2994. {
  2995. sp_digit t1[64], t2[33];
  2996. sp_digit div, r1;
  2997. int i;
  2998. (void)m;
  2999. div = d[31];
  3000. XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
  3001. for (i=31; i>=0; i--) {
  3002. sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
  3003. r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
  3004. sp_2048_mul_d_32(t2, d, r1);
  3005. t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
  3006. t1[32 + i] -= t2[32];
  3007. sp_2048_mask_32(t2, d, t1[32 + i]);
  3008. t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
  3009. sp_2048_mask_32(t2, d, t1[32 + i]);
  3010. t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
  3011. }
  3012. r1 = sp_2048_cmp_32(t1, d) >= 0;
  3013. sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
  3014. return MP_OKAY;
  3015. }
  3016. /* Reduce a modulo m into r. (r = a mod m)
  3017. *
  3018. * r A single precision number that is the reduced result.
  3019. * a A single precision number that is to be reduced.
  3020. * m A single precision number that is the modulus to reduce with.
  3021. * returns MP_OKAY indicating success.
  3022. */
  3023. static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
  3024. {
  3025. return sp_2048_div_32(a, m, NULL, r);
  3026. }
  3027. #ifdef WOLFSSL_SP_SMALL
  3028. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  3029. *
  3030. * r A single precision number that is the result of the operation.
  3031. * a A single precision number being exponentiated.
  3032. * e A single precision number that is the exponent.
  3033. * bits The number of bits in the exponent.
  3034. * m A single precision number that is the modulus.
  3035. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  3036. */
  3037. static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
  3038. int bits, const sp_digit* m, int reduceA)
  3039. {
  3040. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3041. sp_digit* td;
  3042. #else
  3043. sp_digit td[16 * 64];
  3044. #endif
  3045. sp_digit* t[16];
  3046. sp_digit* norm;
  3047. sp_digit mp = 1;
  3048. sp_digit n;
  3049. sp_digit mask;
  3050. int i;
  3051. int c, y;
  3052. int err = MP_OKAY;
  3053. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3054. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 64), NULL,
  3055. DYNAMIC_TYPE_TMP_BUFFER);
  3056. if (td == NULL) {
  3057. err = MEMORY_E;
  3058. }
  3059. #endif
  3060. if (err == MP_OKAY) {
  3061. norm = td;
  3062. for (i=0; i<16; i++) {
  3063. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3064. t[i] = td + i * 64;
  3065. #else
  3066. t[i] = &td[i * 64];
  3067. #endif
  3068. }
  3069. sp_2048_mont_setup(m, &mp);
  3070. sp_2048_mont_norm_32(norm, m);
  3071. XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
  3072. if (reduceA != 0) {
  3073. err = sp_2048_mod_32(t[1] + 32, a, m);
  3074. if (err == MP_OKAY) {
  3075. err = sp_2048_mod_32(t[1], t[1], m);
  3076. }
  3077. }
  3078. else {
  3079. XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
  3080. err = sp_2048_mod_32(t[1], t[1], m);
  3081. }
  3082. }
  3083. if (err == MP_OKAY) {
  3084. sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
  3085. sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
  3086. sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
  3087. sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
  3088. sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
  3089. sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
  3090. sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
  3091. sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
  3092. sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
  3093. sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
  3094. sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
  3095. sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
  3096. sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
  3097. sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
  3098. i = (bits - 1) / 32;
  3099. n = e[i--];
  3100. c = bits & 31;
  3101. if (c == 0) {
  3102. c = 32;
  3103. }
  3104. c -= bits % 4;
  3105. if (c == 32) {
  3106. c = 28;
  3107. }
  3108. if (c < 0) {
  3109. /* Number of bits in top word is less than number needed. */
  3110. c = -c;
  3111. y = (int)(n << c);
  3112. n = e[i--];
  3113. y |= (int)(n >> (64 - c));
  3114. n <<= c;
  3115. c = 64 - c;
  3116. }
  3117. else {
  3118. y = (int)(n >> c);
  3119. n <<= 32 - c;
  3120. }
  3121. XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
  3122. for (; i>=0 || c>=4; ) {
  3123. if (c == 0) {
  3124. n = e[i--];
  3125. y = (int)(n >> 28);
  3126. n <<= 4;
  3127. c = 28;
  3128. }
  3129. else if (c < 4) {
  3130. y = (int)(n >> 28);
  3131. n = e[i--];
  3132. c = 4 - c;
  3133. y |= (int)(n >> (32 - c));
  3134. n <<= c;
  3135. c = 32 - c;
  3136. }
  3137. else {
  3138. y = (int)((n >> 28) & 0xf);
  3139. n <<= 4;
  3140. c -= 4;
  3141. }
  3142. sp_2048_mont_sqr_32(r, r, m, mp);
  3143. sp_2048_mont_sqr_32(r, r, m, mp);
  3144. sp_2048_mont_sqr_32(r, r, m, mp);
  3145. sp_2048_mont_sqr_32(r, r, m, mp);
  3146. sp_2048_mont_mul_32(r, r, t[y], m, mp);
  3147. }
  3148. XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
  3149. sp_2048_mont_reduce_32(r, m, mp);
  3150. mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
  3151. sp_2048_cond_sub_32(r, r, m, mask);
  3152. }
  3153. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3154. if (td != NULL) {
  3155. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  3156. }
  3157. #endif
  3158. return err;
  3159. }
  3160. #else
  3161. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  3162. *
  3163. * r A single precision number that is the result of the operation.
  3164. * a A single precision number being exponentiated.
  3165. * e A single precision number that is the exponent.
  3166. * bits The number of bits in the exponent.
  3167. * m A single precision number that is the modulus.
  3168. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  3169. */
  3170. static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
  3171. int bits, const sp_digit* m, int reduceA)
  3172. {
  3173. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3174. sp_digit* td;
  3175. #else
  3176. sp_digit td[32 * 64];
  3177. #endif
  3178. sp_digit* t[32];
  3179. sp_digit* norm;
  3180. sp_digit mp = 1;
  3181. sp_digit n;
  3182. sp_digit mask;
  3183. int i;
  3184. int c, y;
  3185. int err = MP_OKAY;
  3186. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3187. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 64), NULL,
  3188. DYNAMIC_TYPE_TMP_BUFFER);
  3189. if (td == NULL) {
  3190. err = MEMORY_E;
  3191. }
  3192. #endif
  3193. if (err == MP_OKAY) {
  3194. norm = td;
  3195. for (i=0; i<32; i++) {
  3196. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3197. t[i] = td + i * 64;
  3198. #else
  3199. t[i] = &td[i * 64];
  3200. #endif
  3201. }
  3202. sp_2048_mont_setup(m, &mp);
  3203. sp_2048_mont_norm_32(norm, m);
  3204. XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
  3205. if (reduceA != 0) {
  3206. err = sp_2048_mod_32(t[1] + 32, a, m);
  3207. if (err == MP_OKAY) {
  3208. err = sp_2048_mod_32(t[1], t[1], m);
  3209. }
  3210. }
  3211. else {
  3212. XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
  3213. err = sp_2048_mod_32(t[1], t[1], m);
  3214. }
  3215. }
  3216. if (err == MP_OKAY) {
  3217. sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
  3218. sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
  3219. sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
  3220. sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
  3221. sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
  3222. sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
  3223. sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
  3224. sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
  3225. sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
  3226. sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
  3227. sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
  3228. sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
  3229. sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
  3230. sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
  3231. sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
  3232. sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
  3233. sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
  3234. sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
  3235. sp_2048_mont_sqr_32(t[20], t[10], m, mp);
  3236. sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
  3237. sp_2048_mont_sqr_32(t[22], t[11], m, mp);
  3238. sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
  3239. sp_2048_mont_sqr_32(t[24], t[12], m, mp);
  3240. sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
  3241. sp_2048_mont_sqr_32(t[26], t[13], m, mp);
  3242. sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
  3243. sp_2048_mont_sqr_32(t[28], t[14], m, mp);
  3244. sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
  3245. sp_2048_mont_sqr_32(t[30], t[15], m, mp);
  3246. sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
  3247. i = (bits - 1) / 32;
  3248. n = e[i--];
  3249. c = bits & 31;
  3250. if (c == 0) {
  3251. c = 32;
  3252. }
  3253. c -= bits % 5;
  3254. if (c == 32) {
  3255. c = 27;
  3256. }
  3257. if (c < 0) {
  3258. /* Number of bits in top word is less than number needed. */
  3259. c = -c;
  3260. y = (int)(n << c);
  3261. n = e[i--];
  3262. y |= (int)(n >> (64 - c));
  3263. n <<= c;
  3264. c = 64 - c;
  3265. }
  3266. else {
  3267. y = (int)(n >> c);
  3268. n <<= 32 - c;
  3269. }
  3270. XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
  3271. for (; i>=0 || c>=5; ) {
  3272. if (c == 0) {
  3273. n = e[i--];
  3274. y = (int)(n >> 27);
  3275. n <<= 5;
  3276. c = 27;
  3277. }
  3278. else if (c < 5) {
  3279. y = (int)(n >> 27);
  3280. n = e[i--];
  3281. c = 5 - c;
  3282. y |= (int)(n >> (32 - c));
  3283. n <<= c;
  3284. c = 32 - c;
  3285. }
  3286. else {
  3287. y = (int)((n >> 27) & 0x1f);
  3288. n <<= 5;
  3289. c -= 5;
  3290. }
  3291. sp_2048_mont_sqr_32(r, r, m, mp);
  3292. sp_2048_mont_sqr_32(r, r, m, mp);
  3293. sp_2048_mont_sqr_32(r, r, m, mp);
  3294. sp_2048_mont_sqr_32(r, r, m, mp);
  3295. sp_2048_mont_sqr_32(r, r, m, mp);
  3296. sp_2048_mont_mul_32(r, r, t[y], m, mp);
  3297. }
  3298. XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
  3299. sp_2048_mont_reduce_32(r, m, mp);
  3300. mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
  3301. sp_2048_cond_sub_32(r, r, m, mask);
  3302. }
  3303. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3304. if (td != NULL) {
  3305. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  3306. }
  3307. #endif
  3308. return err;
  3309. }
  3310. #endif /* WOLFSSL_SP_SMALL */
  3311. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  3312. #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
  3313. /* r = 2^n mod m where n is the number of bits to reduce by.
  3314. * Given m must be 2048 bits, just need to subtract.
  3315. *
  3316. * r A single precision number.
  3317. * m A single precision number.
  3318. */
  3319. static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
  3320. {
  3321. XMEMSET(r, 0, sizeof(sp_digit) * 64);
  3322. /* r = 2^n mod m */
  3323. sp_2048_sub_in_place_64(r, m);
  3324. }
  3325. #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
  3326. /* Conditionally subtract b from a using the mask m.
  3327. * m is -1 to subtract and 0 when not copying.
  3328. *
  3329. * r A single precision number representing condition subtract result.
  3330. * a A single precision number to subtract from.
  3331. * b A single precision number to subtract.
  3332. * m Mask value to apply.
  3333. */
  3334. SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
  3335. const sp_digit* b, sp_digit m)
  3336. {
  3337. sp_digit c = 0;
  3338. __asm__ __volatile__ (
  3339. "mov r5, #1\n\t"
  3340. "lsl r5, r5, #8\n\t"
  3341. "mov r9, r5\n\t"
  3342. "mov r8, #0\n\t"
  3343. "\n1:\n\t"
  3344. "ldr r6, [%[b], r8]\n\t"
  3345. "and r6, r6, %[m]\n\t"
  3346. "mov r5, #0\n\t"
  3347. "subs r5, r5, %[c]\n\t"
  3348. "ldr r5, [%[a], r8]\n\t"
  3349. "sbcs r5, r5, r6\n\t"
  3350. "sbcs %[c], %[c], %[c]\n\t"
  3351. "str r5, [%[r], r8]\n\t"
  3352. "add r8, r8, #4\n\t"
  3353. "cmp r8, r9\n\t"
  3354. #ifdef __GNUC__
  3355. "blt 1b\n\t"
  3356. #else
  3357. "blt.n 1b\n\t"
  3358. #endif /* __GNUC__ */
  3359. : [c] "+r" (c)
  3360. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  3361. : "memory", "r5", "r6", "r8", "r9"
  3362. );
  3363. return c;
  3364. }
  3365. /* Reduce the number back to 2048 bits using Montgomery reduction.
  3366. *
  3367. * a A single precision number to reduce in place.
  3368. * m The single precision number representing the modulus.
  3369. * mp The digit representing the negative inverse of m mod 2^n.
  3370. */
  3371. SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
  3372. sp_digit mp)
  3373. {
  3374. sp_digit ca = 0;
  3375. __asm__ __volatile__ (
  3376. "mov r9, %[mp]\n\t"
  3377. "mov r12, %[m]\n\t"
  3378. "mov r10, %[a]\n\t"
  3379. "mov r4, #0\n\t"
  3380. "add r11, r10, #256\n\t"
  3381. "\n1:\n\t"
  3382. /* mu = a[i] * mp */
  3383. "mov %[mp], r9\n\t"
  3384. "ldr %[a], [r10]\n\t"
  3385. "mul %[mp], %[mp], %[a]\n\t"
  3386. "mov %[m], r12\n\t"
  3387. "add r14, r10, #248\n\t"
  3388. "\n2:\n\t"
  3389. /* a[i+j] += m[j] * mu */
  3390. "ldr %[a], [r10]\n\t"
  3391. "mov r5, #0\n\t"
  3392. /* Multiply m[j] and mu - Start */
  3393. "ldr r8, [%[m]], #4\n\t"
  3394. "umull r6, r8, %[mp], r8\n\t"
  3395. "adds %[a], %[a], r6\n\t"
  3396. "adc r5, r5, r8\n\t"
  3397. /* Multiply m[j] and mu - Done */
  3398. "adds r4, r4, %[a]\n\t"
  3399. "adc r5, r5, #0\n\t"
  3400. "str r4, [r10], #4\n\t"
  3401. /* a[i+j+1] += m[j+1] * mu */
  3402. "ldr %[a], [r10]\n\t"
  3403. "mov r4, #0\n\t"
  3404. /* Multiply m[j] and mu - Start */
  3405. "ldr r8, [%[m]], #4\n\t"
  3406. "umull r6, r8, %[mp], r8\n\t"
  3407. "adds %[a], %[a], r6\n\t"
  3408. "adc r4, r4, r8\n\t"
  3409. /* Multiply m[j] and mu - Done */
  3410. "adds r5, r5, %[a]\n\t"
  3411. "adc r4, r4, #0\n\t"
  3412. "str r5, [r10], #4\n\t"
  3413. "cmp r10, r14\n\t"
  3414. #ifdef __GNUC__
  3415. "blt 2b\n\t"
  3416. #else
  3417. "blt.n 2b\n\t"
  3418. #endif /* __GNUC__ */
  3419. /* a[i+62] += m[62] * mu */
  3420. "ldr %[a], [r10]\n\t"
  3421. "mov r5, #0\n\t"
  3422. /* Multiply m[j] and mu - Start */
  3423. "ldr r8, [%[m]], #4\n\t"
  3424. "umull r6, r8, %[mp], r8\n\t"
  3425. "adds %[a], %[a], r6\n\t"
  3426. "adc r5, r5, r8\n\t"
  3427. /* Multiply m[j] and mu - Done */
  3428. "adds r4, r4, %[a]\n\t"
  3429. "adc r5, r5, #0\n\t"
  3430. "str r4, [r10], #4\n\t"
  3431. /* a[i+63] += m[63] * mu */
  3432. "mov r4, %[ca]\n\t"
  3433. "mov %[ca], #0\n\t"
  3434. /* Multiply m[63] and mu - Start */
  3435. "ldr r8, [%[m]]\n\t"
  3436. "umull r6, r8, %[mp], r8\n\t"
  3437. "adds r5, r5, r6\n\t"
  3438. "adcs r4, r4, r8\n\t"
  3439. "adc %[ca], %[ca], #0\n\t"
  3440. /* Multiply m[63] and mu - Done */
  3441. "ldr r6, [r10]\n\t"
  3442. "ldr r8, [r10, #4]\n\t"
  3443. "adds r6, r6, r5\n\t"
  3444. "adcs r8, r8, r4\n\t"
  3445. "adc %[ca], %[ca], #0\n\t"
  3446. "str r6, [r10]\n\t"
  3447. "str r8, [r10, #4]\n\t"
  3448. /* Next word in a */
  3449. "sub r10, r10, #248\n\t"
  3450. "cmp r10, r11\n\t"
  3451. #ifdef __GNUC__
  3452. "blt 1b\n\t"
  3453. #else
  3454. "blt.n 1b\n\t"
  3455. #endif /* __GNUC__ */
  3456. "mov %[a], r10\n\t"
  3457. "mov %[m], r12\n\t"
  3458. : [ca] "+r" (ca), [a] "+r" (a)
  3459. : [m] "r" (m), [mp] "r" (mp)
  3460. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  3461. );
  3462. sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
  3463. }
  3464. /* Multiply two Montogmery form numbers mod the modulus (prime).
  3465. * (r = a * b mod m)
  3466. *
  3467. * r Result of multiplication.
  3468. * a First number to multiply in Montogmery form.
  3469. * b Second number to multiply in Montogmery form.
  3470. * m Modulus (prime).
  3471. * mp Montogmery mulitplier.
  3472. */
  3473. static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
  3474. const sp_digit* m, sp_digit mp)
  3475. {
  3476. sp_2048_mul_64(r, a, b);
  3477. sp_2048_mont_reduce_64(r, m, mp);
  3478. }
  3479. /* Square the Montgomery form number. (r = a * a mod m)
  3480. *
  3481. * r Result of squaring.
  3482. * a Number to square in Montogmery form.
  3483. * m Modulus (prime).
  3484. * mp Montogmery mulitplier.
  3485. */
  3486. static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
  3487. sp_digit mp)
  3488. {
  3489. sp_2048_sqr_64(r, a);
  3490. sp_2048_mont_reduce_64(r, m, mp);
  3491. }
  3492. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  3493. *
  3494. * d1 The high order half of the number to divide.
  3495. * d0 The low order half of the number to divide.
  3496. * div The dividend.
  3497. * returns the result of the division.
  3498. *
  3499. * Note that this is an approximate div. It may give an answer 1 larger.
  3500. */
  3501. SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
  3502. sp_digit div)
  3503. {
  3504. sp_digit r = 0;
  3505. __asm__ __volatile__ (
  3506. "lsr r6, %[div], #16\n\t"
  3507. "add r6, r6, #1\n\t"
  3508. "udiv r4, %[d1], r6\n\t"
  3509. "lsl r8, r4, #16\n\t"
  3510. "umull r4, r5, %[div], r8\n\t"
  3511. "subs %[d0], %[d0], r4\n\t"
  3512. "sbc %[d1], %[d1], r5\n\t"
  3513. "udiv r5, %[d1], r6\n\t"
  3514. "lsl r4, r5, #16\n\t"
  3515. "add r8, r8, r4\n\t"
  3516. "umull r4, r5, %[div], r4\n\t"
  3517. "subs %[d0], %[d0], r4\n\t"
  3518. "sbc %[d1], %[d1], r5\n\t"
  3519. "lsl r4, %[d1], #16\n\t"
  3520. "orr r4, r4, %[d0], lsr #16\n\t"
  3521. "udiv r4, r4, r6\n\t"
  3522. "add r8, r8, r4\n\t"
  3523. "umull r4, r5, %[div], r4\n\t"
  3524. "subs %[d0], %[d0], r4\n\t"
  3525. "sbc %[d1], %[d1], r5\n\t"
  3526. "lsl r4, %[d1], #16\n\t"
  3527. "orr r4, r4, %[d0], lsr #16\n\t"
  3528. "udiv r4, r4, r6\n\t"
  3529. "add r8, r8, r4\n\t"
  3530. "umull r4, r5, %[div], r4\n\t"
  3531. "subs %[d0], %[d0], r4\n\t"
  3532. "sbc %[d1], %[d1], r5\n\t"
  3533. "udiv r4, %[d0], %[div]\n\t"
  3534. "add r8, r8, r4\n\t"
  3535. "mov %[r], r8\n\t"
  3536. : [r] "+r" (r)
  3537. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  3538. : "r4", "r5", "r6", "r8"
  3539. );
  3540. return r;
  3541. }
  3542. /* AND m into each word of a and store in r.
  3543. *
  3544. * r A single precision integer.
  3545. * a A single precision integer.
  3546. * m Mask to AND against each digit.
  3547. */
  3548. static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
  3549. {
  3550. #ifdef WOLFSSL_SP_SMALL
  3551. int i;
  3552. for (i=0; i<64; i++) {
  3553. r[i] = a[i] & m;
  3554. }
  3555. #else
  3556. int i;
  3557. for (i = 0; i < 64; i += 8) {
  3558. r[i+0] = a[i+0] & m;
  3559. r[i+1] = a[i+1] & m;
  3560. r[i+2] = a[i+2] & m;
  3561. r[i+3] = a[i+3] & m;
  3562. r[i+4] = a[i+4] & m;
  3563. r[i+5] = a[i+5] & m;
  3564. r[i+6] = a[i+6] & m;
  3565. r[i+7] = a[i+7] & m;
  3566. }
  3567. #endif
  3568. }
  3569. /* Compare a with b in constant time.
  3570. *
  3571. * a A single precision integer.
  3572. * b A single precision integer.
  3573. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  3574. * respectively.
  3575. */
  3576. SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
  3577. {
  3578. sp_digit r = 0;
  3579. __asm__ __volatile__ (
  3580. "mov r3, #0\n\t"
  3581. "mvn r3, r3\n\t"
  3582. "mov r6, #252\n\t"
  3583. "\n1:\n\t"
  3584. "ldr r8, [%[a], r6]\n\t"
  3585. "ldr r5, [%[b], r6]\n\t"
  3586. "and r8, r8, r3\n\t"
  3587. "and r5, r5, r3\n\t"
  3588. "mov r4, r8\n\t"
  3589. "subs r8, r8, r5\n\t"
  3590. "sbc r8, r8, r8\n\t"
  3591. "add %[r], %[r], r8\n\t"
  3592. "mvn r8, r8\n\t"
  3593. "and r3, r3, r8\n\t"
  3594. "subs r5, r5, r4\n\t"
  3595. "sbc r8, r8, r8\n\t"
  3596. "sub %[r], %[r], r8\n\t"
  3597. "mvn r8, r8\n\t"
  3598. "and r3, r3, r8\n\t"
  3599. "sub r6, r6, #4\n\t"
  3600. "cmp r6, #0\n\t"
  3601. #ifdef __GNUC__
  3602. "bge 1b\n\t"
  3603. #else
  3604. "bge.n 1b\n\t"
  3605. #endif /* __GNUC__ */
  3606. : [r] "+r" (r)
  3607. : [a] "r" (a), [b] "r" (b)
  3608. : "r3", "r4", "r5", "r6", "r8"
  3609. );
  3610. return r;
  3611. }
  3612. /* Divide d in a and put remainder into r (m*d + r = a)
  3613. * m is not calculated as it is not needed at this time.
  3614. *
  3615. * a Number to be divided.
  3616. * d Number to divide with.
  3617. * m Multiplier result.
  3618. * r Remainder from the division.
  3619. * returns MP_OKAY indicating success.
  3620. */
  3621. static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
  3622. sp_digit* r)
  3623. {
  3624. sp_digit t1[128], t2[65];
  3625. sp_digit div, r1;
  3626. int i;
  3627. (void)m;
  3628. div = d[63];
  3629. XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
  3630. for (i=63; i>=0; i--) {
  3631. sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
  3632. r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
  3633. sp_2048_mul_d_64(t2, d, r1);
  3634. t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
  3635. t1[64 + i] -= t2[64];
  3636. sp_2048_mask_64(t2, d, t1[64 + i]);
  3637. t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
  3638. sp_2048_mask_64(t2, d, t1[64 + i]);
  3639. t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
  3640. }
  3641. r1 = sp_2048_cmp_64(t1, d) >= 0;
  3642. sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
  3643. return MP_OKAY;
  3644. }
  3645. /* Reduce a modulo m into r. (r = a mod m)
  3646. *
  3647. * r A single precision number that is the reduced result.
  3648. * a A single precision number that is to be reduced.
  3649. * m A single precision number that is the modulus to reduce with.
  3650. * returns MP_OKAY indicating success.
  3651. */
  3652. static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
  3653. {
  3654. return sp_2048_div_64(a, m, NULL, r);
  3655. }
  3656. /* Divide d in a and put remainder into r (m*d + r = a)
  3657. * m is not calculated as it is not needed at this time.
  3658. *
  3659. * a Number to be divided.
  3660. * d Number to divide with.
  3661. * m Multiplier result.
  3662. * r Remainder from the division.
  3663. * returns MP_OKAY indicating success.
  3664. */
  3665. static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
  3666. sp_digit* r)
  3667. {
  3668. sp_digit t1[128], t2[65];
  3669. sp_digit div, r1;
  3670. int i;
  3671. (void)m;
  3672. div = d[63];
  3673. XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
  3674. for (i=63; i>=0; i--) {
  3675. sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
  3676. r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
  3677. sp_2048_mul_d_64(t2, d, r1);
  3678. t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
  3679. t1[64 + i] -= t2[64];
  3680. if (t1[64 + i] != 0) {
  3681. t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
  3682. if (t1[64 + i] != 0)
  3683. t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
  3684. }
  3685. }
  3686. r1 = sp_2048_cmp_64(t1, d) >= 0;
  3687. sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
  3688. return MP_OKAY;
  3689. }
  3690. /* Reduce a modulo m into r. (r = a mod m)
  3691. *
  3692. * r A single precision number that is the reduced result.
  3693. * a A single precision number that is to be reduced.
  3694. * m A single precision number that is the modulus to reduce with.
  3695. * returns MP_OKAY indicating success.
  3696. */
  3697. static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
  3698. {
  3699. return sp_2048_div_64_cond(a, m, NULL, r);
  3700. }
  3701. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  3702. defined(WOLFSSL_HAVE_SP_DH)
  3703. #ifdef WOLFSSL_SP_SMALL
  3704. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  3705. *
  3706. * r A single precision number that is the result of the operation.
  3707. * a A single precision number being exponentiated.
  3708. * e A single precision number that is the exponent.
  3709. * bits The number of bits in the exponent.
  3710. * m A single precision number that is the modulus.
  3711. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  3712. */
  3713. static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
  3714. int bits, const sp_digit* m, int reduceA)
  3715. {
  3716. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3717. sp_digit* td;
  3718. #else
  3719. sp_digit td[16 * 128];
  3720. #endif
  3721. sp_digit* t[16];
  3722. sp_digit* norm;
  3723. sp_digit mp = 1;
  3724. sp_digit n;
  3725. sp_digit mask;
  3726. int i;
  3727. int c, y;
  3728. int err = MP_OKAY;
  3729. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3730. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL,
  3731. DYNAMIC_TYPE_TMP_BUFFER);
  3732. if (td == NULL) {
  3733. err = MEMORY_E;
  3734. }
  3735. #endif
  3736. if (err == MP_OKAY) {
  3737. norm = td;
  3738. for (i=0; i<16; i++) {
  3739. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3740. t[i] = td + i * 128;
  3741. #else
  3742. t[i] = &td[i * 128];
  3743. #endif
  3744. }
  3745. sp_2048_mont_setup(m, &mp);
  3746. sp_2048_mont_norm_64(norm, m);
  3747. XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
  3748. if (reduceA != 0) {
  3749. err = sp_2048_mod_64(t[1] + 64, a, m);
  3750. if (err == MP_OKAY) {
  3751. err = sp_2048_mod_64(t[1], t[1], m);
  3752. }
  3753. }
  3754. else {
  3755. XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
  3756. err = sp_2048_mod_64(t[1], t[1], m);
  3757. }
  3758. }
  3759. if (err == MP_OKAY) {
  3760. sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
  3761. sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
  3762. sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
  3763. sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
  3764. sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
  3765. sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
  3766. sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
  3767. sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
  3768. sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
  3769. sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
  3770. sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
  3771. sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
  3772. sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
  3773. sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
  3774. i = (bits - 1) / 32;
  3775. n = e[i--];
  3776. c = bits & 31;
  3777. if (c == 0) {
  3778. c = 32;
  3779. }
  3780. c -= bits % 4;
  3781. if (c == 32) {
  3782. c = 28;
  3783. }
  3784. if (c < 0) {
  3785. /* Number of bits in top word is less than number needed. */
  3786. c = -c;
  3787. y = (int)(n << c);
  3788. n = e[i--];
  3789. y |= (int)(n >> (64 - c));
  3790. n <<= c;
  3791. c = 64 - c;
  3792. }
  3793. else {
  3794. y = (int)(n >> c);
  3795. n <<= 32 - c;
  3796. }
  3797. XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
  3798. for (; i>=0 || c>=4; ) {
  3799. if (c == 0) {
  3800. n = e[i--];
  3801. y = (int)(n >> 28);
  3802. n <<= 4;
  3803. c = 28;
  3804. }
  3805. else if (c < 4) {
  3806. y = (int)(n >> 28);
  3807. n = e[i--];
  3808. c = 4 - c;
  3809. y |= (int)(n >> (32 - c));
  3810. n <<= c;
  3811. c = 32 - c;
  3812. }
  3813. else {
  3814. y = (int)((n >> 28) & 0xf);
  3815. n <<= 4;
  3816. c -= 4;
  3817. }
  3818. sp_2048_mont_sqr_64(r, r, m, mp);
  3819. sp_2048_mont_sqr_64(r, r, m, mp);
  3820. sp_2048_mont_sqr_64(r, r, m, mp);
  3821. sp_2048_mont_sqr_64(r, r, m, mp);
  3822. sp_2048_mont_mul_64(r, r, t[y], m, mp);
  3823. }
  3824. XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
  3825. sp_2048_mont_reduce_64(r, m, mp);
  3826. mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
  3827. sp_2048_cond_sub_64(r, r, m, mask);
  3828. }
  3829. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3830. if (td != NULL) {
  3831. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  3832. }
  3833. #endif
  3834. return err;
  3835. }
  3836. #else
  3837. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  3838. *
  3839. * r A single precision number that is the result of the operation.
  3840. * a A single precision number being exponentiated.
  3841. * e A single precision number that is the exponent.
  3842. * bits The number of bits in the exponent.
  3843. * m A single precision number that is the modulus.
  3844. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  3845. */
  3846. static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
  3847. int bits, const sp_digit* m, int reduceA)
  3848. {
  3849. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3850. sp_digit* td;
  3851. #else
  3852. sp_digit td[32 * 128];
  3853. #endif
  3854. sp_digit* t[32];
  3855. sp_digit* norm;
  3856. sp_digit mp = 1;
  3857. sp_digit n;
  3858. sp_digit mask;
  3859. int i;
  3860. int c, y;
  3861. int err = MP_OKAY;
  3862. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3863. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 128), NULL,
  3864. DYNAMIC_TYPE_TMP_BUFFER);
  3865. if (td == NULL) {
  3866. err = MEMORY_E;
  3867. }
  3868. #endif
  3869. if (err == MP_OKAY) {
  3870. norm = td;
  3871. for (i=0; i<32; i++) {
  3872. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3873. t[i] = td + i * 128;
  3874. #else
  3875. t[i] = &td[i * 128];
  3876. #endif
  3877. }
  3878. sp_2048_mont_setup(m, &mp);
  3879. sp_2048_mont_norm_64(norm, m);
  3880. XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
  3881. if (reduceA != 0) {
  3882. err = sp_2048_mod_64(t[1] + 64, a, m);
  3883. if (err == MP_OKAY) {
  3884. err = sp_2048_mod_64(t[1], t[1], m);
  3885. }
  3886. }
  3887. else {
  3888. XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
  3889. err = sp_2048_mod_64(t[1], t[1], m);
  3890. }
  3891. }
  3892. if (err == MP_OKAY) {
  3893. sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
  3894. sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
  3895. sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
  3896. sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
  3897. sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
  3898. sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
  3899. sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
  3900. sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
  3901. sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
  3902. sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
  3903. sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
  3904. sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
  3905. sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
  3906. sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
  3907. sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
  3908. sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
  3909. sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
  3910. sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
  3911. sp_2048_mont_sqr_64(t[20], t[10], m, mp);
  3912. sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
  3913. sp_2048_mont_sqr_64(t[22], t[11], m, mp);
  3914. sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
  3915. sp_2048_mont_sqr_64(t[24], t[12], m, mp);
  3916. sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
  3917. sp_2048_mont_sqr_64(t[26], t[13], m, mp);
  3918. sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
  3919. sp_2048_mont_sqr_64(t[28], t[14], m, mp);
  3920. sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
  3921. sp_2048_mont_sqr_64(t[30], t[15], m, mp);
  3922. sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
  3923. i = (bits - 1) / 32;
  3924. n = e[i--];
  3925. c = bits & 31;
  3926. if (c == 0) {
  3927. c = 32;
  3928. }
  3929. c -= bits % 5;
  3930. if (c == 32) {
  3931. c = 27;
  3932. }
  3933. if (c < 0) {
  3934. /* Number of bits in top word is less than number needed. */
  3935. c = -c;
  3936. y = (int)(n << c);
  3937. n = e[i--];
  3938. y |= (int)(n >> (64 - c));
  3939. n <<= c;
  3940. c = 64 - c;
  3941. }
  3942. else {
  3943. y = (int)(n >> c);
  3944. n <<= 32 - c;
  3945. }
  3946. XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
  3947. for (; i>=0 || c>=5; ) {
  3948. if (c == 0) {
  3949. n = e[i--];
  3950. y = (int)(n >> 27);
  3951. n <<= 5;
  3952. c = 27;
  3953. }
  3954. else if (c < 5) {
  3955. y = (int)(n >> 27);
  3956. n = e[i--];
  3957. c = 5 - c;
  3958. y |= (int)(n >> (32 - c));
  3959. n <<= c;
  3960. c = 32 - c;
  3961. }
  3962. else {
  3963. y = (int)((n >> 27) & 0x1f);
  3964. n <<= 5;
  3965. c -= 5;
  3966. }
  3967. sp_2048_mont_sqr_64(r, r, m, mp);
  3968. sp_2048_mont_sqr_64(r, r, m, mp);
  3969. sp_2048_mont_sqr_64(r, r, m, mp);
  3970. sp_2048_mont_sqr_64(r, r, m, mp);
  3971. sp_2048_mont_sqr_64(r, r, m, mp);
  3972. sp_2048_mont_mul_64(r, r, t[y], m, mp);
  3973. }
  3974. XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
  3975. sp_2048_mont_reduce_64(r, m, mp);
  3976. mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
  3977. sp_2048_cond_sub_64(r, r, m, mask);
  3978. }
  3979. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  3980. if (td != NULL) {
  3981. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  3982. }
  3983. #endif
  3984. return err;
  3985. }
  3986. #endif /* WOLFSSL_SP_SMALL */
  3987. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  3988. #ifdef WOLFSSL_HAVE_SP_RSA
  3989. /* RSA public key operation.
  3990. *
  3991. * in Array of bytes representing the number to exponentiate, base.
  3992. * inLen Number of bytes in base.
  3993. * em Public exponent.
  3994. * mm Modulus.
  3995. * out Buffer to hold big-endian bytes of exponentiation result.
  3996. * Must be at least 256 bytes long.
  3997. * outLen Number of bytes in result.
  3998. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  3999. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  4000. */
  4001. int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
  4002. byte* out, word32* outLen)
  4003. {
  4004. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  4005. sp_digit a[128], m[64], r[128];
  4006. #else
  4007. sp_digit* d = NULL;
  4008. sp_digit* a = NULL;
  4009. sp_digit* m = NULL;
  4010. sp_digit* r = NULL;
  4011. #endif
  4012. sp_digit *ah = NULL;
  4013. sp_digit e[1];
  4014. int err = MP_OKAY;
  4015. if (*outLen < 256) {
  4016. err = MP_TO_E;
  4017. }
  4018. else if (mp_count_bits(em) > 32 || inLen > 256 ||
  4019. mp_count_bits(mm) != 2048) {
  4020. err = MP_READ_E;
  4021. }
  4022. else if (mp_iseven(mm)) {
  4023. err = MP_VAL;
  4024. }
  4025. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  4026. if (err == MP_OKAY) {
  4027. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
  4028. DYNAMIC_TYPE_RSA);
  4029. if (d == NULL)
  4030. err = MEMORY_E;
  4031. }
  4032. if (err == MP_OKAY) {
  4033. a = d;
  4034. r = a + 64 * 2;
  4035. m = r + 64 * 2;
  4036. }
  4037. #endif
  4038. if (err == MP_OKAY) {
  4039. ah = a + 64;
  4040. sp_2048_from_bin(ah, 64, in, inLen);
  4041. #if DIGIT_BIT >= 32
  4042. e[0] = em->dp[0];
  4043. #else
  4044. e[0] = em->dp[0];
  4045. if (em->used > 1) {
  4046. e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
  4047. }
  4048. #endif
  4049. if (e[0] == 0) {
  4050. err = MP_EXPTMOD_E;
  4051. }
  4052. }
  4053. if (err == MP_OKAY) {
  4054. sp_2048_from_mp(m, 64, mm);
  4055. if (e[0] == 0x3) {
  4056. if (err == MP_OKAY) {
  4057. sp_2048_sqr_64(r, ah);
  4058. err = sp_2048_mod_64_cond(r, r, m);
  4059. }
  4060. if (err == MP_OKAY) {
  4061. sp_2048_mul_64(r, ah, r);
  4062. err = sp_2048_mod_64_cond(r, r, m);
  4063. }
  4064. }
  4065. else {
  4066. int i;
  4067. sp_digit mp;
  4068. sp_2048_mont_setup(m, &mp);
  4069. /* Convert to Montgomery form. */
  4070. XMEMSET(a, 0, sizeof(sp_digit) * 64);
  4071. err = sp_2048_mod_64_cond(a, a, m);
  4072. if (err == MP_OKAY) {
  4073. for (i = 31; i >= 0; i--) {
  4074. if (e[0] >> i) {
  4075. break;
  4076. }
  4077. }
  4078. XMEMCPY(r, a, sizeof(sp_digit) * 64);
  4079. for (i--; i>=0; i--) {
  4080. sp_2048_mont_sqr_64(r, r, m, mp);
  4081. if (((e[0] >> i) & 1) == 1) {
  4082. sp_2048_mont_mul_64(r, r, a, m, mp);
  4083. }
  4084. }
  4085. XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
  4086. sp_2048_mont_reduce_64(r, m, mp);
  4087. for (i = 63; i > 0; i--) {
  4088. if (r[i] != m[i]) {
  4089. break;
  4090. }
  4091. }
  4092. if (r[i] >= m[i]) {
  4093. sp_2048_sub_in_place_64(r, m);
  4094. }
  4095. }
  4096. }
  4097. }
  4098. if (err == MP_OKAY) {
  4099. sp_2048_to_bin(r, out);
  4100. *outLen = 256;
  4101. }
  4102. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  4103. if (d != NULL) {
  4104. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  4105. }
  4106. #endif
  4107. return err;
  4108. }
  4109. #ifndef WOLFSSL_RSA_PUBLIC_ONLY
  4110. /* Conditionally add a and b using the mask m.
  4111. * m is -1 to add and 0 when not.
  4112. *
  4113. * r A single precision number representing conditional add result.
  4114. * a A single precision number to add with.
  4115. * b A single precision number to add.
  4116. * m Mask value to apply.
  4117. */
  4118. SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
  4119. sp_digit m)
  4120. {
  4121. sp_digit c = 0;
  4122. __asm__ __volatile__ (
  4123. "mov r5, #128\n\t"
  4124. "mov r9, r5\n\t"
  4125. "mov r8, #0\n\t"
  4126. "\n1:\n\t"
  4127. "ldr r6, [%[b], r8]\n\t"
  4128. "and r6, r6, %[m]\n\t"
  4129. "adds r5, %[c], #-1\n\t"
  4130. "ldr r5, [%[a], r8]\n\t"
  4131. "adcs r5, r5, r6\n\t"
  4132. "mov %[c], #0\n\t"
  4133. "adcs %[c], %[c], %[c]\n\t"
  4134. "str r5, [%[r], r8]\n\t"
  4135. "add r8, r8, #4\n\t"
  4136. "cmp r8, r9\n\t"
  4137. #ifdef __GNUC__
  4138. "blt 1b\n\t"
  4139. #else
  4140. "blt.n 1b\n\t"
  4141. #endif /* __GNUC__ */
  4142. : [c] "+r" (c)
  4143. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  4144. : "memory", "r5", "r6", "r8", "r9"
  4145. );
  4146. return c;
  4147. }
  4148. /* RSA private key operation.
  4149. *
  4150. * in Array of bytes representing the number to exponentiate, base.
  4151. * inLen Number of bytes in base.
  4152. * dm Private exponent.
  4153. * pm First prime.
  4154. * qm Second prime.
  4155. * dpm First prime's CRT exponent.
  4156. * dqm Second prime's CRT exponent.
  4157. * qim Inverse of second prime mod p.
  4158. * mm Modulus.
  4159. * out Buffer to hold big-endian bytes of exponentiation result.
  4160. * Must be at least 256 bytes long.
  4161. * outLen Number of bytes in result.
  4162. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  4163. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  4164. */
  4165. int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
  4166. mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
  4167. byte* out, word32* outLen)
  4168. {
  4169. #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
  4170. sp_digit* a = NULL;
  4171. sp_digit* d = NULL;
  4172. sp_digit* m = NULL;
  4173. sp_digit* r = NULL;
  4174. int err = MP_OKAY;
  4175. (void)pm;
  4176. (void)qm;
  4177. (void)dpm;
  4178. (void)dqm;
  4179. (void)qim;
  4180. if (*outLen < 256U) {
  4181. err = MP_TO_E;
  4182. }
  4183. if (err == MP_OKAY) {
  4184. if (mp_count_bits(dm) > 2048) {
  4185. err = MP_READ_E;
  4186. }
  4187. else if (inLen > 256) {
  4188. err = MP_READ_E;
  4189. }
  4190. else if (mp_count_bits(mm) != 2048) {
  4191. err = MP_READ_E;
  4192. }
  4193. else if (mp_iseven(mm)) {
  4194. err = MP_VAL;
  4195. }
  4196. }
  4197. if (err == MP_OKAY) {
  4198. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
  4199. DYNAMIC_TYPE_RSA);
  4200. if (d == NULL) {
  4201. err = MEMORY_E;
  4202. }
  4203. }
  4204. if (err == MP_OKAY) {
  4205. a = d + 64;
  4206. m = a + 128;
  4207. r = a;
  4208. sp_2048_from_bin(a, 64, in, inLen);
  4209. sp_2048_from_mp(d, 64, dm);
  4210. sp_2048_from_mp(m, 64, mm);
  4211. err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
  4212. }
  4213. if (err == MP_OKAY) {
  4214. sp_2048_to_bin(r, out);
  4215. *outLen = 256;
  4216. }
  4217. if (d != NULL) {
  4218. XMEMSET(d, 0, sizeof(sp_digit) * 64);
  4219. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  4220. }
  4221. return err;
  4222. #else
  4223. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  4224. sp_digit a[64 * 2];
  4225. sp_digit p[32], q[32], dp[32];
  4226. sp_digit tmpa[64], tmpb[64];
  4227. #else
  4228. sp_digit* t = NULL;
  4229. sp_digit* a = NULL;
  4230. sp_digit* p = NULL;
  4231. sp_digit* q = NULL;
  4232. sp_digit* dp = NULL;
  4233. sp_digit* tmpa = NULL;
  4234. sp_digit* tmpb = NULL;
  4235. #endif
  4236. sp_digit* r = NULL;
  4237. sp_digit* qi = NULL;
  4238. sp_digit* dq = NULL;
  4239. sp_digit c;
  4240. int err = MP_OKAY;
  4241. (void)dm;
  4242. (void)mm;
  4243. if (*outLen < 256) {
  4244. err = MP_TO_E;
  4245. }
  4246. else if (inLen > 256 || mp_count_bits(mm) != 2048) {
  4247. err = MP_READ_E;
  4248. }
  4249. else if (mp_iseven(mm)) {
  4250. err = MP_VAL;
  4251. }
  4252. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  4253. if (err == MP_OKAY) {
  4254. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
  4255. DYNAMIC_TYPE_RSA);
  4256. if (t == NULL)
  4257. err = MEMORY_E;
  4258. }
  4259. if (err == MP_OKAY) {
  4260. a = t;
  4261. p = a + 64 * 2;
  4262. q = p + 32;
  4263. qi = dq = dp = q + 32;
  4264. tmpa = qi + 32;
  4265. tmpb = tmpa + 64;
  4266. r = t + 64;
  4267. }
  4268. #else
  4269. #endif
  4270. if (err == MP_OKAY) {
  4271. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  4272. r = a;
  4273. qi = dq = dp;
  4274. #endif
  4275. sp_2048_from_bin(a, 64, in, inLen);
  4276. sp_2048_from_mp(p, 32, pm);
  4277. sp_2048_from_mp(q, 32, qm);
  4278. sp_2048_from_mp(dp, 32, dpm);
  4279. err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
  4280. }
  4281. if (err == MP_OKAY) {
  4282. sp_2048_from_mp(dq, 32, dqm);
  4283. err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
  4284. }
  4285. if (err == MP_OKAY) {
  4286. c = sp_2048_sub_in_place_32(tmpa, tmpb);
  4287. c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
  4288. sp_2048_cond_add_32(tmpa, tmpa, p, c);
  4289. sp_2048_from_mp(qi, 32, qim);
  4290. sp_2048_mul_32(tmpa, tmpa, qi);
  4291. err = sp_2048_mod_32(tmpa, tmpa, p);
  4292. }
  4293. if (err == MP_OKAY) {
  4294. sp_2048_mul_32(tmpa, q, tmpa);
  4295. XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
  4296. sp_2048_add_64(r, tmpb, tmpa);
  4297. sp_2048_to_bin(r, out);
  4298. *outLen = 256;
  4299. }
  4300. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  4301. if (t != NULL) {
  4302. XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
  4303. XFREE(t, NULL, DYNAMIC_TYPE_RSA);
  4304. }
  4305. #else
  4306. XMEMSET(tmpa, 0, sizeof(tmpa));
  4307. XMEMSET(tmpb, 0, sizeof(tmpb));
  4308. XMEMSET(p, 0, sizeof(p));
  4309. XMEMSET(q, 0, sizeof(q));
  4310. XMEMSET(dp, 0, sizeof(dp));
  4311. #endif
  4312. #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
  4313. return err;
  4314. }
  4315. #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
  4316. #endif /* WOLFSSL_HAVE_SP_RSA */
  4317. #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
  4318. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4319. /* Convert an array of sp_digit to an mp_int.
  4320. *
  4321. * a A single precision integer.
  4322. * r A multi-precision integer.
  4323. */
  4324. static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
  4325. {
  4326. int err;
  4327. err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
  4328. if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
  4329. #if DIGIT_BIT == 32
  4330. XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
  4331. r->used = 64;
  4332. mp_clamp(r);
  4333. #elif DIGIT_BIT < 32
  4334. int i, j = 0, s = 0;
  4335. r->dp[0] = 0;
  4336. for (i = 0; i < 64; i++) {
  4337. r->dp[j] |= (mp_digit)(a[i] << s);
  4338. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  4339. s = DIGIT_BIT - s;
  4340. r->dp[++j] = (mp_digit)(a[i] >> s);
  4341. while (s + DIGIT_BIT <= 32) {
  4342. s += DIGIT_BIT;
  4343. r->dp[j++] &= (1L << DIGIT_BIT) - 1;
  4344. if (s == SP_WORD_SIZE) {
  4345. r->dp[j] = 0;
  4346. }
  4347. else {
  4348. r->dp[j] = (mp_digit)(a[i] >> s);
  4349. }
  4350. }
  4351. s = 32 - s;
  4352. }
  4353. r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
  4354. mp_clamp(r);
  4355. #else
  4356. int i, j = 0, s = 0;
  4357. r->dp[0] = 0;
  4358. for (i = 0; i < 64; i++) {
  4359. r->dp[j] |= ((mp_digit)a[i]) << s;
  4360. if (s + 32 >= DIGIT_BIT) {
  4361. #if DIGIT_BIT != 32 && DIGIT_BIT != 64
  4362. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  4363. #endif
  4364. s = DIGIT_BIT - s;
  4365. r->dp[++j] = a[i] >> s;
  4366. s = 32 - s;
  4367. }
  4368. else {
  4369. s += 32;
  4370. }
  4371. }
  4372. r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
  4373. mp_clamp(r);
  4374. #endif
  4375. }
  4376. return err;
  4377. }
  4378. /* Perform the modular exponentiation for Diffie-Hellman.
  4379. *
  4380. * base Base. MP integer.
  4381. * exp Exponent. MP integer.
  4382. * mod Modulus. MP integer.
  4383. * res Result. MP integer.
  4384. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  4385. * and MEMORY_E if memory allocation fails.
  4386. */
  4387. int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
  4388. {
  4389. int err = MP_OKAY;
  4390. sp_digit b[128], e[64], m[64];
  4391. sp_digit* r = b;
  4392. int expBits = mp_count_bits(exp);
  4393. if (mp_count_bits(base) > 2048) {
  4394. err = MP_READ_E;
  4395. }
  4396. else if (expBits > 2048) {
  4397. err = MP_READ_E;
  4398. }
  4399. else if (mp_count_bits(mod) != 2048) {
  4400. err = MP_READ_E;
  4401. }
  4402. else if (mp_iseven(mod)) {
  4403. err = MP_VAL;
  4404. }
  4405. if (err == MP_OKAY) {
  4406. sp_2048_from_mp(b, 64, base);
  4407. sp_2048_from_mp(e, 64, exp);
  4408. sp_2048_from_mp(m, 64, mod);
  4409. err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
  4410. }
  4411. if (err == MP_OKAY) {
  4412. err = sp_2048_to_mp(r, res);
  4413. }
  4414. XMEMSET(e, 0, sizeof(e));
  4415. return err;
  4416. }
  4417. #ifdef WOLFSSL_HAVE_SP_DH
  4418. #ifdef HAVE_FFDHE_2048
  4419. static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
  4420. {
  4421. __asm__ __volatile__ (
  4422. "mov r6, #31\n\t"
  4423. "sub r6, r6, %[n]\n\t"
  4424. "add %[a], %[a], #192\n\t"
  4425. "add %[r], %[r], #192\n\t"
  4426. "ldr r3, [%[a], #60]\n\t"
  4427. "lsr r4, r3, #1\n\t"
  4428. "lsl r3, r3, %[n]\n\t"
  4429. "lsr r4, r4, r6\n\t"
  4430. "ldr r2, [%[a], #56]\n\t"
  4431. "str r4, [%[r], #64]\n\t"
  4432. "lsr r5, r2, #1\n\t"
  4433. "lsl r2, r2, %[n]\n\t"
  4434. "lsr r5, r5, r6\n\t"
  4435. "orr r3, r3, r5\n\t"
  4436. "ldr r4, [%[a], #52]\n\t"
  4437. "str r3, [%[r], #60]\n\t"
  4438. "lsr r5, r4, #1\n\t"
  4439. "lsl r4, r4, %[n]\n\t"
  4440. "lsr r5, r5, r6\n\t"
  4441. "orr r2, r2, r5\n\t"
  4442. "ldr r3, [%[a], #48]\n\t"
  4443. "str r2, [%[r], #56]\n\t"
  4444. "lsr r5, r3, #1\n\t"
  4445. "lsl r3, r3, %[n]\n\t"
  4446. "lsr r5, r5, r6\n\t"
  4447. "orr r4, r4, r5\n\t"
  4448. "ldr r2, [%[a], #44]\n\t"
  4449. "str r4, [%[r], #52]\n\t"
  4450. "lsr r5, r2, #1\n\t"
  4451. "lsl r2, r2, %[n]\n\t"
  4452. "lsr r5, r5, r6\n\t"
  4453. "orr r3, r3, r5\n\t"
  4454. "ldr r4, [%[a], #40]\n\t"
  4455. "str r3, [%[r], #48]\n\t"
  4456. "lsr r5, r4, #1\n\t"
  4457. "lsl r4, r4, %[n]\n\t"
  4458. "lsr r5, r5, r6\n\t"
  4459. "orr r2, r2, r5\n\t"
  4460. "ldr r3, [%[a], #36]\n\t"
  4461. "str r2, [%[r], #44]\n\t"
  4462. "lsr r5, r3, #1\n\t"
  4463. "lsl r3, r3, %[n]\n\t"
  4464. "lsr r5, r5, r6\n\t"
  4465. "orr r4, r4, r5\n\t"
  4466. "ldr r2, [%[a], #32]\n\t"
  4467. "str r4, [%[r], #40]\n\t"
  4468. "lsr r5, r2, #1\n\t"
  4469. "lsl r2, r2, %[n]\n\t"
  4470. "lsr r5, r5, r6\n\t"
  4471. "orr r3, r3, r5\n\t"
  4472. "ldr r4, [%[a], #28]\n\t"
  4473. "str r3, [%[r], #36]\n\t"
  4474. "lsr r5, r4, #1\n\t"
  4475. "lsl r4, r4, %[n]\n\t"
  4476. "lsr r5, r5, r6\n\t"
  4477. "orr r2, r2, r5\n\t"
  4478. "ldr r3, [%[a], #24]\n\t"
  4479. "str r2, [%[r], #32]\n\t"
  4480. "lsr r5, r3, #1\n\t"
  4481. "lsl r3, r3, %[n]\n\t"
  4482. "lsr r5, r5, r6\n\t"
  4483. "orr r4, r4, r5\n\t"
  4484. "ldr r2, [%[a], #20]\n\t"
  4485. "str r4, [%[r], #28]\n\t"
  4486. "lsr r5, r2, #1\n\t"
  4487. "lsl r2, r2, %[n]\n\t"
  4488. "lsr r5, r5, r6\n\t"
  4489. "orr r3, r3, r5\n\t"
  4490. "ldr r4, [%[a], #16]\n\t"
  4491. "str r3, [%[r], #24]\n\t"
  4492. "lsr r5, r4, #1\n\t"
  4493. "lsl r4, r4, %[n]\n\t"
  4494. "lsr r5, r5, r6\n\t"
  4495. "orr r2, r2, r5\n\t"
  4496. "ldr r3, [%[a], #12]\n\t"
  4497. "str r2, [%[r], #20]\n\t"
  4498. "lsr r5, r3, #1\n\t"
  4499. "lsl r3, r3, %[n]\n\t"
  4500. "lsr r5, r5, r6\n\t"
  4501. "orr r4, r4, r5\n\t"
  4502. "ldr r2, [%[a], #8]\n\t"
  4503. "str r4, [%[r], #16]\n\t"
  4504. "lsr r5, r2, #1\n\t"
  4505. "lsl r2, r2, %[n]\n\t"
  4506. "lsr r5, r5, r6\n\t"
  4507. "orr r3, r3, r5\n\t"
  4508. "ldr r4, [%[a], #4]\n\t"
  4509. "str r3, [%[r], #12]\n\t"
  4510. "lsr r5, r4, #1\n\t"
  4511. "lsl r4, r4, %[n]\n\t"
  4512. "lsr r5, r5, r6\n\t"
  4513. "orr r2, r2, r5\n\t"
  4514. "ldr r3, [%[a], #0]\n\t"
  4515. "str r2, [%[r], #8]\n\t"
  4516. "lsr r5, r3, #1\n\t"
  4517. "lsl r3, r3, %[n]\n\t"
  4518. "lsr r5, r5, r6\n\t"
  4519. "orr r4, r4, r5\n\t"
  4520. "sub %[a], %[a], #64\n\t"
  4521. "sub %[r], %[r], #64\n\t"
  4522. "ldr r2, [%[a], #60]\n\t"
  4523. "str r4, [%[r], #68]\n\t"
  4524. "lsr r5, r2, #1\n\t"
  4525. "lsl r2, r2, %[n]\n\t"
  4526. "lsr r5, r5, r6\n\t"
  4527. "orr r3, r3, r5\n\t"
  4528. "ldr r4, [%[a], #56]\n\t"
  4529. "str r3, [%[r], #64]\n\t"
  4530. "lsr r5, r4, #1\n\t"
  4531. "lsl r4, r4, %[n]\n\t"
  4532. "lsr r5, r5, r6\n\t"
  4533. "orr r2, r2, r5\n\t"
  4534. "ldr r3, [%[a], #52]\n\t"
  4535. "str r2, [%[r], #60]\n\t"
  4536. "lsr r5, r3, #1\n\t"
  4537. "lsl r3, r3, %[n]\n\t"
  4538. "lsr r5, r5, r6\n\t"
  4539. "orr r4, r4, r5\n\t"
  4540. "ldr r2, [%[a], #48]\n\t"
  4541. "str r4, [%[r], #56]\n\t"
  4542. "lsr r5, r2, #1\n\t"
  4543. "lsl r2, r2, %[n]\n\t"
  4544. "lsr r5, r5, r6\n\t"
  4545. "orr r3, r3, r5\n\t"
  4546. "ldr r4, [%[a], #44]\n\t"
  4547. "str r3, [%[r], #52]\n\t"
  4548. "lsr r5, r4, #1\n\t"
  4549. "lsl r4, r4, %[n]\n\t"
  4550. "lsr r5, r5, r6\n\t"
  4551. "orr r2, r2, r5\n\t"
  4552. "ldr r3, [%[a], #40]\n\t"
  4553. "str r2, [%[r], #48]\n\t"
  4554. "lsr r5, r3, #1\n\t"
  4555. "lsl r3, r3, %[n]\n\t"
  4556. "lsr r5, r5, r6\n\t"
  4557. "orr r4, r4, r5\n\t"
  4558. "ldr r2, [%[a], #36]\n\t"
  4559. "str r4, [%[r], #44]\n\t"
  4560. "lsr r5, r2, #1\n\t"
  4561. "lsl r2, r2, %[n]\n\t"
  4562. "lsr r5, r5, r6\n\t"
  4563. "orr r3, r3, r5\n\t"
  4564. "ldr r4, [%[a], #32]\n\t"
  4565. "str r3, [%[r], #40]\n\t"
  4566. "lsr r5, r4, #1\n\t"
  4567. "lsl r4, r4, %[n]\n\t"
  4568. "lsr r5, r5, r6\n\t"
  4569. "orr r2, r2, r5\n\t"
  4570. "ldr r3, [%[a], #28]\n\t"
  4571. "str r2, [%[r], #36]\n\t"
  4572. "lsr r5, r3, #1\n\t"
  4573. "lsl r3, r3, %[n]\n\t"
  4574. "lsr r5, r5, r6\n\t"
  4575. "orr r4, r4, r5\n\t"
  4576. "ldr r2, [%[a], #24]\n\t"
  4577. "str r4, [%[r], #32]\n\t"
  4578. "lsr r5, r2, #1\n\t"
  4579. "lsl r2, r2, %[n]\n\t"
  4580. "lsr r5, r5, r6\n\t"
  4581. "orr r3, r3, r5\n\t"
  4582. "ldr r4, [%[a], #20]\n\t"
  4583. "str r3, [%[r], #28]\n\t"
  4584. "lsr r5, r4, #1\n\t"
  4585. "lsl r4, r4, %[n]\n\t"
  4586. "lsr r5, r5, r6\n\t"
  4587. "orr r2, r2, r5\n\t"
  4588. "ldr r3, [%[a], #16]\n\t"
  4589. "str r2, [%[r], #24]\n\t"
  4590. "lsr r5, r3, #1\n\t"
  4591. "lsl r3, r3, %[n]\n\t"
  4592. "lsr r5, r5, r6\n\t"
  4593. "orr r4, r4, r5\n\t"
  4594. "ldr r2, [%[a], #12]\n\t"
  4595. "str r4, [%[r], #20]\n\t"
  4596. "lsr r5, r2, #1\n\t"
  4597. "lsl r2, r2, %[n]\n\t"
  4598. "lsr r5, r5, r6\n\t"
  4599. "orr r3, r3, r5\n\t"
  4600. "ldr r4, [%[a], #8]\n\t"
  4601. "str r3, [%[r], #16]\n\t"
  4602. "lsr r5, r4, #1\n\t"
  4603. "lsl r4, r4, %[n]\n\t"
  4604. "lsr r5, r5, r6\n\t"
  4605. "orr r2, r2, r5\n\t"
  4606. "ldr r3, [%[a], #4]\n\t"
  4607. "str r2, [%[r], #12]\n\t"
  4608. "lsr r5, r3, #1\n\t"
  4609. "lsl r3, r3, %[n]\n\t"
  4610. "lsr r5, r5, r6\n\t"
  4611. "orr r4, r4, r5\n\t"
  4612. "ldr r2, [%[a], #0]\n\t"
  4613. "str r4, [%[r], #8]\n\t"
  4614. "lsr r5, r2, #1\n\t"
  4615. "lsl r2, r2, %[n]\n\t"
  4616. "lsr r5, r5, r6\n\t"
  4617. "orr r3, r3, r5\n\t"
  4618. "sub %[a], %[a], #64\n\t"
  4619. "sub %[r], %[r], #64\n\t"
  4620. "ldr r4, [%[a], #60]\n\t"
  4621. "str r3, [%[r], #68]\n\t"
  4622. "lsr r5, r4, #1\n\t"
  4623. "lsl r4, r4, %[n]\n\t"
  4624. "lsr r5, r5, r6\n\t"
  4625. "orr r2, r2, r5\n\t"
  4626. "ldr r3, [%[a], #56]\n\t"
  4627. "str r2, [%[r], #64]\n\t"
  4628. "lsr r5, r3, #1\n\t"
  4629. "lsl r3, r3, %[n]\n\t"
  4630. "lsr r5, r5, r6\n\t"
  4631. "orr r4, r4, r5\n\t"
  4632. "ldr r2, [%[a], #52]\n\t"
  4633. "str r4, [%[r], #60]\n\t"
  4634. "lsr r5, r2, #1\n\t"
  4635. "lsl r2, r2, %[n]\n\t"
  4636. "lsr r5, r5, r6\n\t"
  4637. "orr r3, r3, r5\n\t"
  4638. "ldr r4, [%[a], #48]\n\t"
  4639. "str r3, [%[r], #56]\n\t"
  4640. "lsr r5, r4, #1\n\t"
  4641. "lsl r4, r4, %[n]\n\t"
  4642. "lsr r5, r5, r6\n\t"
  4643. "orr r2, r2, r5\n\t"
  4644. "ldr r3, [%[a], #44]\n\t"
  4645. "str r2, [%[r], #52]\n\t"
  4646. "lsr r5, r3, #1\n\t"
  4647. "lsl r3, r3, %[n]\n\t"
  4648. "lsr r5, r5, r6\n\t"
  4649. "orr r4, r4, r5\n\t"
  4650. "ldr r2, [%[a], #40]\n\t"
  4651. "str r4, [%[r], #48]\n\t"
  4652. "lsr r5, r2, #1\n\t"
  4653. "lsl r2, r2, %[n]\n\t"
  4654. "lsr r5, r5, r6\n\t"
  4655. "orr r3, r3, r5\n\t"
  4656. "ldr r4, [%[a], #36]\n\t"
  4657. "str r3, [%[r], #44]\n\t"
  4658. "lsr r5, r4, #1\n\t"
  4659. "lsl r4, r4, %[n]\n\t"
  4660. "lsr r5, r5, r6\n\t"
  4661. "orr r2, r2, r5\n\t"
  4662. "ldr r3, [%[a], #32]\n\t"
  4663. "str r2, [%[r], #40]\n\t"
  4664. "lsr r5, r3, #1\n\t"
  4665. "lsl r3, r3, %[n]\n\t"
  4666. "lsr r5, r5, r6\n\t"
  4667. "orr r4, r4, r5\n\t"
  4668. "ldr r2, [%[a], #28]\n\t"
  4669. "str r4, [%[r], #36]\n\t"
  4670. "lsr r5, r2, #1\n\t"
  4671. "lsl r2, r2, %[n]\n\t"
  4672. "lsr r5, r5, r6\n\t"
  4673. "orr r3, r3, r5\n\t"
  4674. "ldr r4, [%[a], #24]\n\t"
  4675. "str r3, [%[r], #32]\n\t"
  4676. "lsr r5, r4, #1\n\t"
  4677. "lsl r4, r4, %[n]\n\t"
  4678. "lsr r5, r5, r6\n\t"
  4679. "orr r2, r2, r5\n\t"
  4680. "ldr r3, [%[a], #20]\n\t"
  4681. "str r2, [%[r], #28]\n\t"
  4682. "lsr r5, r3, #1\n\t"
  4683. "lsl r3, r3, %[n]\n\t"
  4684. "lsr r5, r5, r6\n\t"
  4685. "orr r4, r4, r5\n\t"
  4686. "ldr r2, [%[a], #16]\n\t"
  4687. "str r4, [%[r], #24]\n\t"
  4688. "lsr r5, r2, #1\n\t"
  4689. "lsl r2, r2, %[n]\n\t"
  4690. "lsr r5, r5, r6\n\t"
  4691. "orr r3, r3, r5\n\t"
  4692. "ldr r4, [%[a], #12]\n\t"
  4693. "str r3, [%[r], #20]\n\t"
  4694. "lsr r5, r4, #1\n\t"
  4695. "lsl r4, r4, %[n]\n\t"
  4696. "lsr r5, r5, r6\n\t"
  4697. "orr r2, r2, r5\n\t"
  4698. "ldr r3, [%[a], #8]\n\t"
  4699. "str r2, [%[r], #16]\n\t"
  4700. "lsr r5, r3, #1\n\t"
  4701. "lsl r3, r3, %[n]\n\t"
  4702. "lsr r5, r5, r6\n\t"
  4703. "orr r4, r4, r5\n\t"
  4704. "ldr r2, [%[a], #4]\n\t"
  4705. "str r4, [%[r], #12]\n\t"
  4706. "lsr r5, r2, #1\n\t"
  4707. "lsl r2, r2, %[n]\n\t"
  4708. "lsr r5, r5, r6\n\t"
  4709. "orr r3, r3, r5\n\t"
  4710. "ldr r4, [%[a], #0]\n\t"
  4711. "str r3, [%[r], #8]\n\t"
  4712. "lsr r5, r4, #1\n\t"
  4713. "lsl r4, r4, %[n]\n\t"
  4714. "lsr r5, r5, r6\n\t"
  4715. "orr r2, r2, r5\n\t"
  4716. "sub %[a], %[a], #64\n\t"
  4717. "sub %[r], %[r], #64\n\t"
  4718. "ldr r3, [%[a], #60]\n\t"
  4719. "str r2, [%[r], #68]\n\t"
  4720. "lsr r5, r3, #1\n\t"
  4721. "lsl r3, r3, %[n]\n\t"
  4722. "lsr r5, r5, r6\n\t"
  4723. "orr r4, r4, r5\n\t"
  4724. "ldr r2, [%[a], #56]\n\t"
  4725. "str r4, [%[r], #64]\n\t"
  4726. "lsr r5, r2, #1\n\t"
  4727. "lsl r2, r2, %[n]\n\t"
  4728. "lsr r5, r5, r6\n\t"
  4729. "orr r3, r3, r5\n\t"
  4730. "ldr r4, [%[a], #52]\n\t"
  4731. "str r3, [%[r], #60]\n\t"
  4732. "lsr r5, r4, #1\n\t"
  4733. "lsl r4, r4, %[n]\n\t"
  4734. "lsr r5, r5, r6\n\t"
  4735. "orr r2, r2, r5\n\t"
  4736. "ldr r3, [%[a], #48]\n\t"
  4737. "str r2, [%[r], #56]\n\t"
  4738. "lsr r5, r3, #1\n\t"
  4739. "lsl r3, r3, %[n]\n\t"
  4740. "lsr r5, r5, r6\n\t"
  4741. "orr r4, r4, r5\n\t"
  4742. "ldr r2, [%[a], #44]\n\t"
  4743. "str r4, [%[r], #52]\n\t"
  4744. "lsr r5, r2, #1\n\t"
  4745. "lsl r2, r2, %[n]\n\t"
  4746. "lsr r5, r5, r6\n\t"
  4747. "orr r3, r3, r5\n\t"
  4748. "ldr r4, [%[a], #40]\n\t"
  4749. "str r3, [%[r], #48]\n\t"
  4750. "lsr r5, r4, #1\n\t"
  4751. "lsl r4, r4, %[n]\n\t"
  4752. "lsr r5, r5, r6\n\t"
  4753. "orr r2, r2, r5\n\t"
  4754. "ldr r3, [%[a], #36]\n\t"
  4755. "str r2, [%[r], #44]\n\t"
  4756. "lsr r5, r3, #1\n\t"
  4757. "lsl r3, r3, %[n]\n\t"
  4758. "lsr r5, r5, r6\n\t"
  4759. "orr r4, r4, r5\n\t"
  4760. "ldr r2, [%[a], #32]\n\t"
  4761. "str r4, [%[r], #40]\n\t"
  4762. "lsr r5, r2, #1\n\t"
  4763. "lsl r2, r2, %[n]\n\t"
  4764. "lsr r5, r5, r6\n\t"
  4765. "orr r3, r3, r5\n\t"
  4766. "ldr r4, [%[a], #28]\n\t"
  4767. "str r3, [%[r], #36]\n\t"
  4768. "lsr r5, r4, #1\n\t"
  4769. "lsl r4, r4, %[n]\n\t"
  4770. "lsr r5, r5, r6\n\t"
  4771. "orr r2, r2, r5\n\t"
  4772. "ldr r3, [%[a], #24]\n\t"
  4773. "str r2, [%[r], #32]\n\t"
  4774. "lsr r5, r3, #1\n\t"
  4775. "lsl r3, r3, %[n]\n\t"
  4776. "lsr r5, r5, r6\n\t"
  4777. "orr r4, r4, r5\n\t"
  4778. "ldr r2, [%[a], #20]\n\t"
  4779. "str r4, [%[r], #28]\n\t"
  4780. "lsr r5, r2, #1\n\t"
  4781. "lsl r2, r2, %[n]\n\t"
  4782. "lsr r5, r5, r6\n\t"
  4783. "orr r3, r3, r5\n\t"
  4784. "ldr r4, [%[a], #16]\n\t"
  4785. "str r3, [%[r], #24]\n\t"
  4786. "lsr r5, r4, #1\n\t"
  4787. "lsl r4, r4, %[n]\n\t"
  4788. "lsr r5, r5, r6\n\t"
  4789. "orr r2, r2, r5\n\t"
  4790. "ldr r3, [%[a], #12]\n\t"
  4791. "str r2, [%[r], #20]\n\t"
  4792. "lsr r5, r3, #1\n\t"
  4793. "lsl r3, r3, %[n]\n\t"
  4794. "lsr r5, r5, r6\n\t"
  4795. "orr r4, r4, r5\n\t"
  4796. "ldr r2, [%[a], #8]\n\t"
  4797. "str r4, [%[r], #16]\n\t"
  4798. "lsr r5, r2, #1\n\t"
  4799. "lsl r2, r2, %[n]\n\t"
  4800. "lsr r5, r5, r6\n\t"
  4801. "orr r3, r3, r5\n\t"
  4802. "ldr r4, [%[a], #4]\n\t"
  4803. "str r3, [%[r], #12]\n\t"
  4804. "lsr r5, r4, #1\n\t"
  4805. "lsl r4, r4, %[n]\n\t"
  4806. "lsr r5, r5, r6\n\t"
  4807. "orr r2, r2, r5\n\t"
  4808. "ldr r3, [%[a], #0]\n\t"
  4809. "str r2, [%[r], #8]\n\t"
  4810. "lsr r5, r3, #1\n\t"
  4811. "lsl r3, r3, %[n]\n\t"
  4812. "lsr r5, r5, r6\n\t"
  4813. "orr r4, r4, r5\n\t"
  4814. "str r3, [%[r]]\n\t"
  4815. "str r4, [%[r], #4]\n\t"
  4816. :
  4817. : [r] "r" (r), [a] "r" (a), [n] "r" (n)
  4818. : "memory", "r2", "r3", "r4", "r5", "r6"
  4819. );
  4820. }
  4821. /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
  4822. *
  4823. * r A single precision number that is the result of the operation.
  4824. * e A single precision number that is the exponent.
  4825. * bits The number of bits in the exponent.
  4826. * m A single precision number that is the modulus.
  4827. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  4828. */
  4829. static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
  4830. const sp_digit* m)
  4831. {
  4832. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4833. sp_digit* td;
  4834. #else
  4835. sp_digit td[193];
  4836. #endif
  4837. sp_digit* norm;
  4838. sp_digit* tmp;
  4839. sp_digit mp = 1;
  4840. sp_digit n, o;
  4841. sp_digit mask;
  4842. int i;
  4843. int c, y;
  4844. int err = MP_OKAY;
  4845. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4846. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
  4847. DYNAMIC_TYPE_TMP_BUFFER);
  4848. if (td == NULL) {
  4849. err = MEMORY_E;
  4850. }
  4851. #endif
  4852. if (err == MP_OKAY) {
  4853. norm = td;
  4854. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4855. tmp = td + 128;
  4856. #else
  4857. tmp = &td[128];
  4858. #endif
  4859. sp_2048_mont_setup(m, &mp);
  4860. sp_2048_mont_norm_64(norm, m);
  4861. i = (bits - 1) / 32;
  4862. n = e[i--];
  4863. c = bits & 31;
  4864. if (c == 0) {
  4865. c = 32;
  4866. }
  4867. c -= bits % 5;
  4868. if (c == 32) {
  4869. c = 27;
  4870. }
  4871. if (c < 0) {
  4872. /* Number of bits in top word is less than number needed. */
  4873. c = -c;
  4874. y = (int)(n << c);
  4875. n = e[i--];
  4876. y |= (int)(n >> (64 - c));
  4877. n <<= c;
  4878. c = 64 - c;
  4879. }
  4880. else {
  4881. y = (int)(n >> c);
  4882. n <<= 32 - c;
  4883. }
  4884. sp_2048_lshift_64(r, norm, (byte)y);
  4885. for (; i>=0 || c>=5; ) {
  4886. if (c == 0) {
  4887. n = e[i--];
  4888. y = (int)(n >> 27);
  4889. n <<= 5;
  4890. c = 27;
  4891. }
  4892. else if (c < 5) {
  4893. y = (int)(n >> 27);
  4894. n = e[i--];
  4895. c = 5 - c;
  4896. y |= (int)(n >> (32 - c));
  4897. n <<= c;
  4898. c = 32 - c;
  4899. }
  4900. else {
  4901. y = (int)((n >> 27) & 0x1f);
  4902. n <<= 5;
  4903. c -= 5;
  4904. }
  4905. sp_2048_mont_sqr_64(r, r, m, mp);
  4906. sp_2048_mont_sqr_64(r, r, m, mp);
  4907. sp_2048_mont_sqr_64(r, r, m, mp);
  4908. sp_2048_mont_sqr_64(r, r, m, mp);
  4909. sp_2048_mont_sqr_64(r, r, m, mp);
  4910. sp_2048_lshift_64(r, r, (byte)y);
  4911. sp_2048_mul_d_64(tmp, norm, r[64]);
  4912. r[64] = 0;
  4913. o = sp_2048_add_64(r, r, tmp);
  4914. sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
  4915. }
  4916. XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
  4917. sp_2048_mont_reduce_64(r, m, mp);
  4918. mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
  4919. sp_2048_cond_sub_64(r, r, m, mask);
  4920. }
  4921. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4922. if (td != NULL) {
  4923. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  4924. }
  4925. #endif
  4926. return err;
  4927. }
  4928. #endif /* HAVE_FFDHE_2048 */
  4929. /* Perform the modular exponentiation for Diffie-Hellman.
  4930. *
  4931. * base Base.
  4932. * exp Array of bytes that is the exponent.
  4933. * expLen Length of data, in bytes, in exponent.
  4934. * mod Modulus.
  4935. * out Buffer to hold big-endian bytes of exponentiation result.
  4936. * Must be at least 256 bytes long.
  4937. * outLen Length, in bytes, of exponentiation result.
  4938. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  4939. * and MEMORY_E if memory allocation fails.
  4940. */
  4941. int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
  4942. mp_int* mod, byte* out, word32* outLen)
  4943. {
  4944. int err = MP_OKAY;
  4945. sp_digit b[128], e[64], m[64];
  4946. sp_digit* r = b;
  4947. word32 i;
  4948. if (mp_count_bits(base) > 2048) {
  4949. err = MP_READ_E;
  4950. }
  4951. else if (expLen > 256) {
  4952. err = MP_READ_E;
  4953. }
  4954. else if (mp_count_bits(mod) != 2048) {
  4955. err = MP_READ_E;
  4956. }
  4957. else if (mp_iseven(mod)) {
  4958. err = MP_VAL;
  4959. }
  4960. if (err == MP_OKAY) {
  4961. sp_2048_from_mp(b, 64, base);
  4962. sp_2048_from_bin(e, 64, exp, expLen);
  4963. sp_2048_from_mp(m, 64, mod);
  4964. #ifdef HAVE_FFDHE_2048
  4965. if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
  4966. err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
  4967. else
  4968. #endif
  4969. err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
  4970. }
  4971. if (err == MP_OKAY) {
  4972. sp_2048_to_bin(r, out);
  4973. *outLen = 256;
  4974. for (i=0; i<256 && out[i] == 0; i++) {
  4975. }
  4976. *outLen -= i;
  4977. XMEMMOVE(out, out + i, *outLen);
  4978. }
  4979. XMEMSET(e, 0, sizeof(e));
  4980. return err;
  4981. }
  4982. #endif /* WOLFSSL_HAVE_SP_DH */
  4983. /* Perform the modular exponentiation for Diffie-Hellman.
  4984. *
  4985. * base Base. MP integer.
  4986. * exp Exponent. MP integer.
  4987. * mod Modulus. MP integer.
  4988. * res Result. MP integer.
  4989. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  4990. * and MEMORY_E if memory allocation fails.
  4991. */
  4992. int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
  4993. {
  4994. int err = MP_OKAY;
  4995. sp_digit b[64], e[32], m[32];
  4996. sp_digit* r = b;
  4997. int expBits = mp_count_bits(exp);
  4998. if (mp_count_bits(base) > 1024) {
  4999. err = MP_READ_E;
  5000. }
  5001. else if (expBits > 1024) {
  5002. err = MP_READ_E;
  5003. }
  5004. else if (mp_count_bits(mod) != 1024) {
  5005. err = MP_READ_E;
  5006. }
  5007. else if (mp_iseven(mod)) {
  5008. err = MP_VAL;
  5009. }
  5010. if (err == MP_OKAY) {
  5011. sp_2048_from_mp(b, 32, base);
  5012. sp_2048_from_mp(e, 32, exp);
  5013. sp_2048_from_mp(m, 32, mod);
  5014. err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
  5015. }
  5016. if (err == MP_OKAY) {
  5017. XMEMSET(r + 32, 0, sizeof(*r) * 32U);
  5018. err = sp_2048_to_mp(r, res);
  5019. res->used = mod->used;
  5020. mp_clamp(res);
  5021. }
  5022. XMEMSET(e, 0, sizeof(e));
  5023. return err;
  5024. }
  5025. #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
  5026. #endif /* !WOLFSSL_SP_NO_2048 */
  5027. #ifndef WOLFSSL_SP_NO_3072
  5028. /* Read big endian unsigned byte array into r.
  5029. *
  5030. * r A single precision integer.
  5031. * size Maximum number of bytes to convert
  5032. * a Byte array.
  5033. * n Number of bytes in array to read.
  5034. */
  5035. static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
  5036. {
  5037. int i, j = 0;
  5038. word32 s = 0;
  5039. r[0] = 0;
  5040. for (i = n-1; i >= 0; i--) {
  5041. r[j] |= (((sp_digit)a[i]) << s);
  5042. if (s >= 24U) {
  5043. r[j] &= 0xffffffff;
  5044. s = 32U - s;
  5045. if (j + 1 >= size) {
  5046. break;
  5047. }
  5048. r[++j] = (sp_digit)a[i] >> s;
  5049. s = 8U - s;
  5050. }
  5051. else {
  5052. s += 8U;
  5053. }
  5054. }
  5055. for (j++; j < size; j++) {
  5056. r[j] = 0;
  5057. }
  5058. }
  5059. /* Convert an mp_int to an array of sp_digit.
  5060. *
  5061. * r A single precision integer.
  5062. * size Maximum number of bytes to convert
  5063. * a A multi-precision integer.
  5064. */
  5065. static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
  5066. {
  5067. #if DIGIT_BIT == 32
  5068. int j;
  5069. XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
  5070. for (j = a->used; j < size; j++) {
  5071. r[j] = 0;
  5072. }
  5073. #elif DIGIT_BIT > 32
  5074. int i, j = 0;
  5075. word32 s = 0;
  5076. r[0] = 0;
  5077. for (i = 0; i < a->used && j < size; i++) {
  5078. r[j] |= ((sp_digit)a->dp[i] << s);
  5079. r[j] &= 0xffffffff;
  5080. s = 32U - s;
  5081. if (j + 1 >= size) {
  5082. break;
  5083. }
  5084. /* lint allow cast of mismatch word32 and mp_digit */
  5085. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  5086. while ((s + 32U) <= (word32)DIGIT_BIT) {
  5087. s += 32U;
  5088. r[j] &= 0xffffffff;
  5089. if (j + 1 >= size) {
  5090. break;
  5091. }
  5092. if (s < (word32)DIGIT_BIT) {
  5093. /* lint allow cast of mismatch word32 and mp_digit */
  5094. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  5095. }
  5096. else {
  5097. r[++j] = 0L;
  5098. }
  5099. }
  5100. s = (word32)DIGIT_BIT - s;
  5101. }
  5102. for (j++; j < size; j++) {
  5103. r[j] = 0;
  5104. }
  5105. #else
  5106. int i, j = 0, s = 0;
  5107. r[0] = 0;
  5108. for (i = 0; i < a->used && j < size; i++) {
  5109. r[j] |= ((sp_digit)a->dp[i]) << s;
  5110. if (s + DIGIT_BIT >= 32) {
  5111. r[j] &= 0xffffffff;
  5112. if (j + 1 >= size) {
  5113. break;
  5114. }
  5115. s = 32 - s;
  5116. if (s == DIGIT_BIT) {
  5117. r[++j] = 0;
  5118. s = 0;
  5119. }
  5120. else {
  5121. r[++j] = a->dp[i] >> s;
  5122. s = DIGIT_BIT - s;
  5123. }
  5124. }
  5125. else {
  5126. s += DIGIT_BIT;
  5127. }
  5128. }
  5129. for (j++; j < size; j++) {
  5130. r[j] = 0;
  5131. }
  5132. #endif
  5133. }
  5134. /* Write r as big endian to byte array.
  5135. * Fixed length number of bytes written: 384
  5136. *
  5137. * r A single precision integer.
  5138. * a Byte array.
  5139. */
  5140. static void sp_3072_to_bin(sp_digit* r, byte* a)
  5141. {
  5142. int i, j, s = 0, b;
  5143. j = 3072 / 8 - 1;
  5144. a[j] = 0;
  5145. for (i=0; i<96 && j>=0; i++) {
  5146. b = 0;
  5147. /* lint allow cast of mismatch sp_digit and int */
  5148. a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
  5149. b += 8 - s;
  5150. if (j < 0) {
  5151. break;
  5152. }
  5153. while (b < 32) {
  5154. a[j--] = (byte)(r[i] >> b);
  5155. b += 8;
  5156. if (j < 0) {
  5157. break;
  5158. }
  5159. }
  5160. s = 8 - (b - 32);
  5161. if (j >= 0) {
  5162. a[j] = 0;
  5163. }
  5164. if (s != 0) {
  5165. j++;
  5166. }
  5167. }
  5168. }
  5169. #ifndef WOLFSSL_SP_SMALL
  5170. /* Multiply a and b into r. (r = a * b)
  5171. *
  5172. * r A single precision integer.
  5173. * a A single precision integer.
  5174. * b A single precision integer.
  5175. */
  5176. SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
  5177. const sp_digit* b)
  5178. {
  5179. sp_digit tmp_arr[12 * 2];
  5180. sp_digit* tmp = tmp_arr;
  5181. __asm__ __volatile__ (
  5182. "mov r3, #0\n\t"
  5183. "mov r4, #0\n\t"
  5184. "mov r9, r3\n\t"
  5185. "mov r12, %[r]\n\t"
  5186. "mov r10, %[a]\n\t"
  5187. "mov r11, %[b]\n\t"
  5188. "mov r6, #48\n\t"
  5189. "add r6, r6, r10\n\t"
  5190. "mov r14, r6\n\t"
  5191. "\n1:\n\t"
  5192. "mov %[r], #0\n\t"
  5193. "mov r5, #0\n\t"
  5194. "mov r6, #44\n\t"
  5195. "mov %[a], r9\n\t"
  5196. "subs %[a], %[a], r6\n\t"
  5197. "sbc r6, r6, r6\n\t"
  5198. "mvn r6, r6\n\t"
  5199. "and %[a], %[a], r6\n\t"
  5200. "mov %[b], r9\n\t"
  5201. "sub %[b], %[b], %[a]\n\t"
  5202. "add %[a], %[a], r10\n\t"
  5203. "add %[b], %[b], r11\n\t"
  5204. "\n2:\n\t"
  5205. /* Multiply Start */
  5206. "ldr r6, [%[a]]\n\t"
  5207. "ldr r8, [%[b]]\n\t"
  5208. "umull r6, r8, r6, r8\n\t"
  5209. "adds r3, r3, r6\n\t"
  5210. "adcs r4, r4, r8\n\t"
  5211. "adc r5, r5, %[r]\n\t"
  5212. /* Multiply Done */
  5213. "add %[a], %[a], #4\n\t"
  5214. "sub %[b], %[b], #4\n\t"
  5215. "cmp %[a], r14\n\t"
  5216. #ifdef __GNUC__
  5217. "beq 3f\n\t"
  5218. #else
  5219. "beq.n 3f\n\t"
  5220. #endif /* __GNUC__ */
  5221. "mov r6, r9\n\t"
  5222. "add r6, r6, r10\n\t"
  5223. "cmp %[a], r6\n\t"
  5224. #ifdef __GNUC__
  5225. "ble 2b\n\t"
  5226. #else
  5227. "ble.n 2b\n\t"
  5228. #endif /* __GNUC__ */
  5229. "\n3:\n\t"
  5230. "mov %[r], r12\n\t"
  5231. "mov r8, r9\n\t"
  5232. "str r3, [%[r], r8]\n\t"
  5233. "mov r3, r4\n\t"
  5234. "mov r4, r5\n\t"
  5235. "add r8, r8, #4\n\t"
  5236. "mov r9, r8\n\t"
  5237. "mov r6, #88\n\t"
  5238. "cmp r8, r6\n\t"
  5239. #ifdef __GNUC__
  5240. "ble 1b\n\t"
  5241. #else
  5242. "ble.n 1b\n\t"
  5243. #endif /* __GNUC__ */
  5244. "str r3, [%[r], r8]\n\t"
  5245. "mov %[a], r10\n\t"
  5246. "mov %[b], r11\n\t"
  5247. :
  5248. : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
  5249. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  5250. );
  5251. XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
  5252. }
  5253. /* Square a and put result in r. (r = a * a)
  5254. *
  5255. * r A single precision integer.
  5256. * a A single precision integer.
  5257. */
  5258. SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
  5259. {
  5260. __asm__ __volatile__ (
  5261. "mov r3, #0\n\t"
  5262. "mov r4, #0\n\t"
  5263. "mov r5, #0\n\t"
  5264. "mov r9, r3\n\t"
  5265. "mov r12, %[r]\n\t"
  5266. "mov r6, #96\n\t"
  5267. "neg r6, r6\n\t"
  5268. "add sp, sp, r6\n\t"
  5269. "mov r11, sp\n\t"
  5270. "mov r10, %[a]\n\t"
  5271. "\n1:\n\t"
  5272. "mov %[r], #0\n\t"
  5273. "mov r6, #44\n\t"
  5274. "mov %[a], r9\n\t"
  5275. "subs %[a], %[a], r6\n\t"
  5276. "sbc r6, r6, r6\n\t"
  5277. "mvn r6, r6\n\t"
  5278. "and %[a], %[a], r6\n\t"
  5279. "mov r2, r9\n\t"
  5280. "sub r2, r2, %[a]\n\t"
  5281. "add %[a], %[a], r10\n\t"
  5282. "add r2, r2, r10\n\t"
  5283. "\n2:\n\t"
  5284. "cmp r2, %[a]\n\t"
  5285. #ifdef __GNUC__
  5286. "beq 4f\n\t"
  5287. #else
  5288. "beq.n 4f\n\t"
  5289. #endif /* __GNUC__ */
  5290. /* Multiply * 2: Start */
  5291. "ldr r6, [%[a]]\n\t"
  5292. "ldr r8, [r2]\n\t"
  5293. "umull r6, r8, r6, r8\n\t"
  5294. "adds r3, r3, r6\n\t"
  5295. "adcs r4, r4, r8\n\t"
  5296. "adc r5, r5, %[r]\n\t"
  5297. "adds r3, r3, r6\n\t"
  5298. "adcs r4, r4, r8\n\t"
  5299. "adc r5, r5, %[r]\n\t"
  5300. /* Multiply * 2: Done */
  5301. #ifdef __GNUC__
  5302. "bal 5f\n\t"
  5303. #else
  5304. "bal.n 5f\n\t"
  5305. #endif /* __GNUC__ */
  5306. "\n4:\n\t"
  5307. /* Square: Start */
  5308. "ldr r6, [%[a]]\n\t"
  5309. "umull r6, r8, r6, r6\n\t"
  5310. "adds r3, r3, r6\n\t"
  5311. "adcs r4, r4, r8\n\t"
  5312. "adc r5, r5, %[r]\n\t"
  5313. /* Square: Done */
  5314. "\n5:\n\t"
  5315. "add %[a], %[a], #4\n\t"
  5316. "sub r2, r2, #4\n\t"
  5317. "mov r6, #48\n\t"
  5318. "add r6, r6, r10\n\t"
  5319. "cmp %[a], r6\n\t"
  5320. #ifdef __GNUC__
  5321. "beq 3f\n\t"
  5322. #else
  5323. "beq.n 3f\n\t"
  5324. #endif /* __GNUC__ */
  5325. "cmp %[a], r2\n\t"
  5326. #ifdef __GNUC__
  5327. "bgt 3f\n\t"
  5328. #else
  5329. "bgt.n 3f\n\t"
  5330. #endif /* __GNUC__ */
  5331. "mov r8, r9\n\t"
  5332. "add r8, r8, r10\n\t"
  5333. "cmp %[a], r8\n\t"
  5334. #ifdef __GNUC__
  5335. "ble 2b\n\t"
  5336. #else
  5337. "ble.n 2b\n\t"
  5338. #endif /* __GNUC__ */
  5339. "\n3:\n\t"
  5340. "mov %[r], r11\n\t"
  5341. "mov r8, r9\n\t"
  5342. "str r3, [%[r], r8]\n\t"
  5343. "mov r3, r4\n\t"
  5344. "mov r4, r5\n\t"
  5345. "mov r5, #0\n\t"
  5346. "add r8, r8, #4\n\t"
  5347. "mov r9, r8\n\t"
  5348. "mov r6, #88\n\t"
  5349. "cmp r8, r6\n\t"
  5350. #ifdef __GNUC__
  5351. "ble 1b\n\t"
  5352. #else
  5353. "ble.n 1b\n\t"
  5354. #endif /* __GNUC__ */
  5355. "mov %[a], r10\n\t"
  5356. "str r3, [%[r], r8]\n\t"
  5357. "mov %[r], r12\n\t"
  5358. "mov %[a], r11\n\t"
  5359. "mov r3, #92\n\t"
  5360. "\n4:\n\t"
  5361. "ldr r6, [%[a], r3]\n\t"
  5362. "str r6, [%[r], r3]\n\t"
  5363. "subs r3, r3, #4\n\t"
  5364. #ifdef __GNUC__
  5365. "bge 4b\n\t"
  5366. #else
  5367. "bge.n 4b\n\t"
  5368. #endif /* __GNUC__ */
  5369. "mov r6, #96\n\t"
  5370. "add sp, sp, r6\n\t"
  5371. :
  5372. : [r] "r" (r), [a] "r" (a)
  5373. : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
  5374. );
  5375. }
  5376. /* Add b to a into r. (r = a + b)
  5377. *
  5378. * r A single precision integer.
  5379. * a A single precision integer.
  5380. * b A single precision integer.
  5381. */
  5382. SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
  5383. const sp_digit* b)
  5384. {
  5385. sp_digit c = 0;
  5386. __asm__ __volatile__ (
  5387. "ldm %[a]!, {r4, r5}\n\t"
  5388. "ldm %[b]!, {r6, r8}\n\t"
  5389. "adds r4, r4, r6\n\t"
  5390. "adcs r5, r5, r8\n\t"
  5391. "stm %[r]!, {r4, r5}\n\t"
  5392. "ldm %[a]!, {r4, r5}\n\t"
  5393. "ldm %[b]!, {r6, r8}\n\t"
  5394. "adcs r4, r4, r6\n\t"
  5395. "adcs r5, r5, r8\n\t"
  5396. "stm %[r]!, {r4, r5}\n\t"
  5397. "ldm %[a]!, {r4, r5}\n\t"
  5398. "ldm %[b]!, {r6, r8}\n\t"
  5399. "adcs r4, r4, r6\n\t"
  5400. "adcs r5, r5, r8\n\t"
  5401. "stm %[r]!, {r4, r5}\n\t"
  5402. "ldm %[a]!, {r4, r5}\n\t"
  5403. "ldm %[b]!, {r6, r8}\n\t"
  5404. "adcs r4, r4, r6\n\t"
  5405. "adcs r5, r5, r8\n\t"
  5406. "stm %[r]!, {r4, r5}\n\t"
  5407. "ldm %[a]!, {r4, r5}\n\t"
  5408. "ldm %[b]!, {r6, r8}\n\t"
  5409. "adcs r4, r4, r6\n\t"
  5410. "adcs r5, r5, r8\n\t"
  5411. "stm %[r]!, {r4, r5}\n\t"
  5412. "ldm %[a]!, {r4, r5}\n\t"
  5413. "ldm %[b]!, {r6, r8}\n\t"
  5414. "adcs r4, r4, r6\n\t"
  5415. "adcs r5, r5, r8\n\t"
  5416. "stm %[r]!, {r4, r5}\n\t"
  5417. "mov %[c], #0\n\t"
  5418. "adc %[c], %[c], %[c]\n\t"
  5419. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  5420. :
  5421. : "memory", "r4", "r5", "r6", "r8"
  5422. );
  5423. return c;
  5424. }
  5425. /* Sub b from a into r. (r = a - b)
  5426. *
  5427. * r A single precision integer.
  5428. * a A single precision integer.
  5429. * b A single precision integer.
  5430. */
  5431. SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
  5432. const sp_digit* b)
  5433. {
  5434. sp_digit c = 0;
  5435. __asm__ __volatile__ (
  5436. "ldm %[a], {r3, r4}\n\t"
  5437. "ldm %[b]!, {r5, r6}\n\t"
  5438. "subs r3, r3, r5\n\t"
  5439. "sbcs r4, r4, r6\n\t"
  5440. "stm %[a]!, {r3, r4}\n\t"
  5441. "ldm %[a], {r3, r4}\n\t"
  5442. "ldm %[b]!, {r5, r6}\n\t"
  5443. "sbcs r3, r3, r5\n\t"
  5444. "sbcs r4, r4, r6\n\t"
  5445. "stm %[a]!, {r3, r4}\n\t"
  5446. "ldm %[a], {r3, r4}\n\t"
  5447. "ldm %[b]!, {r5, r6}\n\t"
  5448. "sbcs r3, r3, r5\n\t"
  5449. "sbcs r4, r4, r6\n\t"
  5450. "stm %[a]!, {r3, r4}\n\t"
  5451. "ldm %[a], {r3, r4}\n\t"
  5452. "ldm %[b]!, {r5, r6}\n\t"
  5453. "sbcs r3, r3, r5\n\t"
  5454. "sbcs r4, r4, r6\n\t"
  5455. "stm %[a]!, {r3, r4}\n\t"
  5456. "ldm %[a], {r3, r4}\n\t"
  5457. "ldm %[b]!, {r5, r6}\n\t"
  5458. "sbcs r3, r3, r5\n\t"
  5459. "sbcs r4, r4, r6\n\t"
  5460. "stm %[a]!, {r3, r4}\n\t"
  5461. "ldm %[a], {r3, r4}\n\t"
  5462. "ldm %[b]!, {r5, r6}\n\t"
  5463. "sbcs r3, r3, r5\n\t"
  5464. "sbcs r4, r4, r6\n\t"
  5465. "stm %[a]!, {r3, r4}\n\t"
  5466. "ldm %[a], {r3, r4}\n\t"
  5467. "ldm %[b]!, {r5, r6}\n\t"
  5468. "sbcs r3, r3, r5\n\t"
  5469. "sbcs r4, r4, r6\n\t"
  5470. "stm %[a]!, {r3, r4}\n\t"
  5471. "ldm %[a], {r3, r4}\n\t"
  5472. "ldm %[b]!, {r5, r6}\n\t"
  5473. "sbcs r3, r3, r5\n\t"
  5474. "sbcs r4, r4, r6\n\t"
  5475. "stm %[a]!, {r3, r4}\n\t"
  5476. "ldm %[a], {r3, r4}\n\t"
  5477. "ldm %[b]!, {r5, r6}\n\t"
  5478. "sbcs r3, r3, r5\n\t"
  5479. "sbcs r4, r4, r6\n\t"
  5480. "stm %[a]!, {r3, r4}\n\t"
  5481. "ldm %[a], {r3, r4}\n\t"
  5482. "ldm %[b]!, {r5, r6}\n\t"
  5483. "sbcs r3, r3, r5\n\t"
  5484. "sbcs r4, r4, r6\n\t"
  5485. "stm %[a]!, {r3, r4}\n\t"
  5486. "ldm %[a], {r3, r4}\n\t"
  5487. "ldm %[b]!, {r5, r6}\n\t"
  5488. "sbcs r3, r3, r5\n\t"
  5489. "sbcs r4, r4, r6\n\t"
  5490. "stm %[a]!, {r3, r4}\n\t"
  5491. "ldm %[a], {r3, r4}\n\t"
  5492. "ldm %[b]!, {r5, r6}\n\t"
  5493. "sbcs r3, r3, r5\n\t"
  5494. "sbcs r4, r4, r6\n\t"
  5495. "stm %[a]!, {r3, r4}\n\t"
  5496. "sbc %[c], %[c], %[c]\n\t"
  5497. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  5498. :
  5499. : "memory", "r3", "r4", "r5", "r6"
  5500. );
  5501. return c;
  5502. }
  5503. /* Add b to a into r. (r = a + b)
  5504. *
  5505. * r A single precision integer.
  5506. * a A single precision integer.
  5507. * b A single precision integer.
  5508. */
  5509. SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
  5510. const sp_digit* b)
  5511. {
  5512. sp_digit c = 0;
  5513. __asm__ __volatile__ (
  5514. "ldm %[a]!, {r4, r5}\n\t"
  5515. "ldm %[b]!, {r6, r8}\n\t"
  5516. "adds r4, r4, r6\n\t"
  5517. "adcs r5, r5, r8\n\t"
  5518. "stm %[r]!, {r4, r5}\n\t"
  5519. "ldm %[a]!, {r4, r5}\n\t"
  5520. "ldm %[b]!, {r6, r8}\n\t"
  5521. "adcs r4, r4, r6\n\t"
  5522. "adcs r5, r5, r8\n\t"
  5523. "stm %[r]!, {r4, r5}\n\t"
  5524. "ldm %[a]!, {r4, r5}\n\t"
  5525. "ldm %[b]!, {r6, r8}\n\t"
  5526. "adcs r4, r4, r6\n\t"
  5527. "adcs r5, r5, r8\n\t"
  5528. "stm %[r]!, {r4, r5}\n\t"
  5529. "ldm %[a]!, {r4, r5}\n\t"
  5530. "ldm %[b]!, {r6, r8}\n\t"
  5531. "adcs r4, r4, r6\n\t"
  5532. "adcs r5, r5, r8\n\t"
  5533. "stm %[r]!, {r4, r5}\n\t"
  5534. "ldm %[a]!, {r4, r5}\n\t"
  5535. "ldm %[b]!, {r6, r8}\n\t"
  5536. "adcs r4, r4, r6\n\t"
  5537. "adcs r5, r5, r8\n\t"
  5538. "stm %[r]!, {r4, r5}\n\t"
  5539. "ldm %[a]!, {r4, r5}\n\t"
  5540. "ldm %[b]!, {r6, r8}\n\t"
  5541. "adcs r4, r4, r6\n\t"
  5542. "adcs r5, r5, r8\n\t"
  5543. "stm %[r]!, {r4, r5}\n\t"
  5544. "ldm %[a]!, {r4, r5}\n\t"
  5545. "ldm %[b]!, {r6, r8}\n\t"
  5546. "adcs r4, r4, r6\n\t"
  5547. "adcs r5, r5, r8\n\t"
  5548. "stm %[r]!, {r4, r5}\n\t"
  5549. "ldm %[a]!, {r4, r5}\n\t"
  5550. "ldm %[b]!, {r6, r8}\n\t"
  5551. "adcs r4, r4, r6\n\t"
  5552. "adcs r5, r5, r8\n\t"
  5553. "stm %[r]!, {r4, r5}\n\t"
  5554. "ldm %[a]!, {r4, r5}\n\t"
  5555. "ldm %[b]!, {r6, r8}\n\t"
  5556. "adcs r4, r4, r6\n\t"
  5557. "adcs r5, r5, r8\n\t"
  5558. "stm %[r]!, {r4, r5}\n\t"
  5559. "ldm %[a]!, {r4, r5}\n\t"
  5560. "ldm %[b]!, {r6, r8}\n\t"
  5561. "adcs r4, r4, r6\n\t"
  5562. "adcs r5, r5, r8\n\t"
  5563. "stm %[r]!, {r4, r5}\n\t"
  5564. "ldm %[a]!, {r4, r5}\n\t"
  5565. "ldm %[b]!, {r6, r8}\n\t"
  5566. "adcs r4, r4, r6\n\t"
  5567. "adcs r5, r5, r8\n\t"
  5568. "stm %[r]!, {r4, r5}\n\t"
  5569. "ldm %[a]!, {r4, r5}\n\t"
  5570. "ldm %[b]!, {r6, r8}\n\t"
  5571. "adcs r4, r4, r6\n\t"
  5572. "adcs r5, r5, r8\n\t"
  5573. "stm %[r]!, {r4, r5}\n\t"
  5574. "mov %[c], #0\n\t"
  5575. "adc %[c], %[c], %[c]\n\t"
  5576. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  5577. :
  5578. : "memory", "r4", "r5", "r6", "r8"
  5579. );
  5580. return c;
  5581. }
  5582. /* AND m into each word of a and store in r.
  5583. *
  5584. * r A single precision integer.
  5585. * a A single precision integer.
  5586. * m Mask to AND against each digit.
  5587. */
  5588. static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
  5589. {
  5590. #ifdef WOLFSSL_SP_SMALL
  5591. int i;
  5592. for (i=0; i<12; i++) {
  5593. r[i] = a[i] & m;
  5594. }
  5595. #else
  5596. r[0] = a[0] & m;
  5597. r[1] = a[1] & m;
  5598. r[2] = a[2] & m;
  5599. r[3] = a[3] & m;
  5600. r[4] = a[4] & m;
  5601. r[5] = a[5] & m;
  5602. r[6] = a[6] & m;
  5603. r[7] = a[7] & m;
  5604. r[8] = a[8] & m;
  5605. r[9] = a[9] & m;
  5606. r[10] = a[10] & m;
  5607. r[11] = a[11] & m;
  5608. #endif
  5609. }
  5610. /* Multiply a and b into r. (r = a * b)
  5611. *
  5612. * r A single precision integer.
  5613. * a A single precision integer.
  5614. * b A single precision integer.
  5615. */
  5616. SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
  5617. const sp_digit* b)
  5618. {
  5619. sp_digit* z0 = r;
  5620. sp_digit z1[24];
  5621. sp_digit a1[12];
  5622. sp_digit b1[12];
  5623. sp_digit z2[24];
  5624. sp_digit u, ca, cb;
  5625. ca = sp_3072_add_12(a1, a, &a[12]);
  5626. cb = sp_3072_add_12(b1, b, &b[12]);
  5627. u = ca & cb;
  5628. sp_3072_mul_12(z1, a1, b1);
  5629. sp_3072_mul_12(z2, &a[12], &b[12]);
  5630. sp_3072_mul_12(z0, a, b);
  5631. sp_3072_mask_12(r + 24, a1, 0 - cb);
  5632. sp_3072_mask_12(b1, b1, 0 - ca);
  5633. u += sp_3072_add_12(r + 24, r + 24, b1);
  5634. u += sp_3072_sub_in_place_24(z1, z2);
  5635. u += sp_3072_sub_in_place_24(z1, z0);
  5636. u += sp_3072_add_24(r + 12, r + 12, z1);
  5637. r[36] = u;
  5638. XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
  5639. (void)sp_3072_add_24(r + 24, r + 24, z2);
  5640. }
  5641. /* Square a and put result in r. (r = a * a)
  5642. *
  5643. * r A single precision integer.
  5644. * a A single precision integer.
  5645. */
  5646. SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
  5647. {
  5648. sp_digit* z0 = r;
  5649. sp_digit z2[24];
  5650. sp_digit z1[24];
  5651. sp_digit a1[12];
  5652. sp_digit u;
  5653. u = sp_3072_add_12(a1, a, &a[12]);
  5654. sp_3072_sqr_12(z1, a1);
  5655. sp_3072_sqr_12(z2, &a[12]);
  5656. sp_3072_sqr_12(z0, a);
  5657. sp_3072_mask_12(r + 24, a1, 0 - u);
  5658. u += sp_3072_add_12(r + 24, r + 24, r + 24);
  5659. u += sp_3072_sub_in_place_24(z1, z2);
  5660. u += sp_3072_sub_in_place_24(z1, z0);
  5661. u += sp_3072_add_24(r + 12, r + 12, z1);
  5662. r[36] = u;
  5663. XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
  5664. (void)sp_3072_add_24(r + 24, r + 24, z2);
  5665. }
  5666. /* Sub b from a into r. (r = a - b)
  5667. *
  5668. * r A single precision integer.
  5669. * a A single precision integer.
  5670. * b A single precision integer.
  5671. */
  5672. SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
  5673. const sp_digit* b)
  5674. {
  5675. sp_digit c = 0;
  5676. __asm__ __volatile__ (
  5677. "ldm %[a], {r3, r4}\n\t"
  5678. "ldm %[b]!, {r5, r6}\n\t"
  5679. "subs r3, r3, r5\n\t"
  5680. "sbcs r4, r4, r6\n\t"
  5681. "stm %[a]!, {r3, r4}\n\t"
  5682. "ldm %[a], {r3, r4}\n\t"
  5683. "ldm %[b]!, {r5, r6}\n\t"
  5684. "sbcs r3, r3, r5\n\t"
  5685. "sbcs r4, r4, r6\n\t"
  5686. "stm %[a]!, {r3, r4}\n\t"
  5687. "ldm %[a], {r3, r4}\n\t"
  5688. "ldm %[b]!, {r5, r6}\n\t"
  5689. "sbcs r3, r3, r5\n\t"
  5690. "sbcs r4, r4, r6\n\t"
  5691. "stm %[a]!, {r3, r4}\n\t"
  5692. "ldm %[a], {r3, r4}\n\t"
  5693. "ldm %[b]!, {r5, r6}\n\t"
  5694. "sbcs r3, r3, r5\n\t"
  5695. "sbcs r4, r4, r6\n\t"
  5696. "stm %[a]!, {r3, r4}\n\t"
  5697. "ldm %[a], {r3, r4}\n\t"
  5698. "ldm %[b]!, {r5, r6}\n\t"
  5699. "sbcs r3, r3, r5\n\t"
  5700. "sbcs r4, r4, r6\n\t"
  5701. "stm %[a]!, {r3, r4}\n\t"
  5702. "ldm %[a], {r3, r4}\n\t"
  5703. "ldm %[b]!, {r5, r6}\n\t"
  5704. "sbcs r3, r3, r5\n\t"
  5705. "sbcs r4, r4, r6\n\t"
  5706. "stm %[a]!, {r3, r4}\n\t"
  5707. "ldm %[a], {r3, r4}\n\t"
  5708. "ldm %[b]!, {r5, r6}\n\t"
  5709. "sbcs r3, r3, r5\n\t"
  5710. "sbcs r4, r4, r6\n\t"
  5711. "stm %[a]!, {r3, r4}\n\t"
  5712. "ldm %[a], {r3, r4}\n\t"
  5713. "ldm %[b]!, {r5, r6}\n\t"
  5714. "sbcs r3, r3, r5\n\t"
  5715. "sbcs r4, r4, r6\n\t"
  5716. "stm %[a]!, {r3, r4}\n\t"
  5717. "ldm %[a], {r3, r4}\n\t"
  5718. "ldm %[b]!, {r5, r6}\n\t"
  5719. "sbcs r3, r3, r5\n\t"
  5720. "sbcs r4, r4, r6\n\t"
  5721. "stm %[a]!, {r3, r4}\n\t"
  5722. "ldm %[a], {r3, r4}\n\t"
  5723. "ldm %[b]!, {r5, r6}\n\t"
  5724. "sbcs r3, r3, r5\n\t"
  5725. "sbcs r4, r4, r6\n\t"
  5726. "stm %[a]!, {r3, r4}\n\t"
  5727. "ldm %[a], {r3, r4}\n\t"
  5728. "ldm %[b]!, {r5, r6}\n\t"
  5729. "sbcs r3, r3, r5\n\t"
  5730. "sbcs r4, r4, r6\n\t"
  5731. "stm %[a]!, {r3, r4}\n\t"
  5732. "ldm %[a], {r3, r4}\n\t"
  5733. "ldm %[b]!, {r5, r6}\n\t"
  5734. "sbcs r3, r3, r5\n\t"
  5735. "sbcs r4, r4, r6\n\t"
  5736. "stm %[a]!, {r3, r4}\n\t"
  5737. "ldm %[a], {r3, r4}\n\t"
  5738. "ldm %[b]!, {r5, r6}\n\t"
  5739. "sbcs r3, r3, r5\n\t"
  5740. "sbcs r4, r4, r6\n\t"
  5741. "stm %[a]!, {r3, r4}\n\t"
  5742. "ldm %[a], {r3, r4}\n\t"
  5743. "ldm %[b]!, {r5, r6}\n\t"
  5744. "sbcs r3, r3, r5\n\t"
  5745. "sbcs r4, r4, r6\n\t"
  5746. "stm %[a]!, {r3, r4}\n\t"
  5747. "ldm %[a], {r3, r4}\n\t"
  5748. "ldm %[b]!, {r5, r6}\n\t"
  5749. "sbcs r3, r3, r5\n\t"
  5750. "sbcs r4, r4, r6\n\t"
  5751. "stm %[a]!, {r3, r4}\n\t"
  5752. "ldm %[a], {r3, r4}\n\t"
  5753. "ldm %[b]!, {r5, r6}\n\t"
  5754. "sbcs r3, r3, r5\n\t"
  5755. "sbcs r4, r4, r6\n\t"
  5756. "stm %[a]!, {r3, r4}\n\t"
  5757. "ldm %[a], {r3, r4}\n\t"
  5758. "ldm %[b]!, {r5, r6}\n\t"
  5759. "sbcs r3, r3, r5\n\t"
  5760. "sbcs r4, r4, r6\n\t"
  5761. "stm %[a]!, {r3, r4}\n\t"
  5762. "ldm %[a], {r3, r4}\n\t"
  5763. "ldm %[b]!, {r5, r6}\n\t"
  5764. "sbcs r3, r3, r5\n\t"
  5765. "sbcs r4, r4, r6\n\t"
  5766. "stm %[a]!, {r3, r4}\n\t"
  5767. "ldm %[a], {r3, r4}\n\t"
  5768. "ldm %[b]!, {r5, r6}\n\t"
  5769. "sbcs r3, r3, r5\n\t"
  5770. "sbcs r4, r4, r6\n\t"
  5771. "stm %[a]!, {r3, r4}\n\t"
  5772. "ldm %[a], {r3, r4}\n\t"
  5773. "ldm %[b]!, {r5, r6}\n\t"
  5774. "sbcs r3, r3, r5\n\t"
  5775. "sbcs r4, r4, r6\n\t"
  5776. "stm %[a]!, {r3, r4}\n\t"
  5777. "ldm %[a], {r3, r4}\n\t"
  5778. "ldm %[b]!, {r5, r6}\n\t"
  5779. "sbcs r3, r3, r5\n\t"
  5780. "sbcs r4, r4, r6\n\t"
  5781. "stm %[a]!, {r3, r4}\n\t"
  5782. "ldm %[a], {r3, r4}\n\t"
  5783. "ldm %[b]!, {r5, r6}\n\t"
  5784. "sbcs r3, r3, r5\n\t"
  5785. "sbcs r4, r4, r6\n\t"
  5786. "stm %[a]!, {r3, r4}\n\t"
  5787. "ldm %[a], {r3, r4}\n\t"
  5788. "ldm %[b]!, {r5, r6}\n\t"
  5789. "sbcs r3, r3, r5\n\t"
  5790. "sbcs r4, r4, r6\n\t"
  5791. "stm %[a]!, {r3, r4}\n\t"
  5792. "ldm %[a], {r3, r4}\n\t"
  5793. "ldm %[b]!, {r5, r6}\n\t"
  5794. "sbcs r3, r3, r5\n\t"
  5795. "sbcs r4, r4, r6\n\t"
  5796. "stm %[a]!, {r3, r4}\n\t"
  5797. "sbc %[c], %[c], %[c]\n\t"
  5798. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  5799. :
  5800. : "memory", "r3", "r4", "r5", "r6"
  5801. );
  5802. return c;
  5803. }
  5804. /* Add b to a into r. (r = a + b)
  5805. *
  5806. * r A single precision integer.
  5807. * a A single precision integer.
  5808. * b A single precision integer.
  5809. */
  5810. SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
  5811. const sp_digit* b)
  5812. {
  5813. sp_digit c = 0;
  5814. __asm__ __volatile__ (
  5815. "ldm %[a]!, {r4, r5}\n\t"
  5816. "ldm %[b]!, {r6, r8}\n\t"
  5817. "adds r4, r4, r6\n\t"
  5818. "adcs r5, r5, r8\n\t"
  5819. "stm %[r]!, {r4, r5}\n\t"
  5820. "ldm %[a]!, {r4, r5}\n\t"
  5821. "ldm %[b]!, {r6, r8}\n\t"
  5822. "adcs r4, r4, r6\n\t"
  5823. "adcs r5, r5, r8\n\t"
  5824. "stm %[r]!, {r4, r5}\n\t"
  5825. "ldm %[a]!, {r4, r5}\n\t"
  5826. "ldm %[b]!, {r6, r8}\n\t"
  5827. "adcs r4, r4, r6\n\t"
  5828. "adcs r5, r5, r8\n\t"
  5829. "stm %[r]!, {r4, r5}\n\t"
  5830. "ldm %[a]!, {r4, r5}\n\t"
  5831. "ldm %[b]!, {r6, r8}\n\t"
  5832. "adcs r4, r4, r6\n\t"
  5833. "adcs r5, r5, r8\n\t"
  5834. "stm %[r]!, {r4, r5}\n\t"
  5835. "ldm %[a]!, {r4, r5}\n\t"
  5836. "ldm %[b]!, {r6, r8}\n\t"
  5837. "adcs r4, r4, r6\n\t"
  5838. "adcs r5, r5, r8\n\t"
  5839. "stm %[r]!, {r4, r5}\n\t"
  5840. "ldm %[a]!, {r4, r5}\n\t"
  5841. "ldm %[b]!, {r6, r8}\n\t"
  5842. "adcs r4, r4, r6\n\t"
  5843. "adcs r5, r5, r8\n\t"
  5844. "stm %[r]!, {r4, r5}\n\t"
  5845. "ldm %[a]!, {r4, r5}\n\t"
  5846. "ldm %[b]!, {r6, r8}\n\t"
  5847. "adcs r4, r4, r6\n\t"
  5848. "adcs r5, r5, r8\n\t"
  5849. "stm %[r]!, {r4, r5}\n\t"
  5850. "ldm %[a]!, {r4, r5}\n\t"
  5851. "ldm %[b]!, {r6, r8}\n\t"
  5852. "adcs r4, r4, r6\n\t"
  5853. "adcs r5, r5, r8\n\t"
  5854. "stm %[r]!, {r4, r5}\n\t"
  5855. "ldm %[a]!, {r4, r5}\n\t"
  5856. "ldm %[b]!, {r6, r8}\n\t"
  5857. "adcs r4, r4, r6\n\t"
  5858. "adcs r5, r5, r8\n\t"
  5859. "stm %[r]!, {r4, r5}\n\t"
  5860. "ldm %[a]!, {r4, r5}\n\t"
  5861. "ldm %[b]!, {r6, r8}\n\t"
  5862. "adcs r4, r4, r6\n\t"
  5863. "adcs r5, r5, r8\n\t"
  5864. "stm %[r]!, {r4, r5}\n\t"
  5865. "ldm %[a]!, {r4, r5}\n\t"
  5866. "ldm %[b]!, {r6, r8}\n\t"
  5867. "adcs r4, r4, r6\n\t"
  5868. "adcs r5, r5, r8\n\t"
  5869. "stm %[r]!, {r4, r5}\n\t"
  5870. "ldm %[a]!, {r4, r5}\n\t"
  5871. "ldm %[b]!, {r6, r8}\n\t"
  5872. "adcs r4, r4, r6\n\t"
  5873. "adcs r5, r5, r8\n\t"
  5874. "stm %[r]!, {r4, r5}\n\t"
  5875. "ldm %[a]!, {r4, r5}\n\t"
  5876. "ldm %[b]!, {r6, r8}\n\t"
  5877. "adcs r4, r4, r6\n\t"
  5878. "adcs r5, r5, r8\n\t"
  5879. "stm %[r]!, {r4, r5}\n\t"
  5880. "ldm %[a]!, {r4, r5}\n\t"
  5881. "ldm %[b]!, {r6, r8}\n\t"
  5882. "adcs r4, r4, r6\n\t"
  5883. "adcs r5, r5, r8\n\t"
  5884. "stm %[r]!, {r4, r5}\n\t"
  5885. "ldm %[a]!, {r4, r5}\n\t"
  5886. "ldm %[b]!, {r6, r8}\n\t"
  5887. "adcs r4, r4, r6\n\t"
  5888. "adcs r5, r5, r8\n\t"
  5889. "stm %[r]!, {r4, r5}\n\t"
  5890. "ldm %[a]!, {r4, r5}\n\t"
  5891. "ldm %[b]!, {r6, r8}\n\t"
  5892. "adcs r4, r4, r6\n\t"
  5893. "adcs r5, r5, r8\n\t"
  5894. "stm %[r]!, {r4, r5}\n\t"
  5895. "ldm %[a]!, {r4, r5}\n\t"
  5896. "ldm %[b]!, {r6, r8}\n\t"
  5897. "adcs r4, r4, r6\n\t"
  5898. "adcs r5, r5, r8\n\t"
  5899. "stm %[r]!, {r4, r5}\n\t"
  5900. "ldm %[a]!, {r4, r5}\n\t"
  5901. "ldm %[b]!, {r6, r8}\n\t"
  5902. "adcs r4, r4, r6\n\t"
  5903. "adcs r5, r5, r8\n\t"
  5904. "stm %[r]!, {r4, r5}\n\t"
  5905. "ldm %[a]!, {r4, r5}\n\t"
  5906. "ldm %[b]!, {r6, r8}\n\t"
  5907. "adcs r4, r4, r6\n\t"
  5908. "adcs r5, r5, r8\n\t"
  5909. "stm %[r]!, {r4, r5}\n\t"
  5910. "ldm %[a]!, {r4, r5}\n\t"
  5911. "ldm %[b]!, {r6, r8}\n\t"
  5912. "adcs r4, r4, r6\n\t"
  5913. "adcs r5, r5, r8\n\t"
  5914. "stm %[r]!, {r4, r5}\n\t"
  5915. "ldm %[a]!, {r4, r5}\n\t"
  5916. "ldm %[b]!, {r6, r8}\n\t"
  5917. "adcs r4, r4, r6\n\t"
  5918. "adcs r5, r5, r8\n\t"
  5919. "stm %[r]!, {r4, r5}\n\t"
  5920. "ldm %[a]!, {r4, r5}\n\t"
  5921. "ldm %[b]!, {r6, r8}\n\t"
  5922. "adcs r4, r4, r6\n\t"
  5923. "adcs r5, r5, r8\n\t"
  5924. "stm %[r]!, {r4, r5}\n\t"
  5925. "ldm %[a]!, {r4, r5}\n\t"
  5926. "ldm %[b]!, {r6, r8}\n\t"
  5927. "adcs r4, r4, r6\n\t"
  5928. "adcs r5, r5, r8\n\t"
  5929. "stm %[r]!, {r4, r5}\n\t"
  5930. "ldm %[a]!, {r4, r5}\n\t"
  5931. "ldm %[b]!, {r6, r8}\n\t"
  5932. "adcs r4, r4, r6\n\t"
  5933. "adcs r5, r5, r8\n\t"
  5934. "stm %[r]!, {r4, r5}\n\t"
  5935. "mov %[c], #0\n\t"
  5936. "adc %[c], %[c], %[c]\n\t"
  5937. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  5938. :
  5939. : "memory", "r4", "r5", "r6", "r8"
  5940. );
  5941. return c;
  5942. }
  5943. /* AND m into each word of a and store in r.
  5944. *
  5945. * r A single precision integer.
  5946. * a A single precision integer.
  5947. * m Mask to AND against each digit.
  5948. */
  5949. static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
  5950. {
  5951. #ifdef WOLFSSL_SP_SMALL
  5952. int i;
  5953. for (i=0; i<24; i++) {
  5954. r[i] = a[i] & m;
  5955. }
  5956. #else
  5957. int i;
  5958. for (i = 0; i < 24; i += 8) {
  5959. r[i+0] = a[i+0] & m;
  5960. r[i+1] = a[i+1] & m;
  5961. r[i+2] = a[i+2] & m;
  5962. r[i+3] = a[i+3] & m;
  5963. r[i+4] = a[i+4] & m;
  5964. r[i+5] = a[i+5] & m;
  5965. r[i+6] = a[i+6] & m;
  5966. r[i+7] = a[i+7] & m;
  5967. }
  5968. #endif
  5969. }
  5970. /* Multiply a and b into r. (r = a * b)
  5971. *
  5972. * r A single precision integer.
  5973. * a A single precision integer.
  5974. * b A single precision integer.
  5975. */
  5976. SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
  5977. const sp_digit* b)
  5978. {
  5979. sp_digit* z0 = r;
  5980. sp_digit z1[48];
  5981. sp_digit a1[24];
  5982. sp_digit b1[24];
  5983. sp_digit z2[48];
  5984. sp_digit u, ca, cb;
  5985. ca = sp_3072_add_24(a1, a, &a[24]);
  5986. cb = sp_3072_add_24(b1, b, &b[24]);
  5987. u = ca & cb;
  5988. sp_3072_mul_24(z1, a1, b1);
  5989. sp_3072_mul_24(z2, &a[24], &b[24]);
  5990. sp_3072_mul_24(z0, a, b);
  5991. sp_3072_mask_24(r + 48, a1, 0 - cb);
  5992. sp_3072_mask_24(b1, b1, 0 - ca);
  5993. u += sp_3072_add_24(r + 48, r + 48, b1);
  5994. u += sp_3072_sub_in_place_48(z1, z2);
  5995. u += sp_3072_sub_in_place_48(z1, z0);
  5996. u += sp_3072_add_48(r + 24, r + 24, z1);
  5997. r[72] = u;
  5998. XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
  5999. (void)sp_3072_add_48(r + 48, r + 48, z2);
  6000. }
  6001. /* Square a and put result in r. (r = a * a)
  6002. *
  6003. * r A single precision integer.
  6004. * a A single precision integer.
  6005. */
  6006. SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
  6007. {
  6008. sp_digit* z0 = r;
  6009. sp_digit z2[48];
  6010. sp_digit z1[48];
  6011. sp_digit a1[24];
  6012. sp_digit u;
  6013. u = sp_3072_add_24(a1, a, &a[24]);
  6014. sp_3072_sqr_24(z1, a1);
  6015. sp_3072_sqr_24(z2, &a[24]);
  6016. sp_3072_sqr_24(z0, a);
  6017. sp_3072_mask_24(r + 48, a1, 0 - u);
  6018. u += sp_3072_add_24(r + 48, r + 48, r + 48);
  6019. u += sp_3072_sub_in_place_48(z1, z2);
  6020. u += sp_3072_sub_in_place_48(z1, z0);
  6021. u += sp_3072_add_48(r + 24, r + 24, z1);
  6022. r[72] = u;
  6023. XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
  6024. (void)sp_3072_add_48(r + 48, r + 48, z2);
  6025. }
  6026. /* Sub b from a into r. (r = a - b)
  6027. *
  6028. * r A single precision integer.
  6029. * a A single precision integer.
  6030. * b A single precision integer.
  6031. */
  6032. SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
  6033. const sp_digit* b)
  6034. {
  6035. sp_digit c = 0;
  6036. __asm__ __volatile__ (
  6037. "ldm %[a], {r3, r4}\n\t"
  6038. "ldm %[b]!, {r5, r6}\n\t"
  6039. "subs r3, r3, r5\n\t"
  6040. "sbcs r4, r4, r6\n\t"
  6041. "stm %[a]!, {r3, r4}\n\t"
  6042. "ldm %[a], {r3, r4}\n\t"
  6043. "ldm %[b]!, {r5, r6}\n\t"
  6044. "sbcs r3, r3, r5\n\t"
  6045. "sbcs r4, r4, r6\n\t"
  6046. "stm %[a]!, {r3, r4}\n\t"
  6047. "ldm %[a], {r3, r4}\n\t"
  6048. "ldm %[b]!, {r5, r6}\n\t"
  6049. "sbcs r3, r3, r5\n\t"
  6050. "sbcs r4, r4, r6\n\t"
  6051. "stm %[a]!, {r3, r4}\n\t"
  6052. "ldm %[a], {r3, r4}\n\t"
  6053. "ldm %[b]!, {r5, r6}\n\t"
  6054. "sbcs r3, r3, r5\n\t"
  6055. "sbcs r4, r4, r6\n\t"
  6056. "stm %[a]!, {r3, r4}\n\t"
  6057. "ldm %[a], {r3, r4}\n\t"
  6058. "ldm %[b]!, {r5, r6}\n\t"
  6059. "sbcs r3, r3, r5\n\t"
  6060. "sbcs r4, r4, r6\n\t"
  6061. "stm %[a]!, {r3, r4}\n\t"
  6062. "ldm %[a], {r3, r4}\n\t"
  6063. "ldm %[b]!, {r5, r6}\n\t"
  6064. "sbcs r3, r3, r5\n\t"
  6065. "sbcs r4, r4, r6\n\t"
  6066. "stm %[a]!, {r3, r4}\n\t"
  6067. "ldm %[a], {r3, r4}\n\t"
  6068. "ldm %[b]!, {r5, r6}\n\t"
  6069. "sbcs r3, r3, r5\n\t"
  6070. "sbcs r4, r4, r6\n\t"
  6071. "stm %[a]!, {r3, r4}\n\t"
  6072. "ldm %[a], {r3, r4}\n\t"
  6073. "ldm %[b]!, {r5, r6}\n\t"
  6074. "sbcs r3, r3, r5\n\t"
  6075. "sbcs r4, r4, r6\n\t"
  6076. "stm %[a]!, {r3, r4}\n\t"
  6077. "ldm %[a], {r3, r4}\n\t"
  6078. "ldm %[b]!, {r5, r6}\n\t"
  6079. "sbcs r3, r3, r5\n\t"
  6080. "sbcs r4, r4, r6\n\t"
  6081. "stm %[a]!, {r3, r4}\n\t"
  6082. "ldm %[a], {r3, r4}\n\t"
  6083. "ldm %[b]!, {r5, r6}\n\t"
  6084. "sbcs r3, r3, r5\n\t"
  6085. "sbcs r4, r4, r6\n\t"
  6086. "stm %[a]!, {r3, r4}\n\t"
  6087. "ldm %[a], {r3, r4}\n\t"
  6088. "ldm %[b]!, {r5, r6}\n\t"
  6089. "sbcs r3, r3, r5\n\t"
  6090. "sbcs r4, r4, r6\n\t"
  6091. "stm %[a]!, {r3, r4}\n\t"
  6092. "ldm %[a], {r3, r4}\n\t"
  6093. "ldm %[b]!, {r5, r6}\n\t"
  6094. "sbcs r3, r3, r5\n\t"
  6095. "sbcs r4, r4, r6\n\t"
  6096. "stm %[a]!, {r3, r4}\n\t"
  6097. "ldm %[a], {r3, r4}\n\t"
  6098. "ldm %[b]!, {r5, r6}\n\t"
  6099. "sbcs r3, r3, r5\n\t"
  6100. "sbcs r4, r4, r6\n\t"
  6101. "stm %[a]!, {r3, r4}\n\t"
  6102. "ldm %[a], {r3, r4}\n\t"
  6103. "ldm %[b]!, {r5, r6}\n\t"
  6104. "sbcs r3, r3, r5\n\t"
  6105. "sbcs r4, r4, r6\n\t"
  6106. "stm %[a]!, {r3, r4}\n\t"
  6107. "ldm %[a], {r3, r4}\n\t"
  6108. "ldm %[b]!, {r5, r6}\n\t"
  6109. "sbcs r3, r3, r5\n\t"
  6110. "sbcs r4, r4, r6\n\t"
  6111. "stm %[a]!, {r3, r4}\n\t"
  6112. "ldm %[a], {r3, r4}\n\t"
  6113. "ldm %[b]!, {r5, r6}\n\t"
  6114. "sbcs r3, r3, r5\n\t"
  6115. "sbcs r4, r4, r6\n\t"
  6116. "stm %[a]!, {r3, r4}\n\t"
  6117. "ldm %[a], {r3, r4}\n\t"
  6118. "ldm %[b]!, {r5, r6}\n\t"
  6119. "sbcs r3, r3, r5\n\t"
  6120. "sbcs r4, r4, r6\n\t"
  6121. "stm %[a]!, {r3, r4}\n\t"
  6122. "ldm %[a], {r3, r4}\n\t"
  6123. "ldm %[b]!, {r5, r6}\n\t"
  6124. "sbcs r3, r3, r5\n\t"
  6125. "sbcs r4, r4, r6\n\t"
  6126. "stm %[a]!, {r3, r4}\n\t"
  6127. "ldm %[a], {r3, r4}\n\t"
  6128. "ldm %[b]!, {r5, r6}\n\t"
  6129. "sbcs r3, r3, r5\n\t"
  6130. "sbcs r4, r4, r6\n\t"
  6131. "stm %[a]!, {r3, r4}\n\t"
  6132. "ldm %[a], {r3, r4}\n\t"
  6133. "ldm %[b]!, {r5, r6}\n\t"
  6134. "sbcs r3, r3, r5\n\t"
  6135. "sbcs r4, r4, r6\n\t"
  6136. "stm %[a]!, {r3, r4}\n\t"
  6137. "ldm %[a], {r3, r4}\n\t"
  6138. "ldm %[b]!, {r5, r6}\n\t"
  6139. "sbcs r3, r3, r5\n\t"
  6140. "sbcs r4, r4, r6\n\t"
  6141. "stm %[a]!, {r3, r4}\n\t"
  6142. "ldm %[a], {r3, r4}\n\t"
  6143. "ldm %[b]!, {r5, r6}\n\t"
  6144. "sbcs r3, r3, r5\n\t"
  6145. "sbcs r4, r4, r6\n\t"
  6146. "stm %[a]!, {r3, r4}\n\t"
  6147. "ldm %[a], {r3, r4}\n\t"
  6148. "ldm %[b]!, {r5, r6}\n\t"
  6149. "sbcs r3, r3, r5\n\t"
  6150. "sbcs r4, r4, r6\n\t"
  6151. "stm %[a]!, {r3, r4}\n\t"
  6152. "ldm %[a], {r3, r4}\n\t"
  6153. "ldm %[b]!, {r5, r6}\n\t"
  6154. "sbcs r3, r3, r5\n\t"
  6155. "sbcs r4, r4, r6\n\t"
  6156. "stm %[a]!, {r3, r4}\n\t"
  6157. "ldm %[a], {r3, r4}\n\t"
  6158. "ldm %[b]!, {r5, r6}\n\t"
  6159. "sbcs r3, r3, r5\n\t"
  6160. "sbcs r4, r4, r6\n\t"
  6161. "stm %[a]!, {r3, r4}\n\t"
  6162. "ldm %[a], {r3, r4}\n\t"
  6163. "ldm %[b]!, {r5, r6}\n\t"
  6164. "sbcs r3, r3, r5\n\t"
  6165. "sbcs r4, r4, r6\n\t"
  6166. "stm %[a]!, {r3, r4}\n\t"
  6167. "ldm %[a], {r3, r4}\n\t"
  6168. "ldm %[b]!, {r5, r6}\n\t"
  6169. "sbcs r3, r3, r5\n\t"
  6170. "sbcs r4, r4, r6\n\t"
  6171. "stm %[a]!, {r3, r4}\n\t"
  6172. "ldm %[a], {r3, r4}\n\t"
  6173. "ldm %[b]!, {r5, r6}\n\t"
  6174. "sbcs r3, r3, r5\n\t"
  6175. "sbcs r4, r4, r6\n\t"
  6176. "stm %[a]!, {r3, r4}\n\t"
  6177. "ldm %[a], {r3, r4}\n\t"
  6178. "ldm %[b]!, {r5, r6}\n\t"
  6179. "sbcs r3, r3, r5\n\t"
  6180. "sbcs r4, r4, r6\n\t"
  6181. "stm %[a]!, {r3, r4}\n\t"
  6182. "ldm %[a], {r3, r4}\n\t"
  6183. "ldm %[b]!, {r5, r6}\n\t"
  6184. "sbcs r3, r3, r5\n\t"
  6185. "sbcs r4, r4, r6\n\t"
  6186. "stm %[a]!, {r3, r4}\n\t"
  6187. "ldm %[a], {r3, r4}\n\t"
  6188. "ldm %[b]!, {r5, r6}\n\t"
  6189. "sbcs r3, r3, r5\n\t"
  6190. "sbcs r4, r4, r6\n\t"
  6191. "stm %[a]!, {r3, r4}\n\t"
  6192. "ldm %[a], {r3, r4}\n\t"
  6193. "ldm %[b]!, {r5, r6}\n\t"
  6194. "sbcs r3, r3, r5\n\t"
  6195. "sbcs r4, r4, r6\n\t"
  6196. "stm %[a]!, {r3, r4}\n\t"
  6197. "ldm %[a], {r3, r4}\n\t"
  6198. "ldm %[b]!, {r5, r6}\n\t"
  6199. "sbcs r3, r3, r5\n\t"
  6200. "sbcs r4, r4, r6\n\t"
  6201. "stm %[a]!, {r3, r4}\n\t"
  6202. "ldm %[a], {r3, r4}\n\t"
  6203. "ldm %[b]!, {r5, r6}\n\t"
  6204. "sbcs r3, r3, r5\n\t"
  6205. "sbcs r4, r4, r6\n\t"
  6206. "stm %[a]!, {r3, r4}\n\t"
  6207. "ldm %[a], {r3, r4}\n\t"
  6208. "ldm %[b]!, {r5, r6}\n\t"
  6209. "sbcs r3, r3, r5\n\t"
  6210. "sbcs r4, r4, r6\n\t"
  6211. "stm %[a]!, {r3, r4}\n\t"
  6212. "ldm %[a], {r3, r4}\n\t"
  6213. "ldm %[b]!, {r5, r6}\n\t"
  6214. "sbcs r3, r3, r5\n\t"
  6215. "sbcs r4, r4, r6\n\t"
  6216. "stm %[a]!, {r3, r4}\n\t"
  6217. "ldm %[a], {r3, r4}\n\t"
  6218. "ldm %[b]!, {r5, r6}\n\t"
  6219. "sbcs r3, r3, r5\n\t"
  6220. "sbcs r4, r4, r6\n\t"
  6221. "stm %[a]!, {r3, r4}\n\t"
  6222. "ldm %[a], {r3, r4}\n\t"
  6223. "ldm %[b]!, {r5, r6}\n\t"
  6224. "sbcs r3, r3, r5\n\t"
  6225. "sbcs r4, r4, r6\n\t"
  6226. "stm %[a]!, {r3, r4}\n\t"
  6227. "ldm %[a], {r3, r4}\n\t"
  6228. "ldm %[b]!, {r5, r6}\n\t"
  6229. "sbcs r3, r3, r5\n\t"
  6230. "sbcs r4, r4, r6\n\t"
  6231. "stm %[a]!, {r3, r4}\n\t"
  6232. "ldm %[a], {r3, r4}\n\t"
  6233. "ldm %[b]!, {r5, r6}\n\t"
  6234. "sbcs r3, r3, r5\n\t"
  6235. "sbcs r4, r4, r6\n\t"
  6236. "stm %[a]!, {r3, r4}\n\t"
  6237. "ldm %[a], {r3, r4}\n\t"
  6238. "ldm %[b]!, {r5, r6}\n\t"
  6239. "sbcs r3, r3, r5\n\t"
  6240. "sbcs r4, r4, r6\n\t"
  6241. "stm %[a]!, {r3, r4}\n\t"
  6242. "ldm %[a], {r3, r4}\n\t"
  6243. "ldm %[b]!, {r5, r6}\n\t"
  6244. "sbcs r3, r3, r5\n\t"
  6245. "sbcs r4, r4, r6\n\t"
  6246. "stm %[a]!, {r3, r4}\n\t"
  6247. "ldm %[a], {r3, r4}\n\t"
  6248. "ldm %[b]!, {r5, r6}\n\t"
  6249. "sbcs r3, r3, r5\n\t"
  6250. "sbcs r4, r4, r6\n\t"
  6251. "stm %[a]!, {r3, r4}\n\t"
  6252. "ldm %[a], {r3, r4}\n\t"
  6253. "ldm %[b]!, {r5, r6}\n\t"
  6254. "sbcs r3, r3, r5\n\t"
  6255. "sbcs r4, r4, r6\n\t"
  6256. "stm %[a]!, {r3, r4}\n\t"
  6257. "ldm %[a], {r3, r4}\n\t"
  6258. "ldm %[b]!, {r5, r6}\n\t"
  6259. "sbcs r3, r3, r5\n\t"
  6260. "sbcs r4, r4, r6\n\t"
  6261. "stm %[a]!, {r3, r4}\n\t"
  6262. "ldm %[a], {r3, r4}\n\t"
  6263. "ldm %[b]!, {r5, r6}\n\t"
  6264. "sbcs r3, r3, r5\n\t"
  6265. "sbcs r4, r4, r6\n\t"
  6266. "stm %[a]!, {r3, r4}\n\t"
  6267. "ldm %[a], {r3, r4}\n\t"
  6268. "ldm %[b]!, {r5, r6}\n\t"
  6269. "sbcs r3, r3, r5\n\t"
  6270. "sbcs r4, r4, r6\n\t"
  6271. "stm %[a]!, {r3, r4}\n\t"
  6272. "ldm %[a], {r3, r4}\n\t"
  6273. "ldm %[b]!, {r5, r6}\n\t"
  6274. "sbcs r3, r3, r5\n\t"
  6275. "sbcs r4, r4, r6\n\t"
  6276. "stm %[a]!, {r3, r4}\n\t"
  6277. "sbc %[c], %[c], %[c]\n\t"
  6278. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  6279. :
  6280. : "memory", "r3", "r4", "r5", "r6"
  6281. );
  6282. return c;
  6283. }
  6284. /* Add b to a into r. (r = a + b)
  6285. *
  6286. * r A single precision integer.
  6287. * a A single precision integer.
  6288. * b A single precision integer.
  6289. */
  6290. SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
  6291. const sp_digit* b)
  6292. {
  6293. sp_digit c = 0;
  6294. __asm__ __volatile__ (
  6295. "ldm %[a]!, {r4, r5}\n\t"
  6296. "ldm %[b]!, {r6, r8}\n\t"
  6297. "adds r4, r4, r6\n\t"
  6298. "adcs r5, r5, r8\n\t"
  6299. "stm %[r]!, {r4, r5}\n\t"
  6300. "ldm %[a]!, {r4, r5}\n\t"
  6301. "ldm %[b]!, {r6, r8}\n\t"
  6302. "adcs r4, r4, r6\n\t"
  6303. "adcs r5, r5, r8\n\t"
  6304. "stm %[r]!, {r4, r5}\n\t"
  6305. "ldm %[a]!, {r4, r5}\n\t"
  6306. "ldm %[b]!, {r6, r8}\n\t"
  6307. "adcs r4, r4, r6\n\t"
  6308. "adcs r5, r5, r8\n\t"
  6309. "stm %[r]!, {r4, r5}\n\t"
  6310. "ldm %[a]!, {r4, r5}\n\t"
  6311. "ldm %[b]!, {r6, r8}\n\t"
  6312. "adcs r4, r4, r6\n\t"
  6313. "adcs r5, r5, r8\n\t"
  6314. "stm %[r]!, {r4, r5}\n\t"
  6315. "ldm %[a]!, {r4, r5}\n\t"
  6316. "ldm %[b]!, {r6, r8}\n\t"
  6317. "adcs r4, r4, r6\n\t"
  6318. "adcs r5, r5, r8\n\t"
  6319. "stm %[r]!, {r4, r5}\n\t"
  6320. "ldm %[a]!, {r4, r5}\n\t"
  6321. "ldm %[b]!, {r6, r8}\n\t"
  6322. "adcs r4, r4, r6\n\t"
  6323. "adcs r5, r5, r8\n\t"
  6324. "stm %[r]!, {r4, r5}\n\t"
  6325. "ldm %[a]!, {r4, r5}\n\t"
  6326. "ldm %[b]!, {r6, r8}\n\t"
  6327. "adcs r4, r4, r6\n\t"
  6328. "adcs r5, r5, r8\n\t"
  6329. "stm %[r]!, {r4, r5}\n\t"
  6330. "ldm %[a]!, {r4, r5}\n\t"
  6331. "ldm %[b]!, {r6, r8}\n\t"
  6332. "adcs r4, r4, r6\n\t"
  6333. "adcs r5, r5, r8\n\t"
  6334. "stm %[r]!, {r4, r5}\n\t"
  6335. "ldm %[a]!, {r4, r5}\n\t"
  6336. "ldm %[b]!, {r6, r8}\n\t"
  6337. "adcs r4, r4, r6\n\t"
  6338. "adcs r5, r5, r8\n\t"
  6339. "stm %[r]!, {r4, r5}\n\t"
  6340. "ldm %[a]!, {r4, r5}\n\t"
  6341. "ldm %[b]!, {r6, r8}\n\t"
  6342. "adcs r4, r4, r6\n\t"
  6343. "adcs r5, r5, r8\n\t"
  6344. "stm %[r]!, {r4, r5}\n\t"
  6345. "ldm %[a]!, {r4, r5}\n\t"
  6346. "ldm %[b]!, {r6, r8}\n\t"
  6347. "adcs r4, r4, r6\n\t"
  6348. "adcs r5, r5, r8\n\t"
  6349. "stm %[r]!, {r4, r5}\n\t"
  6350. "ldm %[a]!, {r4, r5}\n\t"
  6351. "ldm %[b]!, {r6, r8}\n\t"
  6352. "adcs r4, r4, r6\n\t"
  6353. "adcs r5, r5, r8\n\t"
  6354. "stm %[r]!, {r4, r5}\n\t"
  6355. "ldm %[a]!, {r4, r5}\n\t"
  6356. "ldm %[b]!, {r6, r8}\n\t"
  6357. "adcs r4, r4, r6\n\t"
  6358. "adcs r5, r5, r8\n\t"
  6359. "stm %[r]!, {r4, r5}\n\t"
  6360. "ldm %[a]!, {r4, r5}\n\t"
  6361. "ldm %[b]!, {r6, r8}\n\t"
  6362. "adcs r4, r4, r6\n\t"
  6363. "adcs r5, r5, r8\n\t"
  6364. "stm %[r]!, {r4, r5}\n\t"
  6365. "ldm %[a]!, {r4, r5}\n\t"
  6366. "ldm %[b]!, {r6, r8}\n\t"
  6367. "adcs r4, r4, r6\n\t"
  6368. "adcs r5, r5, r8\n\t"
  6369. "stm %[r]!, {r4, r5}\n\t"
  6370. "ldm %[a]!, {r4, r5}\n\t"
  6371. "ldm %[b]!, {r6, r8}\n\t"
  6372. "adcs r4, r4, r6\n\t"
  6373. "adcs r5, r5, r8\n\t"
  6374. "stm %[r]!, {r4, r5}\n\t"
  6375. "ldm %[a]!, {r4, r5}\n\t"
  6376. "ldm %[b]!, {r6, r8}\n\t"
  6377. "adcs r4, r4, r6\n\t"
  6378. "adcs r5, r5, r8\n\t"
  6379. "stm %[r]!, {r4, r5}\n\t"
  6380. "ldm %[a]!, {r4, r5}\n\t"
  6381. "ldm %[b]!, {r6, r8}\n\t"
  6382. "adcs r4, r4, r6\n\t"
  6383. "adcs r5, r5, r8\n\t"
  6384. "stm %[r]!, {r4, r5}\n\t"
  6385. "ldm %[a]!, {r4, r5}\n\t"
  6386. "ldm %[b]!, {r6, r8}\n\t"
  6387. "adcs r4, r4, r6\n\t"
  6388. "adcs r5, r5, r8\n\t"
  6389. "stm %[r]!, {r4, r5}\n\t"
  6390. "ldm %[a]!, {r4, r5}\n\t"
  6391. "ldm %[b]!, {r6, r8}\n\t"
  6392. "adcs r4, r4, r6\n\t"
  6393. "adcs r5, r5, r8\n\t"
  6394. "stm %[r]!, {r4, r5}\n\t"
  6395. "ldm %[a]!, {r4, r5}\n\t"
  6396. "ldm %[b]!, {r6, r8}\n\t"
  6397. "adcs r4, r4, r6\n\t"
  6398. "adcs r5, r5, r8\n\t"
  6399. "stm %[r]!, {r4, r5}\n\t"
  6400. "ldm %[a]!, {r4, r5}\n\t"
  6401. "ldm %[b]!, {r6, r8}\n\t"
  6402. "adcs r4, r4, r6\n\t"
  6403. "adcs r5, r5, r8\n\t"
  6404. "stm %[r]!, {r4, r5}\n\t"
  6405. "ldm %[a]!, {r4, r5}\n\t"
  6406. "ldm %[b]!, {r6, r8}\n\t"
  6407. "adcs r4, r4, r6\n\t"
  6408. "adcs r5, r5, r8\n\t"
  6409. "stm %[r]!, {r4, r5}\n\t"
  6410. "ldm %[a]!, {r4, r5}\n\t"
  6411. "ldm %[b]!, {r6, r8}\n\t"
  6412. "adcs r4, r4, r6\n\t"
  6413. "adcs r5, r5, r8\n\t"
  6414. "stm %[r]!, {r4, r5}\n\t"
  6415. "ldm %[a]!, {r4, r5}\n\t"
  6416. "ldm %[b]!, {r6, r8}\n\t"
  6417. "adcs r4, r4, r6\n\t"
  6418. "adcs r5, r5, r8\n\t"
  6419. "stm %[r]!, {r4, r5}\n\t"
  6420. "ldm %[a]!, {r4, r5}\n\t"
  6421. "ldm %[b]!, {r6, r8}\n\t"
  6422. "adcs r4, r4, r6\n\t"
  6423. "adcs r5, r5, r8\n\t"
  6424. "stm %[r]!, {r4, r5}\n\t"
  6425. "ldm %[a]!, {r4, r5}\n\t"
  6426. "ldm %[b]!, {r6, r8}\n\t"
  6427. "adcs r4, r4, r6\n\t"
  6428. "adcs r5, r5, r8\n\t"
  6429. "stm %[r]!, {r4, r5}\n\t"
  6430. "ldm %[a]!, {r4, r5}\n\t"
  6431. "ldm %[b]!, {r6, r8}\n\t"
  6432. "adcs r4, r4, r6\n\t"
  6433. "adcs r5, r5, r8\n\t"
  6434. "stm %[r]!, {r4, r5}\n\t"
  6435. "ldm %[a]!, {r4, r5}\n\t"
  6436. "ldm %[b]!, {r6, r8}\n\t"
  6437. "adcs r4, r4, r6\n\t"
  6438. "adcs r5, r5, r8\n\t"
  6439. "stm %[r]!, {r4, r5}\n\t"
  6440. "ldm %[a]!, {r4, r5}\n\t"
  6441. "ldm %[b]!, {r6, r8}\n\t"
  6442. "adcs r4, r4, r6\n\t"
  6443. "adcs r5, r5, r8\n\t"
  6444. "stm %[r]!, {r4, r5}\n\t"
  6445. "ldm %[a]!, {r4, r5}\n\t"
  6446. "ldm %[b]!, {r6, r8}\n\t"
  6447. "adcs r4, r4, r6\n\t"
  6448. "adcs r5, r5, r8\n\t"
  6449. "stm %[r]!, {r4, r5}\n\t"
  6450. "ldm %[a]!, {r4, r5}\n\t"
  6451. "ldm %[b]!, {r6, r8}\n\t"
  6452. "adcs r4, r4, r6\n\t"
  6453. "adcs r5, r5, r8\n\t"
  6454. "stm %[r]!, {r4, r5}\n\t"
  6455. "ldm %[a]!, {r4, r5}\n\t"
  6456. "ldm %[b]!, {r6, r8}\n\t"
  6457. "adcs r4, r4, r6\n\t"
  6458. "adcs r5, r5, r8\n\t"
  6459. "stm %[r]!, {r4, r5}\n\t"
  6460. "ldm %[a]!, {r4, r5}\n\t"
  6461. "ldm %[b]!, {r6, r8}\n\t"
  6462. "adcs r4, r4, r6\n\t"
  6463. "adcs r5, r5, r8\n\t"
  6464. "stm %[r]!, {r4, r5}\n\t"
  6465. "ldm %[a]!, {r4, r5}\n\t"
  6466. "ldm %[b]!, {r6, r8}\n\t"
  6467. "adcs r4, r4, r6\n\t"
  6468. "adcs r5, r5, r8\n\t"
  6469. "stm %[r]!, {r4, r5}\n\t"
  6470. "ldm %[a]!, {r4, r5}\n\t"
  6471. "ldm %[b]!, {r6, r8}\n\t"
  6472. "adcs r4, r4, r6\n\t"
  6473. "adcs r5, r5, r8\n\t"
  6474. "stm %[r]!, {r4, r5}\n\t"
  6475. "ldm %[a]!, {r4, r5}\n\t"
  6476. "ldm %[b]!, {r6, r8}\n\t"
  6477. "adcs r4, r4, r6\n\t"
  6478. "adcs r5, r5, r8\n\t"
  6479. "stm %[r]!, {r4, r5}\n\t"
  6480. "ldm %[a]!, {r4, r5}\n\t"
  6481. "ldm %[b]!, {r6, r8}\n\t"
  6482. "adcs r4, r4, r6\n\t"
  6483. "adcs r5, r5, r8\n\t"
  6484. "stm %[r]!, {r4, r5}\n\t"
  6485. "ldm %[a]!, {r4, r5}\n\t"
  6486. "ldm %[b]!, {r6, r8}\n\t"
  6487. "adcs r4, r4, r6\n\t"
  6488. "adcs r5, r5, r8\n\t"
  6489. "stm %[r]!, {r4, r5}\n\t"
  6490. "ldm %[a]!, {r4, r5}\n\t"
  6491. "ldm %[b]!, {r6, r8}\n\t"
  6492. "adcs r4, r4, r6\n\t"
  6493. "adcs r5, r5, r8\n\t"
  6494. "stm %[r]!, {r4, r5}\n\t"
  6495. "ldm %[a]!, {r4, r5}\n\t"
  6496. "ldm %[b]!, {r6, r8}\n\t"
  6497. "adcs r4, r4, r6\n\t"
  6498. "adcs r5, r5, r8\n\t"
  6499. "stm %[r]!, {r4, r5}\n\t"
  6500. "ldm %[a]!, {r4, r5}\n\t"
  6501. "ldm %[b]!, {r6, r8}\n\t"
  6502. "adcs r4, r4, r6\n\t"
  6503. "adcs r5, r5, r8\n\t"
  6504. "stm %[r]!, {r4, r5}\n\t"
  6505. "ldm %[a]!, {r4, r5}\n\t"
  6506. "ldm %[b]!, {r6, r8}\n\t"
  6507. "adcs r4, r4, r6\n\t"
  6508. "adcs r5, r5, r8\n\t"
  6509. "stm %[r]!, {r4, r5}\n\t"
  6510. "ldm %[a]!, {r4, r5}\n\t"
  6511. "ldm %[b]!, {r6, r8}\n\t"
  6512. "adcs r4, r4, r6\n\t"
  6513. "adcs r5, r5, r8\n\t"
  6514. "stm %[r]!, {r4, r5}\n\t"
  6515. "ldm %[a]!, {r4, r5}\n\t"
  6516. "ldm %[b]!, {r6, r8}\n\t"
  6517. "adcs r4, r4, r6\n\t"
  6518. "adcs r5, r5, r8\n\t"
  6519. "stm %[r]!, {r4, r5}\n\t"
  6520. "ldm %[a]!, {r4, r5}\n\t"
  6521. "ldm %[b]!, {r6, r8}\n\t"
  6522. "adcs r4, r4, r6\n\t"
  6523. "adcs r5, r5, r8\n\t"
  6524. "stm %[r]!, {r4, r5}\n\t"
  6525. "ldm %[a]!, {r4, r5}\n\t"
  6526. "ldm %[b]!, {r6, r8}\n\t"
  6527. "adcs r4, r4, r6\n\t"
  6528. "adcs r5, r5, r8\n\t"
  6529. "stm %[r]!, {r4, r5}\n\t"
  6530. "ldm %[a]!, {r4, r5}\n\t"
  6531. "ldm %[b]!, {r6, r8}\n\t"
  6532. "adcs r4, r4, r6\n\t"
  6533. "adcs r5, r5, r8\n\t"
  6534. "stm %[r]!, {r4, r5}\n\t"
  6535. "mov %[c], #0\n\t"
  6536. "adc %[c], %[c], %[c]\n\t"
  6537. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  6538. :
  6539. : "memory", "r4", "r5", "r6", "r8"
  6540. );
  6541. return c;
  6542. }
  6543. /* AND m into each word of a and store in r.
  6544. *
  6545. * r A single precision integer.
  6546. * a A single precision integer.
  6547. * m Mask to AND against each digit.
  6548. */
  6549. static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
  6550. {
  6551. #ifdef WOLFSSL_SP_SMALL
  6552. int i;
  6553. for (i=0; i<48; i++) {
  6554. r[i] = a[i] & m;
  6555. }
  6556. #else
  6557. int i;
  6558. for (i = 0; i < 48; i += 8) {
  6559. r[i+0] = a[i+0] & m;
  6560. r[i+1] = a[i+1] & m;
  6561. r[i+2] = a[i+2] & m;
  6562. r[i+3] = a[i+3] & m;
  6563. r[i+4] = a[i+4] & m;
  6564. r[i+5] = a[i+5] & m;
  6565. r[i+6] = a[i+6] & m;
  6566. r[i+7] = a[i+7] & m;
  6567. }
  6568. #endif
  6569. }
  6570. /* Multiply a and b into r. (r = a * b)
  6571. *
  6572. * r A single precision integer.
  6573. * a A single precision integer.
  6574. * b A single precision integer.
  6575. */
  6576. SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
  6577. const sp_digit* b)
  6578. {
  6579. sp_digit* z0 = r;
  6580. sp_digit z1[96];
  6581. sp_digit a1[48];
  6582. sp_digit b1[48];
  6583. sp_digit z2[96];
  6584. sp_digit u, ca, cb;
  6585. ca = sp_3072_add_48(a1, a, &a[48]);
  6586. cb = sp_3072_add_48(b1, b, &b[48]);
  6587. u = ca & cb;
  6588. sp_3072_mul_48(z1, a1, b1);
  6589. sp_3072_mul_48(z2, &a[48], &b[48]);
  6590. sp_3072_mul_48(z0, a, b);
  6591. sp_3072_mask_48(r + 96, a1, 0 - cb);
  6592. sp_3072_mask_48(b1, b1, 0 - ca);
  6593. u += sp_3072_add_48(r + 96, r + 96, b1);
  6594. u += sp_3072_sub_in_place_96(z1, z2);
  6595. u += sp_3072_sub_in_place_96(z1, z0);
  6596. u += sp_3072_add_96(r + 48, r + 48, z1);
  6597. r[144] = u;
  6598. XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
  6599. (void)sp_3072_add_96(r + 96, r + 96, z2);
  6600. }
  6601. /* Square a and put result in r. (r = a * a)
  6602. *
  6603. * r A single precision integer.
  6604. * a A single precision integer.
  6605. */
  6606. SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
  6607. {
  6608. sp_digit* z0 = r;
  6609. sp_digit z2[96];
  6610. sp_digit z1[96];
  6611. sp_digit a1[48];
  6612. sp_digit u;
  6613. u = sp_3072_add_48(a1, a, &a[48]);
  6614. sp_3072_sqr_48(z1, a1);
  6615. sp_3072_sqr_48(z2, &a[48]);
  6616. sp_3072_sqr_48(z0, a);
  6617. sp_3072_mask_48(r + 96, a1, 0 - u);
  6618. u += sp_3072_add_48(r + 96, r + 96, r + 96);
  6619. u += sp_3072_sub_in_place_96(z1, z2);
  6620. u += sp_3072_sub_in_place_96(z1, z0);
  6621. u += sp_3072_add_96(r + 48, r + 48, z1);
  6622. r[144] = u;
  6623. XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
  6624. (void)sp_3072_add_96(r + 96, r + 96, z2);
  6625. }
  6626. #endif /* !WOLFSSL_SP_SMALL */
  6627. #ifdef WOLFSSL_SP_SMALL
  6628. /* Add b to a into r. (r = a + b)
  6629. *
  6630. * r A single precision integer.
  6631. * a A single precision integer.
  6632. * b A single precision integer.
  6633. */
  6634. SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
  6635. const sp_digit* b)
  6636. {
  6637. sp_digit c = 0;
  6638. __asm__ __volatile__ (
  6639. "mov r6, %[a]\n\t"
  6640. "mov r8, #0\n\t"
  6641. "add r6, r6, #384\n\t"
  6642. "sub r8, r8, #1\n\t"
  6643. "\n1:\n\t"
  6644. "adds %[c], %[c], r8\n\t"
  6645. "ldr r4, [%[a]]\n\t"
  6646. "ldr r5, [%[b]]\n\t"
  6647. "adcs r4, r4, r5\n\t"
  6648. "str r4, [%[r]]\n\t"
  6649. "mov %[c], #0\n\t"
  6650. "adc %[c], %[c], %[c]\n\t"
  6651. "add %[a], %[a], #4\n\t"
  6652. "add %[b], %[b], #4\n\t"
  6653. "add %[r], %[r], #4\n\t"
  6654. "cmp %[a], r6\n\t"
  6655. #ifdef __GNUC__
  6656. "bne 1b\n\t"
  6657. #else
  6658. "bne.n 1b\n\t"
  6659. #endif /* __GNUC__ */
  6660. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  6661. :
  6662. : "memory", "r4", "r5", "r6", "r8"
  6663. );
  6664. return c;
  6665. }
  6666. #endif /* WOLFSSL_SP_SMALL */
  6667. #ifdef WOLFSSL_SP_SMALL
  6668. /* Sub b from a into a. (a -= b)
  6669. *
  6670. * a A single precision integer.
  6671. * b A single precision integer.
  6672. */
  6673. SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
  6674. const sp_digit* b)
  6675. {
  6676. sp_digit c = 0;
  6677. __asm__ __volatile__ (
  6678. "mov r8, %[a]\n\t"
  6679. "add r8, r8, #384\n\t"
  6680. "\n1:\n\t"
  6681. "mov r5, #0\n\t"
  6682. "subs r5, r5, %[c]\n\t"
  6683. "ldr r3, [%[a]]\n\t"
  6684. "ldr r4, [%[a], #4]\n\t"
  6685. "ldr r5, [%[b]]\n\t"
  6686. "ldr r6, [%[b], #4]\n\t"
  6687. "sbcs r3, r3, r5\n\t"
  6688. "sbcs r4, r4, r6\n\t"
  6689. "str r3, [%[a]]\n\t"
  6690. "str r4, [%[a], #4]\n\t"
  6691. "sbc %[c], %[c], %[c]\n\t"
  6692. "add %[a], %[a], #8\n\t"
  6693. "add %[b], %[b], #8\n\t"
  6694. "cmp %[a], r8\n\t"
  6695. #ifdef __GNUC__
  6696. "bne 1b\n\t"
  6697. #else
  6698. "bne.n 1b\n\t"
  6699. #endif /* __GNUC__ */
  6700. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  6701. :
  6702. : "memory", "r3", "r4", "r5", "r6", "r8"
  6703. );
  6704. return c;
  6705. }
  6706. #endif /* WOLFSSL_SP_SMALL */
  6707. #ifdef WOLFSSL_SP_SMALL
  6708. /* Multiply a and b into r. (r = a * b)
  6709. *
  6710. * r A single precision integer.
  6711. * a A single precision integer.
  6712. * b A single precision integer.
  6713. */
  6714. SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
  6715. const sp_digit* b)
  6716. {
  6717. sp_digit tmp_arr[96 * 2];
  6718. sp_digit* tmp = tmp_arr;
  6719. __asm__ __volatile__ (
  6720. "mov r3, #0\n\t"
  6721. "mov r4, #0\n\t"
  6722. "mov r9, r3\n\t"
  6723. "mov r12, %[r]\n\t"
  6724. "mov r10, %[a]\n\t"
  6725. "mov r11, %[b]\n\t"
  6726. "mov r6, #1\n\t"
  6727. "lsl r6, r6, #8\n\t"
  6728. "add r6, r6, #128\n\t"
  6729. "add r6, r6, r10\n\t"
  6730. "mov r14, r6\n\t"
  6731. "\n1:\n\t"
  6732. "mov %[r], #0\n\t"
  6733. "mov r5, #0\n\t"
  6734. "mov r6, #1\n\t"
  6735. "lsl r6, r6, #8\n\t"
  6736. "add r6, r6, #124\n\t"
  6737. "mov %[a], r9\n\t"
  6738. "subs %[a], %[a], r6\n\t"
  6739. "sbc r6, r6, r6\n\t"
  6740. "mvn r6, r6\n\t"
  6741. "and %[a], %[a], r6\n\t"
  6742. "mov %[b], r9\n\t"
  6743. "sub %[b], %[b], %[a]\n\t"
  6744. "add %[a], %[a], r10\n\t"
  6745. "add %[b], %[b], r11\n\t"
  6746. "\n2:\n\t"
  6747. /* Multiply Start */
  6748. "ldr r6, [%[a]]\n\t"
  6749. "ldr r8, [%[b]]\n\t"
  6750. "umull r6, r8, r6, r8\n\t"
  6751. "adds r3, r3, r6\n\t"
  6752. "adcs r4, r4, r8\n\t"
  6753. "adc r5, r5, %[r]\n\t"
  6754. /* Multiply Done */
  6755. "add %[a], %[a], #4\n\t"
  6756. "sub %[b], %[b], #4\n\t"
  6757. "cmp %[a], r14\n\t"
  6758. #ifdef __GNUC__
  6759. "beq 3f\n\t"
  6760. #else
  6761. "beq.n 3f\n\t"
  6762. #endif /* __GNUC__ */
  6763. "mov r6, r9\n\t"
  6764. "add r6, r6, r10\n\t"
  6765. "cmp %[a], r6\n\t"
  6766. #ifdef __GNUC__
  6767. "ble 2b\n\t"
  6768. #else
  6769. "ble.n 2b\n\t"
  6770. #endif /* __GNUC__ */
  6771. "\n3:\n\t"
  6772. "mov %[r], r12\n\t"
  6773. "mov r8, r9\n\t"
  6774. "str r3, [%[r], r8]\n\t"
  6775. "mov r3, r4\n\t"
  6776. "mov r4, r5\n\t"
  6777. "add r8, r8, #4\n\t"
  6778. "mov r9, r8\n\t"
  6779. "mov r6, #2\n\t"
  6780. "lsl r6, r6, #8\n\t"
  6781. "add r6, r6, #248\n\t"
  6782. "cmp r8, r6\n\t"
  6783. #ifdef __GNUC__
  6784. "ble 1b\n\t"
  6785. #else
  6786. "ble.n 1b\n\t"
  6787. #endif /* __GNUC__ */
  6788. "str r3, [%[r], r8]\n\t"
  6789. "mov %[a], r10\n\t"
  6790. "mov %[b], r11\n\t"
  6791. :
  6792. : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
  6793. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  6794. );
  6795. XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
  6796. }
  6797. /* Square a and put result in r. (r = a * a)
  6798. *
  6799. * r A single precision integer.
  6800. * a A single precision integer.
  6801. */
  6802. SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
  6803. {
  6804. __asm__ __volatile__ (
  6805. "mov r3, #0\n\t"
  6806. "mov r4, #0\n\t"
  6807. "mov r5, #0\n\t"
  6808. "mov r9, r3\n\t"
  6809. "mov r12, %[r]\n\t"
  6810. "mov r6, #3\n\t"
  6811. "lsl r6, r6, #8\n\t"
  6812. "neg r6, r6\n\t"
  6813. "add sp, sp, r6\n\t"
  6814. "mov r11, sp\n\t"
  6815. "mov r10, %[a]\n\t"
  6816. "\n1:\n\t"
  6817. "mov %[r], #0\n\t"
  6818. "mov r6, #1\n\t"
  6819. "lsl r6, r6, #8\n\t"
  6820. "add r6, r6, #124\n\t"
  6821. "mov %[a], r9\n\t"
  6822. "subs %[a], %[a], r6\n\t"
  6823. "sbc r6, r6, r6\n\t"
  6824. "mvn r6, r6\n\t"
  6825. "and %[a], %[a], r6\n\t"
  6826. "mov r2, r9\n\t"
  6827. "sub r2, r2, %[a]\n\t"
  6828. "add %[a], %[a], r10\n\t"
  6829. "add r2, r2, r10\n\t"
  6830. "\n2:\n\t"
  6831. "cmp r2, %[a]\n\t"
  6832. #ifdef __GNUC__
  6833. "beq 4f\n\t"
  6834. #else
  6835. "beq.n 4f\n\t"
  6836. #endif /* __GNUC__ */
  6837. /* Multiply * 2: Start */
  6838. "ldr r6, [%[a]]\n\t"
  6839. "ldr r8, [r2]\n\t"
  6840. "umull r6, r8, r6, r8\n\t"
  6841. "adds r3, r3, r6\n\t"
  6842. "adcs r4, r4, r8\n\t"
  6843. "adc r5, r5, %[r]\n\t"
  6844. "adds r3, r3, r6\n\t"
  6845. "adcs r4, r4, r8\n\t"
  6846. "adc r5, r5, %[r]\n\t"
  6847. /* Multiply * 2: Done */
  6848. #ifdef __GNUC__
  6849. "bal 5f\n\t"
  6850. #else
  6851. "bal.n 5f\n\t"
  6852. #endif /* __GNUC__ */
  6853. "\n4:\n\t"
  6854. /* Square: Start */
  6855. "ldr r6, [%[a]]\n\t"
  6856. "umull r6, r8, r6, r6\n\t"
  6857. "adds r3, r3, r6\n\t"
  6858. "adcs r4, r4, r8\n\t"
  6859. "adc r5, r5, %[r]\n\t"
  6860. /* Square: Done */
  6861. "\n5:\n\t"
  6862. "add %[a], %[a], #4\n\t"
  6863. "sub r2, r2, #4\n\t"
  6864. "mov r6, #1\n\t"
  6865. "lsl r6, r6, #8\n\t"
  6866. "add r6, r6, #128\n\t"
  6867. "add r6, r6, r10\n\t"
  6868. "cmp %[a], r6\n\t"
  6869. #ifdef __GNUC__
  6870. "beq 3f\n\t"
  6871. #else
  6872. "beq.n 3f\n\t"
  6873. #endif /* __GNUC__ */
  6874. "cmp %[a], r2\n\t"
  6875. #ifdef __GNUC__
  6876. "bgt 3f\n\t"
  6877. #else
  6878. "bgt.n 3f\n\t"
  6879. #endif /* __GNUC__ */
  6880. "mov r8, r9\n\t"
  6881. "add r8, r8, r10\n\t"
  6882. "cmp %[a], r8\n\t"
  6883. #ifdef __GNUC__
  6884. "ble 2b\n\t"
  6885. #else
  6886. "ble.n 2b\n\t"
  6887. #endif /* __GNUC__ */
  6888. "\n3:\n\t"
  6889. "mov %[r], r11\n\t"
  6890. "mov r8, r9\n\t"
  6891. "str r3, [%[r], r8]\n\t"
  6892. "mov r3, r4\n\t"
  6893. "mov r4, r5\n\t"
  6894. "mov r5, #0\n\t"
  6895. "add r8, r8, #4\n\t"
  6896. "mov r9, r8\n\t"
  6897. "mov r6, #2\n\t"
  6898. "lsl r6, r6, #8\n\t"
  6899. "add r6, r6, #248\n\t"
  6900. "cmp r8, r6\n\t"
  6901. #ifdef __GNUC__
  6902. "ble 1b\n\t"
  6903. #else
  6904. "ble.n 1b\n\t"
  6905. #endif /* __GNUC__ */
  6906. "mov %[a], r10\n\t"
  6907. "str r3, [%[r], r8]\n\t"
  6908. "mov %[r], r12\n\t"
  6909. "mov %[a], r11\n\t"
  6910. "mov r3, #2\n\t"
  6911. "lsl r3, r3, #8\n\t"
  6912. "add r3, r3, #252\n\t"
  6913. "\n4:\n\t"
  6914. "ldr r6, [%[a], r3]\n\t"
  6915. "str r6, [%[r], r3]\n\t"
  6916. "subs r3, r3, #4\n\t"
  6917. #ifdef __GNUC__
  6918. "bge 4b\n\t"
  6919. #else
  6920. "bge.n 4b\n\t"
  6921. #endif /* __GNUC__ */
  6922. "mov r6, #3\n\t"
  6923. "lsl r6, r6, #8\n\t"
  6924. "add sp, sp, r6\n\t"
  6925. :
  6926. : [r] "r" (r), [a] "r" (a)
  6927. : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
  6928. );
  6929. }
  6930. #endif /* WOLFSSL_SP_SMALL */
  6931. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  6932. #ifdef WOLFSSL_SP_SMALL
  6933. /* AND m into each word of a and store in r.
  6934. *
  6935. * r A single precision integer.
  6936. * a A single precision integer.
  6937. * m Mask to AND against each digit.
  6938. */
  6939. static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
  6940. {
  6941. int i;
  6942. for (i=0; i<48; i++) {
  6943. r[i] = a[i] & m;
  6944. }
  6945. }
  6946. #endif /* WOLFSSL_SP_SMALL */
  6947. #ifdef WOLFSSL_SP_SMALL
  6948. /* Add b to a into r. (r = a + b)
  6949. *
  6950. * r A single precision integer.
  6951. * a A single precision integer.
  6952. * b A single precision integer.
  6953. */
  6954. SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
  6955. const sp_digit* b)
  6956. {
  6957. sp_digit c = 0;
  6958. __asm__ __volatile__ (
  6959. "mov r6, %[a]\n\t"
  6960. "mov r8, #0\n\t"
  6961. "add r6, r6, #192\n\t"
  6962. "sub r8, r8, #1\n\t"
  6963. "\n1:\n\t"
  6964. "adds %[c], %[c], r8\n\t"
  6965. "ldr r4, [%[a]]\n\t"
  6966. "ldr r5, [%[b]]\n\t"
  6967. "adcs r4, r4, r5\n\t"
  6968. "str r4, [%[r]]\n\t"
  6969. "mov %[c], #0\n\t"
  6970. "adc %[c], %[c], %[c]\n\t"
  6971. "add %[a], %[a], #4\n\t"
  6972. "add %[b], %[b], #4\n\t"
  6973. "add %[r], %[r], #4\n\t"
  6974. "cmp %[a], r6\n\t"
  6975. #ifdef __GNUC__
  6976. "bne 1b\n\t"
  6977. #else
  6978. "bne.n 1b\n\t"
  6979. #endif /* __GNUC__ */
  6980. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  6981. :
  6982. : "memory", "r4", "r5", "r6", "r8"
  6983. );
  6984. return c;
  6985. }
  6986. #endif /* WOLFSSL_SP_SMALL */
  6987. #ifdef WOLFSSL_SP_SMALL
  6988. /* Sub b from a into a. (a -= b)
  6989. *
  6990. * a A single precision integer.
  6991. * b A single precision integer.
  6992. */
  6993. SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
  6994. const sp_digit* b)
  6995. {
  6996. sp_digit c = 0;
  6997. __asm__ __volatile__ (
  6998. "mov r8, %[a]\n\t"
  6999. "add r8, r8, #192\n\t"
  7000. "\n1:\n\t"
  7001. "mov r5, #0\n\t"
  7002. "subs r5, r5, %[c]\n\t"
  7003. "ldr r3, [%[a]]\n\t"
  7004. "ldr r4, [%[a], #4]\n\t"
  7005. "ldr r5, [%[b]]\n\t"
  7006. "ldr r6, [%[b], #4]\n\t"
  7007. "sbcs r3, r3, r5\n\t"
  7008. "sbcs r4, r4, r6\n\t"
  7009. "str r3, [%[a]]\n\t"
  7010. "str r4, [%[a], #4]\n\t"
  7011. "sbc %[c], %[c], %[c]\n\t"
  7012. "add %[a], %[a], #8\n\t"
  7013. "add %[b], %[b], #8\n\t"
  7014. "cmp %[a], r8\n\t"
  7015. #ifdef __GNUC__
  7016. "bne 1b\n\t"
  7017. #else
  7018. "bne.n 1b\n\t"
  7019. #endif /* __GNUC__ */
  7020. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  7021. :
  7022. : "memory", "r3", "r4", "r5", "r6", "r8"
  7023. );
  7024. return c;
  7025. }
  7026. #endif /* WOLFSSL_SP_SMALL */
  7027. #ifdef WOLFSSL_SP_SMALL
  7028. /* Multiply a and b into r. (r = a * b)
  7029. *
  7030. * r A single precision integer.
  7031. * a A single precision integer.
  7032. * b A single precision integer.
  7033. */
  7034. SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
  7035. const sp_digit* b)
  7036. {
  7037. sp_digit tmp_arr[48 * 2];
  7038. sp_digit* tmp = tmp_arr;
  7039. __asm__ __volatile__ (
  7040. "mov r3, #0\n\t"
  7041. "mov r4, #0\n\t"
  7042. "mov r9, r3\n\t"
  7043. "mov r12, %[r]\n\t"
  7044. "mov r10, %[a]\n\t"
  7045. "mov r11, %[b]\n\t"
  7046. "mov r6, #192\n\t"
  7047. "add r6, r6, r10\n\t"
  7048. "mov r14, r6\n\t"
  7049. "\n1:\n\t"
  7050. "mov %[r], #0\n\t"
  7051. "mov r5, #0\n\t"
  7052. "mov r6, #188\n\t"
  7053. "mov %[a], r9\n\t"
  7054. "subs %[a], %[a], r6\n\t"
  7055. "sbc r6, r6, r6\n\t"
  7056. "mvn r6, r6\n\t"
  7057. "and %[a], %[a], r6\n\t"
  7058. "mov %[b], r9\n\t"
  7059. "sub %[b], %[b], %[a]\n\t"
  7060. "add %[a], %[a], r10\n\t"
  7061. "add %[b], %[b], r11\n\t"
  7062. "\n2:\n\t"
  7063. /* Multiply Start */
  7064. "ldr r6, [%[a]]\n\t"
  7065. "ldr r8, [%[b]]\n\t"
  7066. "umull r6, r8, r6, r8\n\t"
  7067. "adds r3, r3, r6\n\t"
  7068. "adcs r4, r4, r8\n\t"
  7069. "adc r5, r5, %[r]\n\t"
  7070. /* Multiply Done */
  7071. "add %[a], %[a], #4\n\t"
  7072. "sub %[b], %[b], #4\n\t"
  7073. "cmp %[a], r14\n\t"
  7074. #ifdef __GNUC__
  7075. "beq 3f\n\t"
  7076. #else
  7077. "beq.n 3f\n\t"
  7078. #endif /* __GNUC__ */
  7079. "mov r6, r9\n\t"
  7080. "add r6, r6, r10\n\t"
  7081. "cmp %[a], r6\n\t"
  7082. #ifdef __GNUC__
  7083. "ble 2b\n\t"
  7084. #else
  7085. "ble.n 2b\n\t"
  7086. #endif /* __GNUC__ */
  7087. "\n3:\n\t"
  7088. "mov %[r], r12\n\t"
  7089. "mov r8, r9\n\t"
  7090. "str r3, [%[r], r8]\n\t"
  7091. "mov r3, r4\n\t"
  7092. "mov r4, r5\n\t"
  7093. "add r8, r8, #4\n\t"
  7094. "mov r9, r8\n\t"
  7095. "mov r6, #1\n\t"
  7096. "lsl r6, r6, #8\n\t"
  7097. "add r6, r6, #120\n\t"
  7098. "cmp r8, r6\n\t"
  7099. #ifdef __GNUC__
  7100. "ble 1b\n\t"
  7101. #else
  7102. "ble.n 1b\n\t"
  7103. #endif /* __GNUC__ */
  7104. "str r3, [%[r], r8]\n\t"
  7105. "mov %[a], r10\n\t"
  7106. "mov %[b], r11\n\t"
  7107. :
  7108. : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
  7109. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  7110. );
  7111. XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
  7112. }
  7113. /* Square a and put result in r. (r = a * a)
  7114. *
  7115. * r A single precision integer.
  7116. * a A single precision integer.
  7117. */
  7118. SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
  7119. {
  7120. __asm__ __volatile__ (
  7121. "mov r3, #0\n\t"
  7122. "mov r4, #0\n\t"
  7123. "mov r5, #0\n\t"
  7124. "mov r9, r3\n\t"
  7125. "mov r12, %[r]\n\t"
  7126. "mov r6, #1\n\t"
  7127. "lsl r6, r6, #8\n\t"
  7128. "add r6, r6, #128\n\t"
  7129. "neg r6, r6\n\t"
  7130. "add sp, sp, r6\n\t"
  7131. "mov r11, sp\n\t"
  7132. "mov r10, %[a]\n\t"
  7133. "\n1:\n\t"
  7134. "mov %[r], #0\n\t"
  7135. "mov r6, #188\n\t"
  7136. "mov %[a], r9\n\t"
  7137. "subs %[a], %[a], r6\n\t"
  7138. "sbc r6, r6, r6\n\t"
  7139. "mvn r6, r6\n\t"
  7140. "and %[a], %[a], r6\n\t"
  7141. "mov r2, r9\n\t"
  7142. "sub r2, r2, %[a]\n\t"
  7143. "add %[a], %[a], r10\n\t"
  7144. "add r2, r2, r10\n\t"
  7145. "\n2:\n\t"
  7146. "cmp r2, %[a]\n\t"
  7147. #ifdef __GNUC__
  7148. "beq 4f\n\t"
  7149. #else
  7150. "beq.n 4f\n\t"
  7151. #endif /* __GNUC__ */
  7152. /* Multiply * 2: Start */
  7153. "ldr r6, [%[a]]\n\t"
  7154. "ldr r8, [r2]\n\t"
  7155. "umull r6, r8, r6, r8\n\t"
  7156. "adds r3, r3, r6\n\t"
  7157. "adcs r4, r4, r8\n\t"
  7158. "adc r5, r5, %[r]\n\t"
  7159. "adds r3, r3, r6\n\t"
  7160. "adcs r4, r4, r8\n\t"
  7161. "adc r5, r5, %[r]\n\t"
  7162. /* Multiply * 2: Done */
  7163. #ifdef __GNUC__
  7164. "bal 5f\n\t"
  7165. #else
  7166. "bal.n 5f\n\t"
  7167. #endif /* __GNUC__ */
  7168. "\n4:\n\t"
  7169. /* Square: Start */
  7170. "ldr r6, [%[a]]\n\t"
  7171. "umull r6, r8, r6, r6\n\t"
  7172. "adds r3, r3, r6\n\t"
  7173. "adcs r4, r4, r8\n\t"
  7174. "adc r5, r5, %[r]\n\t"
  7175. /* Square: Done */
  7176. "\n5:\n\t"
  7177. "add %[a], %[a], #4\n\t"
  7178. "sub r2, r2, #4\n\t"
  7179. "mov r6, #192\n\t"
  7180. "add r6, r6, r10\n\t"
  7181. "cmp %[a], r6\n\t"
  7182. #ifdef __GNUC__
  7183. "beq 3f\n\t"
  7184. #else
  7185. "beq.n 3f\n\t"
  7186. #endif /* __GNUC__ */
  7187. "cmp %[a], r2\n\t"
  7188. #ifdef __GNUC__
  7189. "bgt 3f\n\t"
  7190. #else
  7191. "bgt.n 3f\n\t"
  7192. #endif /* __GNUC__ */
  7193. "mov r8, r9\n\t"
  7194. "add r8, r8, r10\n\t"
  7195. "cmp %[a], r8\n\t"
  7196. #ifdef __GNUC__
  7197. "ble 2b\n\t"
  7198. #else
  7199. "ble.n 2b\n\t"
  7200. #endif /* __GNUC__ */
  7201. "\n3:\n\t"
  7202. "mov %[r], r11\n\t"
  7203. "mov r8, r9\n\t"
  7204. "str r3, [%[r], r8]\n\t"
  7205. "mov r3, r4\n\t"
  7206. "mov r4, r5\n\t"
  7207. "mov r5, #0\n\t"
  7208. "add r8, r8, #4\n\t"
  7209. "mov r9, r8\n\t"
  7210. "mov r6, #1\n\t"
  7211. "lsl r6, r6, #8\n\t"
  7212. "add r6, r6, #120\n\t"
  7213. "cmp r8, r6\n\t"
  7214. #ifdef __GNUC__
  7215. "ble 1b\n\t"
  7216. #else
  7217. "ble.n 1b\n\t"
  7218. #endif /* __GNUC__ */
  7219. "mov %[a], r10\n\t"
  7220. "str r3, [%[r], r8]\n\t"
  7221. "mov %[r], r12\n\t"
  7222. "mov %[a], r11\n\t"
  7223. "mov r3, #1\n\t"
  7224. "lsl r3, r3, #8\n\t"
  7225. "add r3, r3, #124\n\t"
  7226. "\n4:\n\t"
  7227. "ldr r6, [%[a], r3]\n\t"
  7228. "str r6, [%[r], r3]\n\t"
  7229. "subs r3, r3, #4\n\t"
  7230. #ifdef __GNUC__
  7231. "bge 4b\n\t"
  7232. #else
  7233. "bge.n 4b\n\t"
  7234. #endif /* __GNUC__ */
  7235. "mov r6, #1\n\t"
  7236. "lsl r6, r6, #8\n\t"
  7237. "add r6, r6, #128\n\t"
  7238. "add sp, sp, r6\n\t"
  7239. :
  7240. : [r] "r" (r), [a] "r" (a)
  7241. : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
  7242. );
  7243. }
  7244. #endif /* WOLFSSL_SP_SMALL */
  7245. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  7246. /* Caclulate the bottom digit of -1/a mod 2^n.
  7247. *
  7248. * a A single precision number.
  7249. * rho Bottom word of inverse.
  7250. */
  7251. static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
  7252. {
  7253. sp_digit x, b;
  7254. b = a[0];
  7255. x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  7256. x *= 2 - b * x; /* here x*a==1 mod 2**8 */
  7257. x *= 2 - b * x; /* here x*a==1 mod 2**16 */
  7258. x *= 2 - b * x; /* here x*a==1 mod 2**32 */
  7259. /* rho = -1/m mod b */
  7260. *rho = -x;
  7261. }
  7262. /* Mul a by digit b into r. (r = a * b)
  7263. *
  7264. * r A single precision integer.
  7265. * a A single precision integer.
  7266. * b A single precision digit.
  7267. */
  7268. SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
  7269. sp_digit b)
  7270. {
  7271. __asm__ __volatile__ (
  7272. "add r9, %[a], #384\n\t"
  7273. /* A[0] * B */
  7274. "ldr r6, [%[a]], #4\n\t"
  7275. "umull r5, r3, r6, %[b]\n\t"
  7276. "mov r4, #0\n\t"
  7277. "str r5, [%[r]], #4\n\t"
  7278. /* A[0] * B - Done */
  7279. "\n1:\n\t"
  7280. "mov r5, #0\n\t"
  7281. /* A[] * B */
  7282. "ldr r6, [%[a]], #4\n\t"
  7283. "umull r6, r8, r6, %[b]\n\t"
  7284. "adds r3, r3, r6\n\t"
  7285. "adcs r4, r4, r8\n\t"
  7286. "adc r5, r5, #0\n\t"
  7287. /* A[] * B - Done */
  7288. "str r3, [%[r]], #4\n\t"
  7289. "mov r3, r4\n\t"
  7290. "mov r4, r5\n\t"
  7291. "cmp %[a], r9\n\t"
  7292. #ifdef __GNUC__
  7293. "blt 1b\n\t"
  7294. #else
  7295. "blt.n 1b\n\t"
  7296. #endif /* __GNUC__ */
  7297. "str r3, [%[r]]\n\t"
  7298. : [r] "+r" (r), [a] "+r" (a)
  7299. : [b] "r" (b)
  7300. : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
  7301. );
  7302. }
  7303. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  7304. /* r = 2^n mod m where n is the number of bits to reduce by.
  7305. * Given m must be 3072 bits, just need to subtract.
  7306. *
  7307. * r A single precision number.
  7308. * m A single precision number.
  7309. */
  7310. static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
  7311. {
  7312. XMEMSET(r, 0, sizeof(sp_digit) * 48);
  7313. /* r = 2^n mod m */
  7314. sp_3072_sub_in_place_48(r, m);
  7315. }
  7316. /* Conditionally subtract b from a using the mask m.
  7317. * m is -1 to subtract and 0 when not copying.
  7318. *
  7319. * r A single precision number representing condition subtract result.
  7320. * a A single precision number to subtract from.
  7321. * b A single precision number to subtract.
  7322. * m Mask value to apply.
  7323. */
  7324. SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
  7325. const sp_digit* b, sp_digit m)
  7326. {
  7327. sp_digit c = 0;
  7328. __asm__ __volatile__ (
  7329. "mov r5, #192\n\t"
  7330. "mov r9, r5\n\t"
  7331. "mov r8, #0\n\t"
  7332. "\n1:\n\t"
  7333. "ldr r6, [%[b], r8]\n\t"
  7334. "and r6, r6, %[m]\n\t"
  7335. "mov r5, #0\n\t"
  7336. "subs r5, r5, %[c]\n\t"
  7337. "ldr r5, [%[a], r8]\n\t"
  7338. "sbcs r5, r5, r6\n\t"
  7339. "sbcs %[c], %[c], %[c]\n\t"
  7340. "str r5, [%[r], r8]\n\t"
  7341. "add r8, r8, #4\n\t"
  7342. "cmp r8, r9\n\t"
  7343. #ifdef __GNUC__
  7344. "blt 1b\n\t"
  7345. #else
  7346. "blt.n 1b\n\t"
  7347. #endif /* __GNUC__ */
  7348. : [c] "+r" (c)
  7349. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  7350. : "memory", "r5", "r6", "r8", "r9"
  7351. );
  7352. return c;
  7353. }
  7354. /* Reduce the number back to 3072 bits using Montgomery reduction.
  7355. *
  7356. * a A single precision number to reduce in place.
  7357. * m The single precision number representing the modulus.
  7358. * mp The digit representing the negative inverse of m mod 2^n.
  7359. */
  7360. SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
  7361. sp_digit mp)
  7362. {
  7363. sp_digit ca = 0;
  7364. __asm__ __volatile__ (
  7365. "mov r9, %[mp]\n\t"
  7366. "mov r12, %[m]\n\t"
  7367. "mov r10, %[a]\n\t"
  7368. "mov r4, #0\n\t"
  7369. "add r11, r10, #192\n\t"
  7370. "\n1:\n\t"
  7371. /* mu = a[i] * mp */
  7372. "mov %[mp], r9\n\t"
  7373. "ldr %[a], [r10]\n\t"
  7374. "mul %[mp], %[mp], %[a]\n\t"
  7375. "mov %[m], r12\n\t"
  7376. "add r14, r10, #184\n\t"
  7377. "\n2:\n\t"
  7378. /* a[i+j] += m[j] * mu */
  7379. "ldr %[a], [r10]\n\t"
  7380. "mov r5, #0\n\t"
  7381. /* Multiply m[j] and mu - Start */
  7382. "ldr r8, [%[m]], #4\n\t"
  7383. "umull r6, r8, %[mp], r8\n\t"
  7384. "adds %[a], %[a], r6\n\t"
  7385. "adc r5, r5, r8\n\t"
  7386. /* Multiply m[j] and mu - Done */
  7387. "adds r4, r4, %[a]\n\t"
  7388. "adc r5, r5, #0\n\t"
  7389. "str r4, [r10], #4\n\t"
  7390. /* a[i+j+1] += m[j+1] * mu */
  7391. "ldr %[a], [r10]\n\t"
  7392. "mov r4, #0\n\t"
  7393. /* Multiply m[j] and mu - Start */
  7394. "ldr r8, [%[m]], #4\n\t"
  7395. "umull r6, r8, %[mp], r8\n\t"
  7396. "adds %[a], %[a], r6\n\t"
  7397. "adc r4, r4, r8\n\t"
  7398. /* Multiply m[j] and mu - Done */
  7399. "adds r5, r5, %[a]\n\t"
  7400. "adc r4, r4, #0\n\t"
  7401. "str r5, [r10], #4\n\t"
  7402. "cmp r10, r14\n\t"
  7403. #ifdef __GNUC__
  7404. "blt 2b\n\t"
  7405. #else
  7406. "blt.n 2b\n\t"
  7407. #endif /* __GNUC__ */
  7408. /* a[i+46] += m[46] * mu */
  7409. "ldr %[a], [r10]\n\t"
  7410. "mov r5, #0\n\t"
  7411. /* Multiply m[j] and mu - Start */
  7412. "ldr r8, [%[m]], #4\n\t"
  7413. "umull r6, r8, %[mp], r8\n\t"
  7414. "adds %[a], %[a], r6\n\t"
  7415. "adc r5, r5, r8\n\t"
  7416. /* Multiply m[j] and mu - Done */
  7417. "adds r4, r4, %[a]\n\t"
  7418. "adc r5, r5, #0\n\t"
  7419. "str r4, [r10], #4\n\t"
  7420. /* a[i+47] += m[47] * mu */
  7421. "mov r4, %[ca]\n\t"
  7422. "mov %[ca], #0\n\t"
  7423. /* Multiply m[47] and mu - Start */
  7424. "ldr r8, [%[m]]\n\t"
  7425. "umull r6, r8, %[mp], r8\n\t"
  7426. "adds r5, r5, r6\n\t"
  7427. "adcs r4, r4, r8\n\t"
  7428. "adc %[ca], %[ca], #0\n\t"
  7429. /* Multiply m[47] and mu - Done */
  7430. "ldr r6, [r10]\n\t"
  7431. "ldr r8, [r10, #4]\n\t"
  7432. "adds r6, r6, r5\n\t"
  7433. "adcs r8, r8, r4\n\t"
  7434. "adc %[ca], %[ca], #0\n\t"
  7435. "str r6, [r10]\n\t"
  7436. "str r8, [r10, #4]\n\t"
  7437. /* Next word in a */
  7438. "sub r10, r10, #184\n\t"
  7439. "cmp r10, r11\n\t"
  7440. #ifdef __GNUC__
  7441. "blt 1b\n\t"
  7442. #else
  7443. "blt.n 1b\n\t"
  7444. #endif /* __GNUC__ */
  7445. "mov %[a], r10\n\t"
  7446. "mov %[m], r12\n\t"
  7447. : [ca] "+r" (ca), [a] "+r" (a)
  7448. : [m] "r" (m), [mp] "r" (mp)
  7449. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  7450. );
  7451. sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
  7452. }
  7453. /* Multiply two Montogmery form numbers mod the modulus (prime).
  7454. * (r = a * b mod m)
  7455. *
  7456. * r Result of multiplication.
  7457. * a First number to multiply in Montogmery form.
  7458. * b Second number to multiply in Montogmery form.
  7459. * m Modulus (prime).
  7460. * mp Montogmery mulitplier.
  7461. */
  7462. static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
  7463. const sp_digit* m, sp_digit mp)
  7464. {
  7465. sp_3072_mul_48(r, a, b);
  7466. sp_3072_mont_reduce_48(r, m, mp);
  7467. }
  7468. /* Square the Montgomery form number. (r = a * a mod m)
  7469. *
  7470. * r Result of squaring.
  7471. * a Number to square in Montogmery form.
  7472. * m Modulus (prime).
  7473. * mp Montogmery mulitplier.
  7474. */
  7475. static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
  7476. sp_digit mp)
  7477. {
  7478. sp_3072_sqr_48(r, a);
  7479. sp_3072_mont_reduce_48(r, m, mp);
  7480. }
  7481. /* Mul a by digit b into r. (r = a * b)
  7482. *
  7483. * r A single precision integer.
  7484. * a A single precision integer.
  7485. * b A single precision digit.
  7486. */
  7487. SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
  7488. sp_digit b)
  7489. {
  7490. __asm__ __volatile__ (
  7491. "add r9, %[a], #192\n\t"
  7492. /* A[0] * B */
  7493. "ldr r6, [%[a]], #4\n\t"
  7494. "umull r5, r3, r6, %[b]\n\t"
  7495. "mov r4, #0\n\t"
  7496. "str r5, [%[r]], #4\n\t"
  7497. /* A[0] * B - Done */
  7498. "\n1:\n\t"
  7499. "mov r5, #0\n\t"
  7500. /* A[] * B */
  7501. "ldr r6, [%[a]], #4\n\t"
  7502. "umull r6, r8, r6, %[b]\n\t"
  7503. "adds r3, r3, r6\n\t"
  7504. "adcs r4, r4, r8\n\t"
  7505. "adc r5, r5, #0\n\t"
  7506. /* A[] * B - Done */
  7507. "str r3, [%[r]], #4\n\t"
  7508. "mov r3, r4\n\t"
  7509. "mov r4, r5\n\t"
  7510. "cmp %[a], r9\n\t"
  7511. #ifdef __GNUC__
  7512. "blt 1b\n\t"
  7513. #else
  7514. "blt.n 1b\n\t"
  7515. #endif /* __GNUC__ */
  7516. "str r3, [%[r]]\n\t"
  7517. : [r] "+r" (r), [a] "+r" (a)
  7518. : [b] "r" (b)
  7519. : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
  7520. );
  7521. }
  7522. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  7523. *
  7524. * d1 The high order half of the number to divide.
  7525. * d0 The low order half of the number to divide.
  7526. * div The dividend.
  7527. * returns the result of the division.
  7528. *
  7529. * Note that this is an approximate div. It may give an answer 1 larger.
  7530. */
  7531. SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
  7532. sp_digit div)
  7533. {
  7534. sp_digit r = 0;
  7535. __asm__ __volatile__ (
  7536. "lsr r6, %[div], #16\n\t"
  7537. "add r6, r6, #1\n\t"
  7538. "udiv r4, %[d1], r6\n\t"
  7539. "lsl r8, r4, #16\n\t"
  7540. "umull r4, r5, %[div], r8\n\t"
  7541. "subs %[d0], %[d0], r4\n\t"
  7542. "sbc %[d1], %[d1], r5\n\t"
  7543. "udiv r5, %[d1], r6\n\t"
  7544. "lsl r4, r5, #16\n\t"
  7545. "add r8, r8, r4\n\t"
  7546. "umull r4, r5, %[div], r4\n\t"
  7547. "subs %[d0], %[d0], r4\n\t"
  7548. "sbc %[d1], %[d1], r5\n\t"
  7549. "lsl r4, %[d1], #16\n\t"
  7550. "orr r4, r4, %[d0], lsr #16\n\t"
  7551. "udiv r4, r4, r6\n\t"
  7552. "add r8, r8, r4\n\t"
  7553. "umull r4, r5, %[div], r4\n\t"
  7554. "subs %[d0], %[d0], r4\n\t"
  7555. "sbc %[d1], %[d1], r5\n\t"
  7556. "lsl r4, %[d1], #16\n\t"
  7557. "orr r4, r4, %[d0], lsr #16\n\t"
  7558. "udiv r4, r4, r6\n\t"
  7559. "add r8, r8, r4\n\t"
  7560. "umull r4, r5, %[div], r4\n\t"
  7561. "subs %[d0], %[d0], r4\n\t"
  7562. "sbc %[d1], %[d1], r5\n\t"
  7563. "udiv r4, %[d0], %[div]\n\t"
  7564. "add r8, r8, r4\n\t"
  7565. "mov %[r], r8\n\t"
  7566. : [r] "+r" (r)
  7567. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  7568. : "r4", "r5", "r6", "r8"
  7569. );
  7570. return r;
  7571. }
  7572. /* Compare a with b in constant time.
  7573. *
  7574. * a A single precision integer.
  7575. * b A single precision integer.
  7576. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  7577. * respectively.
  7578. */
  7579. SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
  7580. {
  7581. sp_digit r = 0;
  7582. __asm__ __volatile__ (
  7583. "mov r3, #0\n\t"
  7584. "mvn r3, r3\n\t"
  7585. "mov r6, #188\n\t"
  7586. "\n1:\n\t"
  7587. "ldr r8, [%[a], r6]\n\t"
  7588. "ldr r5, [%[b], r6]\n\t"
  7589. "and r8, r8, r3\n\t"
  7590. "and r5, r5, r3\n\t"
  7591. "mov r4, r8\n\t"
  7592. "subs r8, r8, r5\n\t"
  7593. "sbc r8, r8, r8\n\t"
  7594. "add %[r], %[r], r8\n\t"
  7595. "mvn r8, r8\n\t"
  7596. "and r3, r3, r8\n\t"
  7597. "subs r5, r5, r4\n\t"
  7598. "sbc r8, r8, r8\n\t"
  7599. "sub %[r], %[r], r8\n\t"
  7600. "mvn r8, r8\n\t"
  7601. "and r3, r3, r8\n\t"
  7602. "sub r6, r6, #4\n\t"
  7603. "cmp r6, #0\n\t"
  7604. #ifdef __GNUC__
  7605. "bge 1b\n\t"
  7606. #else
  7607. "bge.n 1b\n\t"
  7608. #endif /* __GNUC__ */
  7609. : [r] "+r" (r)
  7610. : [a] "r" (a), [b] "r" (b)
  7611. : "r3", "r4", "r5", "r6", "r8"
  7612. );
  7613. return r;
  7614. }
  7615. /* Divide d in a and put remainder into r (m*d + r = a)
  7616. * m is not calculated as it is not needed at this time.
  7617. *
  7618. * a Number to be divided.
  7619. * d Number to divide with.
  7620. * m Multiplier result.
  7621. * r Remainder from the division.
  7622. * returns MP_OKAY indicating success.
  7623. */
  7624. static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
  7625. sp_digit* r)
  7626. {
  7627. sp_digit t1[96], t2[49];
  7628. sp_digit div, r1;
  7629. int i;
  7630. (void)m;
  7631. div = d[47];
  7632. XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
  7633. for (i=47; i>=0; i--) {
  7634. sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
  7635. r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
  7636. sp_3072_mul_d_48(t2, d, r1);
  7637. t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
  7638. t1[48 + i] -= t2[48];
  7639. sp_3072_mask_48(t2, d, t1[48 + i]);
  7640. t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
  7641. sp_3072_mask_48(t2, d, t1[48 + i]);
  7642. t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
  7643. }
  7644. r1 = sp_3072_cmp_48(t1, d) >= 0;
  7645. sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
  7646. return MP_OKAY;
  7647. }
  7648. /* Reduce a modulo m into r. (r = a mod m)
  7649. *
  7650. * r A single precision number that is the reduced result.
  7651. * a A single precision number that is to be reduced.
  7652. * m A single precision number that is the modulus to reduce with.
  7653. * returns MP_OKAY indicating success.
  7654. */
  7655. static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
  7656. {
  7657. return sp_3072_div_48(a, m, NULL, r);
  7658. }
  7659. #ifdef WOLFSSL_SP_SMALL
  7660. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  7661. *
  7662. * r A single precision number that is the result of the operation.
  7663. * a A single precision number being exponentiated.
  7664. * e A single precision number that is the exponent.
  7665. * bits The number of bits in the exponent.
  7666. * m A single precision number that is the modulus.
  7667. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  7668. */
  7669. static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
  7670. int bits, const sp_digit* m, int reduceA)
  7671. {
  7672. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7673. sp_digit* td;
  7674. #else
  7675. sp_digit td[16 * 96];
  7676. #endif
  7677. sp_digit* t[16];
  7678. sp_digit* norm;
  7679. sp_digit mp = 1;
  7680. sp_digit n;
  7681. sp_digit mask;
  7682. int i;
  7683. int c, y;
  7684. int err = MP_OKAY;
  7685. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7686. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL,
  7687. DYNAMIC_TYPE_TMP_BUFFER);
  7688. if (td == NULL) {
  7689. err = MEMORY_E;
  7690. }
  7691. #endif
  7692. if (err == MP_OKAY) {
  7693. norm = td;
  7694. for (i=0; i<16; i++) {
  7695. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7696. t[i] = td + i * 96;
  7697. #else
  7698. t[i] = &td[i * 96];
  7699. #endif
  7700. }
  7701. sp_3072_mont_setup(m, &mp);
  7702. sp_3072_mont_norm_48(norm, m);
  7703. XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
  7704. if (reduceA != 0) {
  7705. err = sp_3072_mod_48(t[1] + 48, a, m);
  7706. if (err == MP_OKAY) {
  7707. err = sp_3072_mod_48(t[1], t[1], m);
  7708. }
  7709. }
  7710. else {
  7711. XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
  7712. err = sp_3072_mod_48(t[1], t[1], m);
  7713. }
  7714. }
  7715. if (err == MP_OKAY) {
  7716. sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
  7717. sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
  7718. sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
  7719. sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
  7720. sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
  7721. sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
  7722. sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
  7723. sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
  7724. sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
  7725. sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
  7726. sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
  7727. sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
  7728. sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
  7729. sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
  7730. i = (bits - 1) / 32;
  7731. n = e[i--];
  7732. c = bits & 31;
  7733. if (c == 0) {
  7734. c = 32;
  7735. }
  7736. c -= bits % 4;
  7737. if (c == 32) {
  7738. c = 28;
  7739. }
  7740. if (c < 0) {
  7741. /* Number of bits in top word is less than number needed. */
  7742. c = -c;
  7743. y = (int)(n << c);
  7744. n = e[i--];
  7745. y |= (int)(n >> (64 - c));
  7746. n <<= c;
  7747. c = 64 - c;
  7748. }
  7749. else {
  7750. y = (int)(n >> c);
  7751. n <<= 32 - c;
  7752. }
  7753. XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
  7754. for (; i>=0 || c>=4; ) {
  7755. if (c == 0) {
  7756. n = e[i--];
  7757. y = (int)(n >> 28);
  7758. n <<= 4;
  7759. c = 28;
  7760. }
  7761. else if (c < 4) {
  7762. y = (int)(n >> 28);
  7763. n = e[i--];
  7764. c = 4 - c;
  7765. y |= (int)(n >> (32 - c));
  7766. n <<= c;
  7767. c = 32 - c;
  7768. }
  7769. else {
  7770. y = (int)((n >> 28) & 0xf);
  7771. n <<= 4;
  7772. c -= 4;
  7773. }
  7774. sp_3072_mont_sqr_48(r, r, m, mp);
  7775. sp_3072_mont_sqr_48(r, r, m, mp);
  7776. sp_3072_mont_sqr_48(r, r, m, mp);
  7777. sp_3072_mont_sqr_48(r, r, m, mp);
  7778. sp_3072_mont_mul_48(r, r, t[y], m, mp);
  7779. }
  7780. XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
  7781. sp_3072_mont_reduce_48(r, m, mp);
  7782. mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
  7783. sp_3072_cond_sub_48(r, r, m, mask);
  7784. }
  7785. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7786. if (td != NULL) {
  7787. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  7788. }
  7789. #endif
  7790. return err;
  7791. }
  7792. #else
  7793. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  7794. *
  7795. * r A single precision number that is the result of the operation.
  7796. * a A single precision number being exponentiated.
  7797. * e A single precision number that is the exponent.
  7798. * bits The number of bits in the exponent.
  7799. * m A single precision number that is the modulus.
  7800. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  7801. */
  7802. static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
  7803. int bits, const sp_digit* m, int reduceA)
  7804. {
  7805. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7806. sp_digit* td;
  7807. #else
  7808. sp_digit td[32 * 96];
  7809. #endif
  7810. sp_digit* t[32];
  7811. sp_digit* norm;
  7812. sp_digit mp = 1;
  7813. sp_digit n;
  7814. sp_digit mask;
  7815. int i;
  7816. int c, y;
  7817. int err = MP_OKAY;
  7818. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7819. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 96), NULL,
  7820. DYNAMIC_TYPE_TMP_BUFFER);
  7821. if (td == NULL) {
  7822. err = MEMORY_E;
  7823. }
  7824. #endif
  7825. if (err == MP_OKAY) {
  7826. norm = td;
  7827. for (i=0; i<32; i++) {
  7828. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7829. t[i] = td + i * 96;
  7830. #else
  7831. t[i] = &td[i * 96];
  7832. #endif
  7833. }
  7834. sp_3072_mont_setup(m, &mp);
  7835. sp_3072_mont_norm_48(norm, m);
  7836. XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
  7837. if (reduceA != 0) {
  7838. err = sp_3072_mod_48(t[1] + 48, a, m);
  7839. if (err == MP_OKAY) {
  7840. err = sp_3072_mod_48(t[1], t[1], m);
  7841. }
  7842. }
  7843. else {
  7844. XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
  7845. err = sp_3072_mod_48(t[1], t[1], m);
  7846. }
  7847. }
  7848. if (err == MP_OKAY) {
  7849. sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
  7850. sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
  7851. sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
  7852. sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
  7853. sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
  7854. sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
  7855. sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
  7856. sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
  7857. sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
  7858. sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
  7859. sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
  7860. sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
  7861. sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
  7862. sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
  7863. sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
  7864. sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
  7865. sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
  7866. sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
  7867. sp_3072_mont_sqr_48(t[20], t[10], m, mp);
  7868. sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
  7869. sp_3072_mont_sqr_48(t[22], t[11], m, mp);
  7870. sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
  7871. sp_3072_mont_sqr_48(t[24], t[12], m, mp);
  7872. sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
  7873. sp_3072_mont_sqr_48(t[26], t[13], m, mp);
  7874. sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
  7875. sp_3072_mont_sqr_48(t[28], t[14], m, mp);
  7876. sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
  7877. sp_3072_mont_sqr_48(t[30], t[15], m, mp);
  7878. sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
  7879. i = (bits - 1) / 32;
  7880. n = e[i--];
  7881. c = bits & 31;
  7882. if (c == 0) {
  7883. c = 32;
  7884. }
  7885. c -= bits % 5;
  7886. if (c == 32) {
  7887. c = 27;
  7888. }
  7889. if (c < 0) {
  7890. /* Number of bits in top word is less than number needed. */
  7891. c = -c;
  7892. y = (int)(n << c);
  7893. n = e[i--];
  7894. y |= (int)(n >> (64 - c));
  7895. n <<= c;
  7896. c = 64 - c;
  7897. }
  7898. else {
  7899. y = (int)(n >> c);
  7900. n <<= 32 - c;
  7901. }
  7902. XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
  7903. for (; i>=0 || c>=5; ) {
  7904. if (c == 0) {
  7905. n = e[i--];
  7906. y = (int)(n >> 27);
  7907. n <<= 5;
  7908. c = 27;
  7909. }
  7910. else if (c < 5) {
  7911. y = (int)(n >> 27);
  7912. n = e[i--];
  7913. c = 5 - c;
  7914. y |= (int)(n >> (32 - c));
  7915. n <<= c;
  7916. c = 32 - c;
  7917. }
  7918. else {
  7919. y = (int)((n >> 27) & 0x1f);
  7920. n <<= 5;
  7921. c -= 5;
  7922. }
  7923. sp_3072_mont_sqr_48(r, r, m, mp);
  7924. sp_3072_mont_sqr_48(r, r, m, mp);
  7925. sp_3072_mont_sqr_48(r, r, m, mp);
  7926. sp_3072_mont_sqr_48(r, r, m, mp);
  7927. sp_3072_mont_sqr_48(r, r, m, mp);
  7928. sp_3072_mont_mul_48(r, r, t[y], m, mp);
  7929. }
  7930. XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
  7931. sp_3072_mont_reduce_48(r, m, mp);
  7932. mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
  7933. sp_3072_cond_sub_48(r, r, m, mask);
  7934. }
  7935. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  7936. if (td != NULL) {
  7937. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  7938. }
  7939. #endif
  7940. return err;
  7941. }
  7942. #endif /* WOLFSSL_SP_SMALL */
  7943. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  7944. #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
  7945. /* r = 2^n mod m where n is the number of bits to reduce by.
  7946. * Given m must be 3072 bits, just need to subtract.
  7947. *
  7948. * r A single precision number.
  7949. * m A single precision number.
  7950. */
  7951. static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
  7952. {
  7953. XMEMSET(r, 0, sizeof(sp_digit) * 96);
  7954. /* r = 2^n mod m */
  7955. sp_3072_sub_in_place_96(r, m);
  7956. }
  7957. #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
  7958. /* Conditionally subtract b from a using the mask m.
  7959. * m is -1 to subtract and 0 when not copying.
  7960. *
  7961. * r A single precision number representing condition subtract result.
  7962. * a A single precision number to subtract from.
  7963. * b A single precision number to subtract.
  7964. * m Mask value to apply.
  7965. */
  7966. SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
  7967. const sp_digit* b, sp_digit m)
  7968. {
  7969. sp_digit c = 0;
  7970. __asm__ __volatile__ (
  7971. "mov r5, #1\n\t"
  7972. "lsl r5, r5, #8\n\t"
  7973. "add r5, r5, #128\n\t"
  7974. "mov r9, r5\n\t"
  7975. "mov r8, #0\n\t"
  7976. "\n1:\n\t"
  7977. "ldr r6, [%[b], r8]\n\t"
  7978. "and r6, r6, %[m]\n\t"
  7979. "mov r5, #0\n\t"
  7980. "subs r5, r5, %[c]\n\t"
  7981. "ldr r5, [%[a], r8]\n\t"
  7982. "sbcs r5, r5, r6\n\t"
  7983. "sbcs %[c], %[c], %[c]\n\t"
  7984. "str r5, [%[r], r8]\n\t"
  7985. "add r8, r8, #4\n\t"
  7986. "cmp r8, r9\n\t"
  7987. #ifdef __GNUC__
  7988. "blt 1b\n\t"
  7989. #else
  7990. "blt.n 1b\n\t"
  7991. #endif /* __GNUC__ */
  7992. : [c] "+r" (c)
  7993. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  7994. : "memory", "r5", "r6", "r8", "r9"
  7995. );
  7996. return c;
  7997. }
  7998. /* Reduce the number back to 3072 bits using Montgomery reduction.
  7999. *
  8000. * a A single precision number to reduce in place.
  8001. * m The single precision number representing the modulus.
  8002. * mp The digit representing the negative inverse of m mod 2^n.
  8003. */
  8004. SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
  8005. sp_digit mp)
  8006. {
  8007. sp_digit ca = 0;
  8008. __asm__ __volatile__ (
  8009. "mov r9, %[mp]\n\t"
  8010. "mov r12, %[m]\n\t"
  8011. "mov r10, %[a]\n\t"
  8012. "mov r4, #0\n\t"
  8013. "add r11, r10, #384\n\t"
  8014. "\n1:\n\t"
  8015. /* mu = a[i] * mp */
  8016. "mov %[mp], r9\n\t"
  8017. "ldr %[a], [r10]\n\t"
  8018. "mul %[mp], %[mp], %[a]\n\t"
  8019. "mov %[m], r12\n\t"
  8020. "add r14, r10, #376\n\t"
  8021. "\n2:\n\t"
  8022. /* a[i+j] += m[j] * mu */
  8023. "ldr %[a], [r10]\n\t"
  8024. "mov r5, #0\n\t"
  8025. /* Multiply m[j] and mu - Start */
  8026. "ldr r8, [%[m]], #4\n\t"
  8027. "umull r6, r8, %[mp], r8\n\t"
  8028. "adds %[a], %[a], r6\n\t"
  8029. "adc r5, r5, r8\n\t"
  8030. /* Multiply m[j] and mu - Done */
  8031. "adds r4, r4, %[a]\n\t"
  8032. "adc r5, r5, #0\n\t"
  8033. "str r4, [r10], #4\n\t"
  8034. /* a[i+j+1] += m[j+1] * mu */
  8035. "ldr %[a], [r10]\n\t"
  8036. "mov r4, #0\n\t"
  8037. /* Multiply m[j] and mu - Start */
  8038. "ldr r8, [%[m]], #4\n\t"
  8039. "umull r6, r8, %[mp], r8\n\t"
  8040. "adds %[a], %[a], r6\n\t"
  8041. "adc r4, r4, r8\n\t"
  8042. /* Multiply m[j] and mu - Done */
  8043. "adds r5, r5, %[a]\n\t"
  8044. "adc r4, r4, #0\n\t"
  8045. "str r5, [r10], #4\n\t"
  8046. "cmp r10, r14\n\t"
  8047. #ifdef __GNUC__
  8048. "blt 2b\n\t"
  8049. #else
  8050. "blt.n 2b\n\t"
  8051. #endif /* __GNUC__ */
  8052. /* a[i+94] += m[94] * mu */
  8053. "ldr %[a], [r10]\n\t"
  8054. "mov r5, #0\n\t"
  8055. /* Multiply m[j] and mu - Start */
  8056. "ldr r8, [%[m]], #4\n\t"
  8057. "umull r6, r8, %[mp], r8\n\t"
  8058. "adds %[a], %[a], r6\n\t"
  8059. "adc r5, r5, r8\n\t"
  8060. /* Multiply m[j] and mu - Done */
  8061. "adds r4, r4, %[a]\n\t"
  8062. "adc r5, r5, #0\n\t"
  8063. "str r4, [r10], #4\n\t"
  8064. /* a[i+95] += m[95] * mu */
  8065. "mov r4, %[ca]\n\t"
  8066. "mov %[ca], #0\n\t"
  8067. /* Multiply m[95] and mu - Start */
  8068. "ldr r8, [%[m]]\n\t"
  8069. "umull r6, r8, %[mp], r8\n\t"
  8070. "adds r5, r5, r6\n\t"
  8071. "adcs r4, r4, r8\n\t"
  8072. "adc %[ca], %[ca], #0\n\t"
  8073. /* Multiply m[95] and mu - Done */
  8074. "ldr r6, [r10]\n\t"
  8075. "ldr r8, [r10, #4]\n\t"
  8076. "adds r6, r6, r5\n\t"
  8077. "adcs r8, r8, r4\n\t"
  8078. "adc %[ca], %[ca], #0\n\t"
  8079. "str r6, [r10]\n\t"
  8080. "str r8, [r10, #4]\n\t"
  8081. /* Next word in a */
  8082. "sub r10, r10, #376\n\t"
  8083. "cmp r10, r11\n\t"
  8084. #ifdef __GNUC__
  8085. "blt 1b\n\t"
  8086. #else
  8087. "blt.n 1b\n\t"
  8088. #endif /* __GNUC__ */
  8089. "mov %[a], r10\n\t"
  8090. "mov %[m], r12\n\t"
  8091. : [ca] "+r" (ca), [a] "+r" (a)
  8092. : [m] "r" (m), [mp] "r" (mp)
  8093. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  8094. );
  8095. sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
  8096. }
  8097. /* Multiply two Montogmery form numbers mod the modulus (prime).
  8098. * (r = a * b mod m)
  8099. *
  8100. * r Result of multiplication.
  8101. * a First number to multiply in Montogmery form.
  8102. * b Second number to multiply in Montogmery form.
  8103. * m Modulus (prime).
  8104. * mp Montogmery mulitplier.
  8105. */
  8106. static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
  8107. const sp_digit* m, sp_digit mp)
  8108. {
  8109. sp_3072_mul_96(r, a, b);
  8110. sp_3072_mont_reduce_96(r, m, mp);
  8111. }
  8112. /* Square the Montgomery form number. (r = a * a mod m)
  8113. *
  8114. * r Result of squaring.
  8115. * a Number to square in Montogmery form.
  8116. * m Modulus (prime).
  8117. * mp Montogmery mulitplier.
  8118. */
  8119. static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
  8120. sp_digit mp)
  8121. {
  8122. sp_3072_sqr_96(r, a);
  8123. sp_3072_mont_reduce_96(r, m, mp);
  8124. }
  8125. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  8126. *
  8127. * d1 The high order half of the number to divide.
  8128. * d0 The low order half of the number to divide.
  8129. * div The dividend.
  8130. * returns the result of the division.
  8131. *
  8132. * Note that this is an approximate div. It may give an answer 1 larger.
  8133. */
  8134. SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
  8135. sp_digit div)
  8136. {
  8137. sp_digit r = 0;
  8138. __asm__ __volatile__ (
  8139. "lsr r6, %[div], #16\n\t"
  8140. "add r6, r6, #1\n\t"
  8141. "udiv r4, %[d1], r6\n\t"
  8142. "lsl r8, r4, #16\n\t"
  8143. "umull r4, r5, %[div], r8\n\t"
  8144. "subs %[d0], %[d0], r4\n\t"
  8145. "sbc %[d1], %[d1], r5\n\t"
  8146. "udiv r5, %[d1], r6\n\t"
  8147. "lsl r4, r5, #16\n\t"
  8148. "add r8, r8, r4\n\t"
  8149. "umull r4, r5, %[div], r4\n\t"
  8150. "subs %[d0], %[d0], r4\n\t"
  8151. "sbc %[d1], %[d1], r5\n\t"
  8152. "lsl r4, %[d1], #16\n\t"
  8153. "orr r4, r4, %[d0], lsr #16\n\t"
  8154. "udiv r4, r4, r6\n\t"
  8155. "add r8, r8, r4\n\t"
  8156. "umull r4, r5, %[div], r4\n\t"
  8157. "subs %[d0], %[d0], r4\n\t"
  8158. "sbc %[d1], %[d1], r5\n\t"
  8159. "lsl r4, %[d1], #16\n\t"
  8160. "orr r4, r4, %[d0], lsr #16\n\t"
  8161. "udiv r4, r4, r6\n\t"
  8162. "add r8, r8, r4\n\t"
  8163. "umull r4, r5, %[div], r4\n\t"
  8164. "subs %[d0], %[d0], r4\n\t"
  8165. "sbc %[d1], %[d1], r5\n\t"
  8166. "udiv r4, %[d0], %[div]\n\t"
  8167. "add r8, r8, r4\n\t"
  8168. "mov %[r], r8\n\t"
  8169. : [r] "+r" (r)
  8170. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  8171. : "r4", "r5", "r6", "r8"
  8172. );
  8173. return r;
  8174. }
  8175. /* AND m into each word of a and store in r.
  8176. *
  8177. * r A single precision integer.
  8178. * a A single precision integer.
  8179. * m Mask to AND against each digit.
  8180. */
  8181. static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
  8182. {
  8183. #ifdef WOLFSSL_SP_SMALL
  8184. int i;
  8185. for (i=0; i<96; i++) {
  8186. r[i] = a[i] & m;
  8187. }
  8188. #else
  8189. int i;
  8190. for (i = 0; i < 96; i += 8) {
  8191. r[i+0] = a[i+0] & m;
  8192. r[i+1] = a[i+1] & m;
  8193. r[i+2] = a[i+2] & m;
  8194. r[i+3] = a[i+3] & m;
  8195. r[i+4] = a[i+4] & m;
  8196. r[i+5] = a[i+5] & m;
  8197. r[i+6] = a[i+6] & m;
  8198. r[i+7] = a[i+7] & m;
  8199. }
  8200. #endif
  8201. }
  8202. /* Compare a with b in constant time.
  8203. *
  8204. * a A single precision integer.
  8205. * b A single precision integer.
  8206. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  8207. * respectively.
  8208. */
  8209. SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
  8210. {
  8211. sp_digit r = 0;
  8212. __asm__ __volatile__ (
  8213. "mov r3, #0\n\t"
  8214. "mvn r3, r3\n\t"
  8215. "mov r6, #1\n\t"
  8216. "lsl r6, r6, #8\n\t"
  8217. "add r6, r6, #124\n\t"
  8218. "\n1:\n\t"
  8219. "ldr r8, [%[a], r6]\n\t"
  8220. "ldr r5, [%[b], r6]\n\t"
  8221. "and r8, r8, r3\n\t"
  8222. "and r5, r5, r3\n\t"
  8223. "mov r4, r8\n\t"
  8224. "subs r8, r8, r5\n\t"
  8225. "sbc r8, r8, r8\n\t"
  8226. "add %[r], %[r], r8\n\t"
  8227. "mvn r8, r8\n\t"
  8228. "and r3, r3, r8\n\t"
  8229. "subs r5, r5, r4\n\t"
  8230. "sbc r8, r8, r8\n\t"
  8231. "sub %[r], %[r], r8\n\t"
  8232. "mvn r8, r8\n\t"
  8233. "and r3, r3, r8\n\t"
  8234. "sub r6, r6, #4\n\t"
  8235. "cmp r6, #0\n\t"
  8236. #ifdef __GNUC__
  8237. "bge 1b\n\t"
  8238. #else
  8239. "bge.n 1b\n\t"
  8240. #endif /* __GNUC__ */
  8241. : [r] "+r" (r)
  8242. : [a] "r" (a), [b] "r" (b)
  8243. : "r3", "r4", "r5", "r6", "r8"
  8244. );
  8245. return r;
  8246. }
  8247. /* Divide d in a and put remainder into r (m*d + r = a)
  8248. * m is not calculated as it is not needed at this time.
  8249. *
  8250. * a Number to be divided.
  8251. * d Number to divide with.
  8252. * m Multiplier result.
  8253. * r Remainder from the division.
  8254. * returns MP_OKAY indicating success.
  8255. */
  8256. static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
  8257. sp_digit* r)
  8258. {
  8259. sp_digit t1[192], t2[97];
  8260. sp_digit div, r1;
  8261. int i;
  8262. (void)m;
  8263. div = d[95];
  8264. XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
  8265. for (i=95; i>=0; i--) {
  8266. sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
  8267. r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
  8268. sp_3072_mul_d_96(t2, d, r1);
  8269. t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
  8270. t1[96 + i] -= t2[96];
  8271. sp_3072_mask_96(t2, d, t1[96 + i]);
  8272. t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
  8273. sp_3072_mask_96(t2, d, t1[96 + i]);
  8274. t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
  8275. }
  8276. r1 = sp_3072_cmp_96(t1, d) >= 0;
  8277. sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
  8278. return MP_OKAY;
  8279. }
  8280. /* Reduce a modulo m into r. (r = a mod m)
  8281. *
  8282. * r A single precision number that is the reduced result.
  8283. * a A single precision number that is to be reduced.
  8284. * m A single precision number that is the modulus to reduce with.
  8285. * returns MP_OKAY indicating success.
  8286. */
  8287. static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
  8288. {
  8289. return sp_3072_div_96(a, m, NULL, r);
  8290. }
  8291. /* Divide d in a and put remainder into r (m*d + r = a)
  8292. * m is not calculated as it is not needed at this time.
  8293. *
  8294. * a Number to be divided.
  8295. * d Number to divide with.
  8296. * m Multiplier result.
  8297. * r Remainder from the division.
  8298. * returns MP_OKAY indicating success.
  8299. */
  8300. static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
  8301. sp_digit* r)
  8302. {
  8303. sp_digit t1[192], t2[97];
  8304. sp_digit div, r1;
  8305. int i;
  8306. (void)m;
  8307. div = d[95];
  8308. XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
  8309. for (i=95; i>=0; i--) {
  8310. sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
  8311. r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
  8312. sp_3072_mul_d_96(t2, d, r1);
  8313. t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
  8314. t1[96 + i] -= t2[96];
  8315. if (t1[96 + i] != 0) {
  8316. t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
  8317. if (t1[96 + i] != 0)
  8318. t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
  8319. }
  8320. }
  8321. r1 = sp_3072_cmp_96(t1, d) >= 0;
  8322. sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
  8323. return MP_OKAY;
  8324. }
  8325. /* Reduce a modulo m into r. (r = a mod m)
  8326. *
  8327. * r A single precision number that is the reduced result.
  8328. * a A single precision number that is to be reduced.
  8329. * m A single precision number that is the modulus to reduce with.
  8330. * returns MP_OKAY indicating success.
  8331. */
  8332. static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
  8333. {
  8334. return sp_3072_div_96_cond(a, m, NULL, r);
  8335. }
  8336. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  8337. defined(WOLFSSL_HAVE_SP_DH)
  8338. #ifdef WOLFSSL_SP_SMALL
  8339. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  8340. *
  8341. * r A single precision number that is the result of the operation.
  8342. * a A single precision number being exponentiated.
  8343. * e A single precision number that is the exponent.
  8344. * bits The number of bits in the exponent.
  8345. * m A single precision number that is the modulus.
  8346. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  8347. */
  8348. static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
  8349. int bits, const sp_digit* m, int reduceA)
  8350. {
  8351. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8352. sp_digit* td;
  8353. #else
  8354. sp_digit td[16 * 192];
  8355. #endif
  8356. sp_digit* t[16];
  8357. sp_digit* norm;
  8358. sp_digit mp = 1;
  8359. sp_digit n;
  8360. sp_digit mask;
  8361. int i;
  8362. int c, y;
  8363. int err = MP_OKAY;
  8364. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8365. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 192), NULL,
  8366. DYNAMIC_TYPE_TMP_BUFFER);
  8367. if (td == NULL) {
  8368. err = MEMORY_E;
  8369. }
  8370. #endif
  8371. if (err == MP_OKAY) {
  8372. norm = td;
  8373. for (i=0; i<16; i++) {
  8374. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8375. t[i] = td + i * 192;
  8376. #else
  8377. t[i] = &td[i * 192];
  8378. #endif
  8379. }
  8380. sp_3072_mont_setup(m, &mp);
  8381. sp_3072_mont_norm_96(norm, m);
  8382. XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
  8383. if (reduceA != 0) {
  8384. err = sp_3072_mod_96(t[1] + 96, a, m);
  8385. if (err == MP_OKAY) {
  8386. err = sp_3072_mod_96(t[1], t[1], m);
  8387. }
  8388. }
  8389. else {
  8390. XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
  8391. err = sp_3072_mod_96(t[1], t[1], m);
  8392. }
  8393. }
  8394. if (err == MP_OKAY) {
  8395. sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
  8396. sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
  8397. sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
  8398. sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
  8399. sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
  8400. sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
  8401. sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
  8402. sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
  8403. sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
  8404. sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
  8405. sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
  8406. sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
  8407. sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
  8408. sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
  8409. i = (bits - 1) / 32;
  8410. n = e[i--];
  8411. c = bits & 31;
  8412. if (c == 0) {
  8413. c = 32;
  8414. }
  8415. c -= bits % 4;
  8416. if (c == 32) {
  8417. c = 28;
  8418. }
  8419. if (c < 0) {
  8420. /* Number of bits in top word is less than number needed. */
  8421. c = -c;
  8422. y = (int)(n << c);
  8423. n = e[i--];
  8424. y |= (int)(n >> (64 - c));
  8425. n <<= c;
  8426. c = 64 - c;
  8427. }
  8428. else {
  8429. y = (int)(n >> c);
  8430. n <<= 32 - c;
  8431. }
  8432. XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
  8433. for (; i>=0 || c>=4; ) {
  8434. if (c == 0) {
  8435. n = e[i--];
  8436. y = (int)(n >> 28);
  8437. n <<= 4;
  8438. c = 28;
  8439. }
  8440. else if (c < 4) {
  8441. y = (int)(n >> 28);
  8442. n = e[i--];
  8443. c = 4 - c;
  8444. y |= (int)(n >> (32 - c));
  8445. n <<= c;
  8446. c = 32 - c;
  8447. }
  8448. else {
  8449. y = (int)((n >> 28) & 0xf);
  8450. n <<= 4;
  8451. c -= 4;
  8452. }
  8453. sp_3072_mont_sqr_96(r, r, m, mp);
  8454. sp_3072_mont_sqr_96(r, r, m, mp);
  8455. sp_3072_mont_sqr_96(r, r, m, mp);
  8456. sp_3072_mont_sqr_96(r, r, m, mp);
  8457. sp_3072_mont_mul_96(r, r, t[y], m, mp);
  8458. }
  8459. XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
  8460. sp_3072_mont_reduce_96(r, m, mp);
  8461. mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
  8462. sp_3072_cond_sub_96(r, r, m, mask);
  8463. }
  8464. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8465. if (td != NULL) {
  8466. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  8467. }
  8468. #endif
  8469. return err;
  8470. }
  8471. #else
  8472. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  8473. *
  8474. * r A single precision number that is the result of the operation.
  8475. * a A single precision number being exponentiated.
  8476. * e A single precision number that is the exponent.
  8477. * bits The number of bits in the exponent.
  8478. * m A single precision number that is the modulus.
  8479. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  8480. */
  8481. static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
  8482. int bits, const sp_digit* m, int reduceA)
  8483. {
  8484. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8485. sp_digit* td;
  8486. #else
  8487. sp_digit td[32 * 192];
  8488. #endif
  8489. sp_digit* t[32];
  8490. sp_digit* norm;
  8491. sp_digit mp = 1;
  8492. sp_digit n;
  8493. sp_digit mask;
  8494. int i;
  8495. int c, y;
  8496. int err = MP_OKAY;
  8497. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8498. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 192), NULL,
  8499. DYNAMIC_TYPE_TMP_BUFFER);
  8500. if (td == NULL) {
  8501. err = MEMORY_E;
  8502. }
  8503. #endif
  8504. if (err == MP_OKAY) {
  8505. norm = td;
  8506. for (i=0; i<32; i++) {
  8507. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8508. t[i] = td + i * 192;
  8509. #else
  8510. t[i] = &td[i * 192];
  8511. #endif
  8512. }
  8513. sp_3072_mont_setup(m, &mp);
  8514. sp_3072_mont_norm_96(norm, m);
  8515. XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
  8516. if (reduceA != 0) {
  8517. err = sp_3072_mod_96(t[1] + 96, a, m);
  8518. if (err == MP_OKAY) {
  8519. err = sp_3072_mod_96(t[1], t[1], m);
  8520. }
  8521. }
  8522. else {
  8523. XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
  8524. err = sp_3072_mod_96(t[1], t[1], m);
  8525. }
  8526. }
  8527. if (err == MP_OKAY) {
  8528. sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
  8529. sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
  8530. sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
  8531. sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
  8532. sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
  8533. sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
  8534. sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
  8535. sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
  8536. sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
  8537. sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
  8538. sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
  8539. sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
  8540. sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
  8541. sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
  8542. sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
  8543. sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
  8544. sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
  8545. sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
  8546. sp_3072_mont_sqr_96(t[20], t[10], m, mp);
  8547. sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
  8548. sp_3072_mont_sqr_96(t[22], t[11], m, mp);
  8549. sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
  8550. sp_3072_mont_sqr_96(t[24], t[12], m, mp);
  8551. sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
  8552. sp_3072_mont_sqr_96(t[26], t[13], m, mp);
  8553. sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
  8554. sp_3072_mont_sqr_96(t[28], t[14], m, mp);
  8555. sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
  8556. sp_3072_mont_sqr_96(t[30], t[15], m, mp);
  8557. sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
  8558. i = (bits - 1) / 32;
  8559. n = e[i--];
  8560. c = bits & 31;
  8561. if (c == 0) {
  8562. c = 32;
  8563. }
  8564. c -= bits % 5;
  8565. if (c == 32) {
  8566. c = 27;
  8567. }
  8568. if (c < 0) {
  8569. /* Number of bits in top word is less than number needed. */
  8570. c = -c;
  8571. y = (int)(n << c);
  8572. n = e[i--];
  8573. y |= (int)(n >> (64 - c));
  8574. n <<= c;
  8575. c = 64 - c;
  8576. }
  8577. else {
  8578. y = (int)(n >> c);
  8579. n <<= 32 - c;
  8580. }
  8581. XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
  8582. for (; i>=0 || c>=5; ) {
  8583. if (c == 0) {
  8584. n = e[i--];
  8585. y = (int)(n >> 27);
  8586. n <<= 5;
  8587. c = 27;
  8588. }
  8589. else if (c < 5) {
  8590. y = (int)(n >> 27);
  8591. n = e[i--];
  8592. c = 5 - c;
  8593. y |= (int)(n >> (32 - c));
  8594. n <<= c;
  8595. c = 32 - c;
  8596. }
  8597. else {
  8598. y = (int)((n >> 27) & 0x1f);
  8599. n <<= 5;
  8600. c -= 5;
  8601. }
  8602. sp_3072_mont_sqr_96(r, r, m, mp);
  8603. sp_3072_mont_sqr_96(r, r, m, mp);
  8604. sp_3072_mont_sqr_96(r, r, m, mp);
  8605. sp_3072_mont_sqr_96(r, r, m, mp);
  8606. sp_3072_mont_sqr_96(r, r, m, mp);
  8607. sp_3072_mont_mul_96(r, r, t[y], m, mp);
  8608. }
  8609. XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
  8610. sp_3072_mont_reduce_96(r, m, mp);
  8611. mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
  8612. sp_3072_cond_sub_96(r, r, m, mask);
  8613. }
  8614. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8615. if (td != NULL) {
  8616. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  8617. }
  8618. #endif
  8619. return err;
  8620. }
  8621. #endif /* WOLFSSL_SP_SMALL */
  8622. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  8623. #ifdef WOLFSSL_HAVE_SP_RSA
  8624. /* RSA public key operation.
  8625. *
  8626. * in Array of bytes representing the number to exponentiate, base.
  8627. * inLen Number of bytes in base.
  8628. * em Public exponent.
  8629. * mm Modulus.
  8630. * out Buffer to hold big-endian bytes of exponentiation result.
  8631. * Must be at least 384 bytes long.
  8632. * outLen Number of bytes in result.
  8633. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  8634. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  8635. */
  8636. int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
  8637. byte* out, word32* outLen)
  8638. {
  8639. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  8640. sp_digit a[192], m[96], r[192];
  8641. #else
  8642. sp_digit* d = NULL;
  8643. sp_digit* a = NULL;
  8644. sp_digit* m = NULL;
  8645. sp_digit* r = NULL;
  8646. #endif
  8647. sp_digit *ah = NULL;
  8648. sp_digit e[1];
  8649. int err = MP_OKAY;
  8650. if (*outLen < 384) {
  8651. err = MP_TO_E;
  8652. }
  8653. else if (mp_count_bits(em) > 32 || inLen > 384 ||
  8654. mp_count_bits(mm) != 3072) {
  8655. err = MP_READ_E;
  8656. }
  8657. else if (mp_iseven(mm)) {
  8658. err = MP_VAL;
  8659. }
  8660. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  8661. if (err == MP_OKAY) {
  8662. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
  8663. DYNAMIC_TYPE_RSA);
  8664. if (d == NULL)
  8665. err = MEMORY_E;
  8666. }
  8667. if (err == MP_OKAY) {
  8668. a = d;
  8669. r = a + 96 * 2;
  8670. m = r + 96 * 2;
  8671. }
  8672. #endif
  8673. if (err == MP_OKAY) {
  8674. ah = a + 96;
  8675. sp_3072_from_bin(ah, 96, in, inLen);
  8676. #if DIGIT_BIT >= 32
  8677. e[0] = em->dp[0];
  8678. #else
  8679. e[0] = em->dp[0];
  8680. if (em->used > 1) {
  8681. e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
  8682. }
  8683. #endif
  8684. if (e[0] == 0) {
  8685. err = MP_EXPTMOD_E;
  8686. }
  8687. }
  8688. if (err == MP_OKAY) {
  8689. sp_3072_from_mp(m, 96, mm);
  8690. if (e[0] == 0x3) {
  8691. if (err == MP_OKAY) {
  8692. sp_3072_sqr_96(r, ah);
  8693. err = sp_3072_mod_96_cond(r, r, m);
  8694. }
  8695. if (err == MP_OKAY) {
  8696. sp_3072_mul_96(r, ah, r);
  8697. err = sp_3072_mod_96_cond(r, r, m);
  8698. }
  8699. }
  8700. else {
  8701. int i;
  8702. sp_digit mp;
  8703. sp_3072_mont_setup(m, &mp);
  8704. /* Convert to Montgomery form. */
  8705. XMEMSET(a, 0, sizeof(sp_digit) * 96);
  8706. err = sp_3072_mod_96_cond(a, a, m);
  8707. if (err == MP_OKAY) {
  8708. for (i = 31; i >= 0; i--) {
  8709. if (e[0] >> i) {
  8710. break;
  8711. }
  8712. }
  8713. XMEMCPY(r, a, sizeof(sp_digit) * 96);
  8714. for (i--; i>=0; i--) {
  8715. sp_3072_mont_sqr_96(r, r, m, mp);
  8716. if (((e[0] >> i) & 1) == 1) {
  8717. sp_3072_mont_mul_96(r, r, a, m, mp);
  8718. }
  8719. }
  8720. XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
  8721. sp_3072_mont_reduce_96(r, m, mp);
  8722. for (i = 95; i > 0; i--) {
  8723. if (r[i] != m[i]) {
  8724. break;
  8725. }
  8726. }
  8727. if (r[i] >= m[i]) {
  8728. sp_3072_sub_in_place_96(r, m);
  8729. }
  8730. }
  8731. }
  8732. }
  8733. if (err == MP_OKAY) {
  8734. sp_3072_to_bin(r, out);
  8735. *outLen = 384;
  8736. }
  8737. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  8738. if (d != NULL) {
  8739. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  8740. }
  8741. #endif
  8742. return err;
  8743. }
  8744. #ifndef WOLFSSL_RSA_PUBLIC_ONLY
  8745. /* Conditionally add a and b using the mask m.
  8746. * m is -1 to add and 0 when not.
  8747. *
  8748. * r A single precision number representing conditional add result.
  8749. * a A single precision number to add with.
  8750. * b A single precision number to add.
  8751. * m Mask value to apply.
  8752. */
  8753. SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
  8754. sp_digit m)
  8755. {
  8756. sp_digit c = 0;
  8757. __asm__ __volatile__ (
  8758. "mov r5, #192\n\t"
  8759. "mov r9, r5\n\t"
  8760. "mov r8, #0\n\t"
  8761. "\n1:\n\t"
  8762. "ldr r6, [%[b], r8]\n\t"
  8763. "and r6, r6, %[m]\n\t"
  8764. "adds r5, %[c], #-1\n\t"
  8765. "ldr r5, [%[a], r8]\n\t"
  8766. "adcs r5, r5, r6\n\t"
  8767. "mov %[c], #0\n\t"
  8768. "adcs %[c], %[c], %[c]\n\t"
  8769. "str r5, [%[r], r8]\n\t"
  8770. "add r8, r8, #4\n\t"
  8771. "cmp r8, r9\n\t"
  8772. #ifdef __GNUC__
  8773. "blt 1b\n\t"
  8774. #else
  8775. "blt.n 1b\n\t"
  8776. #endif /* __GNUC__ */
  8777. : [c] "+r" (c)
  8778. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  8779. : "memory", "r5", "r6", "r8", "r9"
  8780. );
  8781. return c;
  8782. }
  8783. /* RSA private key operation.
  8784. *
  8785. * in Array of bytes representing the number to exponentiate, base.
  8786. * inLen Number of bytes in base.
  8787. * dm Private exponent.
  8788. * pm First prime.
  8789. * qm Second prime.
  8790. * dpm First prime's CRT exponent.
  8791. * dqm Second prime's CRT exponent.
  8792. * qim Inverse of second prime mod p.
  8793. * mm Modulus.
  8794. * out Buffer to hold big-endian bytes of exponentiation result.
  8795. * Must be at least 384 bytes long.
  8796. * outLen Number of bytes in result.
  8797. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  8798. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  8799. */
  8800. int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
  8801. mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
  8802. byte* out, word32* outLen)
  8803. {
  8804. #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
  8805. sp_digit* a = NULL;
  8806. sp_digit* d = NULL;
  8807. sp_digit* m = NULL;
  8808. sp_digit* r = NULL;
  8809. int err = MP_OKAY;
  8810. (void)pm;
  8811. (void)qm;
  8812. (void)dpm;
  8813. (void)dqm;
  8814. (void)qim;
  8815. if (*outLen < 384U) {
  8816. err = MP_TO_E;
  8817. }
  8818. if (err == MP_OKAY) {
  8819. if (mp_count_bits(dm) > 3072) {
  8820. err = MP_READ_E;
  8821. }
  8822. else if (inLen > 384) {
  8823. err = MP_READ_E;
  8824. }
  8825. else if (mp_count_bits(mm) != 3072) {
  8826. err = MP_READ_E;
  8827. }
  8828. else if (mp_iseven(mm)) {
  8829. err = MP_VAL;
  8830. }
  8831. }
  8832. if (err == MP_OKAY) {
  8833. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
  8834. DYNAMIC_TYPE_RSA);
  8835. if (d == NULL) {
  8836. err = MEMORY_E;
  8837. }
  8838. }
  8839. if (err == MP_OKAY) {
  8840. a = d + 96;
  8841. m = a + 192;
  8842. r = a;
  8843. sp_3072_from_bin(a, 96, in, inLen);
  8844. sp_3072_from_mp(d, 96, dm);
  8845. sp_3072_from_mp(m, 96, mm);
  8846. err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
  8847. }
  8848. if (err == MP_OKAY) {
  8849. sp_3072_to_bin(r, out);
  8850. *outLen = 384;
  8851. }
  8852. if (d != NULL) {
  8853. XMEMSET(d, 0, sizeof(sp_digit) * 96);
  8854. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  8855. }
  8856. return err;
  8857. #else
  8858. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  8859. sp_digit a[96 * 2];
  8860. sp_digit p[48], q[48], dp[48];
  8861. sp_digit tmpa[96], tmpb[96];
  8862. #else
  8863. sp_digit* t = NULL;
  8864. sp_digit* a = NULL;
  8865. sp_digit* p = NULL;
  8866. sp_digit* q = NULL;
  8867. sp_digit* dp = NULL;
  8868. sp_digit* tmpa = NULL;
  8869. sp_digit* tmpb = NULL;
  8870. #endif
  8871. sp_digit* r = NULL;
  8872. sp_digit* qi = NULL;
  8873. sp_digit* dq = NULL;
  8874. sp_digit c;
  8875. int err = MP_OKAY;
  8876. (void)dm;
  8877. (void)mm;
  8878. if (*outLen < 384) {
  8879. err = MP_TO_E;
  8880. }
  8881. else if (inLen > 384 || mp_count_bits(mm) != 3072) {
  8882. err = MP_READ_E;
  8883. }
  8884. else if (mp_iseven(mm)) {
  8885. err = MP_VAL;
  8886. }
  8887. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  8888. if (err == MP_OKAY) {
  8889. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
  8890. DYNAMIC_TYPE_RSA);
  8891. if (t == NULL)
  8892. err = MEMORY_E;
  8893. }
  8894. if (err == MP_OKAY) {
  8895. a = t;
  8896. p = a + 96 * 2;
  8897. q = p + 48;
  8898. qi = dq = dp = q + 48;
  8899. tmpa = qi + 48;
  8900. tmpb = tmpa + 96;
  8901. r = t + 96;
  8902. }
  8903. #else
  8904. #endif
  8905. if (err == MP_OKAY) {
  8906. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  8907. r = a;
  8908. qi = dq = dp;
  8909. #endif
  8910. sp_3072_from_bin(a, 96, in, inLen);
  8911. sp_3072_from_mp(p, 48, pm);
  8912. sp_3072_from_mp(q, 48, qm);
  8913. sp_3072_from_mp(dp, 48, dpm);
  8914. err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
  8915. }
  8916. if (err == MP_OKAY) {
  8917. sp_3072_from_mp(dq, 48, dqm);
  8918. err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
  8919. }
  8920. if (err == MP_OKAY) {
  8921. c = sp_3072_sub_in_place_48(tmpa, tmpb);
  8922. c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
  8923. sp_3072_cond_add_48(tmpa, tmpa, p, c);
  8924. sp_3072_from_mp(qi, 48, qim);
  8925. sp_3072_mul_48(tmpa, tmpa, qi);
  8926. err = sp_3072_mod_48(tmpa, tmpa, p);
  8927. }
  8928. if (err == MP_OKAY) {
  8929. sp_3072_mul_48(tmpa, q, tmpa);
  8930. XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
  8931. sp_3072_add_96(r, tmpb, tmpa);
  8932. sp_3072_to_bin(r, out);
  8933. *outLen = 384;
  8934. }
  8935. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  8936. if (t != NULL) {
  8937. XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
  8938. XFREE(t, NULL, DYNAMIC_TYPE_RSA);
  8939. }
  8940. #else
  8941. XMEMSET(tmpa, 0, sizeof(tmpa));
  8942. XMEMSET(tmpb, 0, sizeof(tmpb));
  8943. XMEMSET(p, 0, sizeof(p));
  8944. XMEMSET(q, 0, sizeof(q));
  8945. XMEMSET(dp, 0, sizeof(dp));
  8946. #endif
  8947. #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
  8948. return err;
  8949. }
  8950. #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
  8951. #endif /* WOLFSSL_HAVE_SP_RSA */
  8952. #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
  8953. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  8954. /* Convert an array of sp_digit to an mp_int.
  8955. *
  8956. * a A single precision integer.
  8957. * r A multi-precision integer.
  8958. */
  8959. static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
  8960. {
  8961. int err;
  8962. err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
  8963. if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
  8964. #if DIGIT_BIT == 32
  8965. XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
  8966. r->used = 96;
  8967. mp_clamp(r);
  8968. #elif DIGIT_BIT < 32
  8969. int i, j = 0, s = 0;
  8970. r->dp[0] = 0;
  8971. for (i = 0; i < 96; i++) {
  8972. r->dp[j] |= (mp_digit)(a[i] << s);
  8973. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  8974. s = DIGIT_BIT - s;
  8975. r->dp[++j] = (mp_digit)(a[i] >> s);
  8976. while (s + DIGIT_BIT <= 32) {
  8977. s += DIGIT_BIT;
  8978. r->dp[j++] &= (1L << DIGIT_BIT) - 1;
  8979. if (s == SP_WORD_SIZE) {
  8980. r->dp[j] = 0;
  8981. }
  8982. else {
  8983. r->dp[j] = (mp_digit)(a[i] >> s);
  8984. }
  8985. }
  8986. s = 32 - s;
  8987. }
  8988. r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
  8989. mp_clamp(r);
  8990. #else
  8991. int i, j = 0, s = 0;
  8992. r->dp[0] = 0;
  8993. for (i = 0; i < 96; i++) {
  8994. r->dp[j] |= ((mp_digit)a[i]) << s;
  8995. if (s + 32 >= DIGIT_BIT) {
  8996. #if DIGIT_BIT != 32 && DIGIT_BIT != 64
  8997. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  8998. #endif
  8999. s = DIGIT_BIT - s;
  9000. r->dp[++j] = a[i] >> s;
  9001. s = 32 - s;
  9002. }
  9003. else {
  9004. s += 32;
  9005. }
  9006. }
  9007. r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
  9008. mp_clamp(r);
  9009. #endif
  9010. }
  9011. return err;
  9012. }
  9013. /* Perform the modular exponentiation for Diffie-Hellman.
  9014. *
  9015. * base Base. MP integer.
  9016. * exp Exponent. MP integer.
  9017. * mod Modulus. MP integer.
  9018. * res Result. MP integer.
  9019. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  9020. * and MEMORY_E if memory allocation fails.
  9021. */
  9022. int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
  9023. {
  9024. int err = MP_OKAY;
  9025. sp_digit b[192], e[96], m[96];
  9026. sp_digit* r = b;
  9027. int expBits = mp_count_bits(exp);
  9028. if (mp_count_bits(base) > 3072) {
  9029. err = MP_READ_E;
  9030. }
  9031. else if (expBits > 3072) {
  9032. err = MP_READ_E;
  9033. }
  9034. else if (mp_count_bits(mod) != 3072) {
  9035. err = MP_READ_E;
  9036. }
  9037. else if (mp_iseven(mod)) {
  9038. err = MP_VAL;
  9039. }
  9040. if (err == MP_OKAY) {
  9041. sp_3072_from_mp(b, 96, base);
  9042. sp_3072_from_mp(e, 96, exp);
  9043. sp_3072_from_mp(m, 96, mod);
  9044. err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
  9045. }
  9046. if (err == MP_OKAY) {
  9047. err = sp_3072_to_mp(r, res);
  9048. }
  9049. XMEMSET(e, 0, sizeof(e));
  9050. return err;
  9051. }
  9052. #ifdef WOLFSSL_HAVE_SP_DH
  9053. #ifdef HAVE_FFDHE_3072
  9054. static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
  9055. {
  9056. __asm__ __volatile__ (
  9057. "mov r6, #31\n\t"
  9058. "sub r6, r6, %[n]\n\t"
  9059. "add %[a], %[a], #320\n\t"
  9060. "add %[r], %[r], #320\n\t"
  9061. "ldr r3, [%[a], #60]\n\t"
  9062. "lsr r4, r3, #1\n\t"
  9063. "lsl r3, r3, %[n]\n\t"
  9064. "lsr r4, r4, r6\n\t"
  9065. "ldr r2, [%[a], #56]\n\t"
  9066. "str r4, [%[r], #64]\n\t"
  9067. "lsr r5, r2, #1\n\t"
  9068. "lsl r2, r2, %[n]\n\t"
  9069. "lsr r5, r5, r6\n\t"
  9070. "orr r3, r3, r5\n\t"
  9071. "ldr r4, [%[a], #52]\n\t"
  9072. "str r3, [%[r], #60]\n\t"
  9073. "lsr r5, r4, #1\n\t"
  9074. "lsl r4, r4, %[n]\n\t"
  9075. "lsr r5, r5, r6\n\t"
  9076. "orr r2, r2, r5\n\t"
  9077. "ldr r3, [%[a], #48]\n\t"
  9078. "str r2, [%[r], #56]\n\t"
  9079. "lsr r5, r3, #1\n\t"
  9080. "lsl r3, r3, %[n]\n\t"
  9081. "lsr r5, r5, r6\n\t"
  9082. "orr r4, r4, r5\n\t"
  9083. "ldr r2, [%[a], #44]\n\t"
  9084. "str r4, [%[r], #52]\n\t"
  9085. "lsr r5, r2, #1\n\t"
  9086. "lsl r2, r2, %[n]\n\t"
  9087. "lsr r5, r5, r6\n\t"
  9088. "orr r3, r3, r5\n\t"
  9089. "ldr r4, [%[a], #40]\n\t"
  9090. "str r3, [%[r], #48]\n\t"
  9091. "lsr r5, r4, #1\n\t"
  9092. "lsl r4, r4, %[n]\n\t"
  9093. "lsr r5, r5, r6\n\t"
  9094. "orr r2, r2, r5\n\t"
  9095. "ldr r3, [%[a], #36]\n\t"
  9096. "str r2, [%[r], #44]\n\t"
  9097. "lsr r5, r3, #1\n\t"
  9098. "lsl r3, r3, %[n]\n\t"
  9099. "lsr r5, r5, r6\n\t"
  9100. "orr r4, r4, r5\n\t"
  9101. "ldr r2, [%[a], #32]\n\t"
  9102. "str r4, [%[r], #40]\n\t"
  9103. "lsr r5, r2, #1\n\t"
  9104. "lsl r2, r2, %[n]\n\t"
  9105. "lsr r5, r5, r6\n\t"
  9106. "orr r3, r3, r5\n\t"
  9107. "ldr r4, [%[a], #28]\n\t"
  9108. "str r3, [%[r], #36]\n\t"
  9109. "lsr r5, r4, #1\n\t"
  9110. "lsl r4, r4, %[n]\n\t"
  9111. "lsr r5, r5, r6\n\t"
  9112. "orr r2, r2, r5\n\t"
  9113. "ldr r3, [%[a], #24]\n\t"
  9114. "str r2, [%[r], #32]\n\t"
  9115. "lsr r5, r3, #1\n\t"
  9116. "lsl r3, r3, %[n]\n\t"
  9117. "lsr r5, r5, r6\n\t"
  9118. "orr r4, r4, r5\n\t"
  9119. "ldr r2, [%[a], #20]\n\t"
  9120. "str r4, [%[r], #28]\n\t"
  9121. "lsr r5, r2, #1\n\t"
  9122. "lsl r2, r2, %[n]\n\t"
  9123. "lsr r5, r5, r6\n\t"
  9124. "orr r3, r3, r5\n\t"
  9125. "ldr r4, [%[a], #16]\n\t"
  9126. "str r3, [%[r], #24]\n\t"
  9127. "lsr r5, r4, #1\n\t"
  9128. "lsl r4, r4, %[n]\n\t"
  9129. "lsr r5, r5, r6\n\t"
  9130. "orr r2, r2, r5\n\t"
  9131. "ldr r3, [%[a], #12]\n\t"
  9132. "str r2, [%[r], #20]\n\t"
  9133. "lsr r5, r3, #1\n\t"
  9134. "lsl r3, r3, %[n]\n\t"
  9135. "lsr r5, r5, r6\n\t"
  9136. "orr r4, r4, r5\n\t"
  9137. "ldr r2, [%[a], #8]\n\t"
  9138. "str r4, [%[r], #16]\n\t"
  9139. "lsr r5, r2, #1\n\t"
  9140. "lsl r2, r2, %[n]\n\t"
  9141. "lsr r5, r5, r6\n\t"
  9142. "orr r3, r3, r5\n\t"
  9143. "ldr r4, [%[a], #4]\n\t"
  9144. "str r3, [%[r], #12]\n\t"
  9145. "lsr r5, r4, #1\n\t"
  9146. "lsl r4, r4, %[n]\n\t"
  9147. "lsr r5, r5, r6\n\t"
  9148. "orr r2, r2, r5\n\t"
  9149. "ldr r3, [%[a], #0]\n\t"
  9150. "str r2, [%[r], #8]\n\t"
  9151. "lsr r5, r3, #1\n\t"
  9152. "lsl r3, r3, %[n]\n\t"
  9153. "lsr r5, r5, r6\n\t"
  9154. "orr r4, r4, r5\n\t"
  9155. "sub %[a], %[a], #64\n\t"
  9156. "sub %[r], %[r], #64\n\t"
  9157. "ldr r2, [%[a], #60]\n\t"
  9158. "str r4, [%[r], #68]\n\t"
  9159. "lsr r5, r2, #1\n\t"
  9160. "lsl r2, r2, %[n]\n\t"
  9161. "lsr r5, r5, r6\n\t"
  9162. "orr r3, r3, r5\n\t"
  9163. "ldr r4, [%[a], #56]\n\t"
  9164. "str r3, [%[r], #64]\n\t"
  9165. "lsr r5, r4, #1\n\t"
  9166. "lsl r4, r4, %[n]\n\t"
  9167. "lsr r5, r5, r6\n\t"
  9168. "orr r2, r2, r5\n\t"
  9169. "ldr r3, [%[a], #52]\n\t"
  9170. "str r2, [%[r], #60]\n\t"
  9171. "lsr r5, r3, #1\n\t"
  9172. "lsl r3, r3, %[n]\n\t"
  9173. "lsr r5, r5, r6\n\t"
  9174. "orr r4, r4, r5\n\t"
  9175. "ldr r2, [%[a], #48]\n\t"
  9176. "str r4, [%[r], #56]\n\t"
  9177. "lsr r5, r2, #1\n\t"
  9178. "lsl r2, r2, %[n]\n\t"
  9179. "lsr r5, r5, r6\n\t"
  9180. "orr r3, r3, r5\n\t"
  9181. "ldr r4, [%[a], #44]\n\t"
  9182. "str r3, [%[r], #52]\n\t"
  9183. "lsr r5, r4, #1\n\t"
  9184. "lsl r4, r4, %[n]\n\t"
  9185. "lsr r5, r5, r6\n\t"
  9186. "orr r2, r2, r5\n\t"
  9187. "ldr r3, [%[a], #40]\n\t"
  9188. "str r2, [%[r], #48]\n\t"
  9189. "lsr r5, r3, #1\n\t"
  9190. "lsl r3, r3, %[n]\n\t"
  9191. "lsr r5, r5, r6\n\t"
  9192. "orr r4, r4, r5\n\t"
  9193. "ldr r2, [%[a], #36]\n\t"
  9194. "str r4, [%[r], #44]\n\t"
  9195. "lsr r5, r2, #1\n\t"
  9196. "lsl r2, r2, %[n]\n\t"
  9197. "lsr r5, r5, r6\n\t"
  9198. "orr r3, r3, r5\n\t"
  9199. "ldr r4, [%[a], #32]\n\t"
  9200. "str r3, [%[r], #40]\n\t"
  9201. "lsr r5, r4, #1\n\t"
  9202. "lsl r4, r4, %[n]\n\t"
  9203. "lsr r5, r5, r6\n\t"
  9204. "orr r2, r2, r5\n\t"
  9205. "ldr r3, [%[a], #28]\n\t"
  9206. "str r2, [%[r], #36]\n\t"
  9207. "lsr r5, r3, #1\n\t"
  9208. "lsl r3, r3, %[n]\n\t"
  9209. "lsr r5, r5, r6\n\t"
  9210. "orr r4, r4, r5\n\t"
  9211. "ldr r2, [%[a], #24]\n\t"
  9212. "str r4, [%[r], #32]\n\t"
  9213. "lsr r5, r2, #1\n\t"
  9214. "lsl r2, r2, %[n]\n\t"
  9215. "lsr r5, r5, r6\n\t"
  9216. "orr r3, r3, r5\n\t"
  9217. "ldr r4, [%[a], #20]\n\t"
  9218. "str r3, [%[r], #28]\n\t"
  9219. "lsr r5, r4, #1\n\t"
  9220. "lsl r4, r4, %[n]\n\t"
  9221. "lsr r5, r5, r6\n\t"
  9222. "orr r2, r2, r5\n\t"
  9223. "ldr r3, [%[a], #16]\n\t"
  9224. "str r2, [%[r], #24]\n\t"
  9225. "lsr r5, r3, #1\n\t"
  9226. "lsl r3, r3, %[n]\n\t"
  9227. "lsr r5, r5, r6\n\t"
  9228. "orr r4, r4, r5\n\t"
  9229. "ldr r2, [%[a], #12]\n\t"
  9230. "str r4, [%[r], #20]\n\t"
  9231. "lsr r5, r2, #1\n\t"
  9232. "lsl r2, r2, %[n]\n\t"
  9233. "lsr r5, r5, r6\n\t"
  9234. "orr r3, r3, r5\n\t"
  9235. "ldr r4, [%[a], #8]\n\t"
  9236. "str r3, [%[r], #16]\n\t"
  9237. "lsr r5, r4, #1\n\t"
  9238. "lsl r4, r4, %[n]\n\t"
  9239. "lsr r5, r5, r6\n\t"
  9240. "orr r2, r2, r5\n\t"
  9241. "ldr r3, [%[a], #4]\n\t"
  9242. "str r2, [%[r], #12]\n\t"
  9243. "lsr r5, r3, #1\n\t"
  9244. "lsl r3, r3, %[n]\n\t"
  9245. "lsr r5, r5, r6\n\t"
  9246. "orr r4, r4, r5\n\t"
  9247. "ldr r2, [%[a], #0]\n\t"
  9248. "str r4, [%[r], #8]\n\t"
  9249. "lsr r5, r2, #1\n\t"
  9250. "lsl r2, r2, %[n]\n\t"
  9251. "lsr r5, r5, r6\n\t"
  9252. "orr r3, r3, r5\n\t"
  9253. "sub %[a], %[a], #64\n\t"
  9254. "sub %[r], %[r], #64\n\t"
  9255. "ldr r4, [%[a], #60]\n\t"
  9256. "str r3, [%[r], #68]\n\t"
  9257. "lsr r5, r4, #1\n\t"
  9258. "lsl r4, r4, %[n]\n\t"
  9259. "lsr r5, r5, r6\n\t"
  9260. "orr r2, r2, r5\n\t"
  9261. "ldr r3, [%[a], #56]\n\t"
  9262. "str r2, [%[r], #64]\n\t"
  9263. "lsr r5, r3, #1\n\t"
  9264. "lsl r3, r3, %[n]\n\t"
  9265. "lsr r5, r5, r6\n\t"
  9266. "orr r4, r4, r5\n\t"
  9267. "ldr r2, [%[a], #52]\n\t"
  9268. "str r4, [%[r], #60]\n\t"
  9269. "lsr r5, r2, #1\n\t"
  9270. "lsl r2, r2, %[n]\n\t"
  9271. "lsr r5, r5, r6\n\t"
  9272. "orr r3, r3, r5\n\t"
  9273. "ldr r4, [%[a], #48]\n\t"
  9274. "str r3, [%[r], #56]\n\t"
  9275. "lsr r5, r4, #1\n\t"
  9276. "lsl r4, r4, %[n]\n\t"
  9277. "lsr r5, r5, r6\n\t"
  9278. "orr r2, r2, r5\n\t"
  9279. "ldr r3, [%[a], #44]\n\t"
  9280. "str r2, [%[r], #52]\n\t"
  9281. "lsr r5, r3, #1\n\t"
  9282. "lsl r3, r3, %[n]\n\t"
  9283. "lsr r5, r5, r6\n\t"
  9284. "orr r4, r4, r5\n\t"
  9285. "ldr r2, [%[a], #40]\n\t"
  9286. "str r4, [%[r], #48]\n\t"
  9287. "lsr r5, r2, #1\n\t"
  9288. "lsl r2, r2, %[n]\n\t"
  9289. "lsr r5, r5, r6\n\t"
  9290. "orr r3, r3, r5\n\t"
  9291. "ldr r4, [%[a], #36]\n\t"
  9292. "str r3, [%[r], #44]\n\t"
  9293. "lsr r5, r4, #1\n\t"
  9294. "lsl r4, r4, %[n]\n\t"
  9295. "lsr r5, r5, r6\n\t"
  9296. "orr r2, r2, r5\n\t"
  9297. "ldr r3, [%[a], #32]\n\t"
  9298. "str r2, [%[r], #40]\n\t"
  9299. "lsr r5, r3, #1\n\t"
  9300. "lsl r3, r3, %[n]\n\t"
  9301. "lsr r5, r5, r6\n\t"
  9302. "orr r4, r4, r5\n\t"
  9303. "ldr r2, [%[a], #28]\n\t"
  9304. "str r4, [%[r], #36]\n\t"
  9305. "lsr r5, r2, #1\n\t"
  9306. "lsl r2, r2, %[n]\n\t"
  9307. "lsr r5, r5, r6\n\t"
  9308. "orr r3, r3, r5\n\t"
  9309. "ldr r4, [%[a], #24]\n\t"
  9310. "str r3, [%[r], #32]\n\t"
  9311. "lsr r5, r4, #1\n\t"
  9312. "lsl r4, r4, %[n]\n\t"
  9313. "lsr r5, r5, r6\n\t"
  9314. "orr r2, r2, r5\n\t"
  9315. "ldr r3, [%[a], #20]\n\t"
  9316. "str r2, [%[r], #28]\n\t"
  9317. "lsr r5, r3, #1\n\t"
  9318. "lsl r3, r3, %[n]\n\t"
  9319. "lsr r5, r5, r6\n\t"
  9320. "orr r4, r4, r5\n\t"
  9321. "ldr r2, [%[a], #16]\n\t"
  9322. "str r4, [%[r], #24]\n\t"
  9323. "lsr r5, r2, #1\n\t"
  9324. "lsl r2, r2, %[n]\n\t"
  9325. "lsr r5, r5, r6\n\t"
  9326. "orr r3, r3, r5\n\t"
  9327. "ldr r4, [%[a], #12]\n\t"
  9328. "str r3, [%[r], #20]\n\t"
  9329. "lsr r5, r4, #1\n\t"
  9330. "lsl r4, r4, %[n]\n\t"
  9331. "lsr r5, r5, r6\n\t"
  9332. "orr r2, r2, r5\n\t"
  9333. "ldr r3, [%[a], #8]\n\t"
  9334. "str r2, [%[r], #16]\n\t"
  9335. "lsr r5, r3, #1\n\t"
  9336. "lsl r3, r3, %[n]\n\t"
  9337. "lsr r5, r5, r6\n\t"
  9338. "orr r4, r4, r5\n\t"
  9339. "ldr r2, [%[a], #4]\n\t"
  9340. "str r4, [%[r], #12]\n\t"
  9341. "lsr r5, r2, #1\n\t"
  9342. "lsl r2, r2, %[n]\n\t"
  9343. "lsr r5, r5, r6\n\t"
  9344. "orr r3, r3, r5\n\t"
  9345. "ldr r4, [%[a], #0]\n\t"
  9346. "str r3, [%[r], #8]\n\t"
  9347. "lsr r5, r4, #1\n\t"
  9348. "lsl r4, r4, %[n]\n\t"
  9349. "lsr r5, r5, r6\n\t"
  9350. "orr r2, r2, r5\n\t"
  9351. "sub %[a], %[a], #64\n\t"
  9352. "sub %[r], %[r], #64\n\t"
  9353. "ldr r3, [%[a], #60]\n\t"
  9354. "str r2, [%[r], #68]\n\t"
  9355. "lsr r5, r3, #1\n\t"
  9356. "lsl r3, r3, %[n]\n\t"
  9357. "lsr r5, r5, r6\n\t"
  9358. "orr r4, r4, r5\n\t"
  9359. "ldr r2, [%[a], #56]\n\t"
  9360. "str r4, [%[r], #64]\n\t"
  9361. "lsr r5, r2, #1\n\t"
  9362. "lsl r2, r2, %[n]\n\t"
  9363. "lsr r5, r5, r6\n\t"
  9364. "orr r3, r3, r5\n\t"
  9365. "ldr r4, [%[a], #52]\n\t"
  9366. "str r3, [%[r], #60]\n\t"
  9367. "lsr r5, r4, #1\n\t"
  9368. "lsl r4, r4, %[n]\n\t"
  9369. "lsr r5, r5, r6\n\t"
  9370. "orr r2, r2, r5\n\t"
  9371. "ldr r3, [%[a], #48]\n\t"
  9372. "str r2, [%[r], #56]\n\t"
  9373. "lsr r5, r3, #1\n\t"
  9374. "lsl r3, r3, %[n]\n\t"
  9375. "lsr r5, r5, r6\n\t"
  9376. "orr r4, r4, r5\n\t"
  9377. "ldr r2, [%[a], #44]\n\t"
  9378. "str r4, [%[r], #52]\n\t"
  9379. "lsr r5, r2, #1\n\t"
  9380. "lsl r2, r2, %[n]\n\t"
  9381. "lsr r5, r5, r6\n\t"
  9382. "orr r3, r3, r5\n\t"
  9383. "ldr r4, [%[a], #40]\n\t"
  9384. "str r3, [%[r], #48]\n\t"
  9385. "lsr r5, r4, #1\n\t"
  9386. "lsl r4, r4, %[n]\n\t"
  9387. "lsr r5, r5, r6\n\t"
  9388. "orr r2, r2, r5\n\t"
  9389. "ldr r3, [%[a], #36]\n\t"
  9390. "str r2, [%[r], #44]\n\t"
  9391. "lsr r5, r3, #1\n\t"
  9392. "lsl r3, r3, %[n]\n\t"
  9393. "lsr r5, r5, r6\n\t"
  9394. "orr r4, r4, r5\n\t"
  9395. "ldr r2, [%[a], #32]\n\t"
  9396. "str r4, [%[r], #40]\n\t"
  9397. "lsr r5, r2, #1\n\t"
  9398. "lsl r2, r2, %[n]\n\t"
  9399. "lsr r5, r5, r6\n\t"
  9400. "orr r3, r3, r5\n\t"
  9401. "ldr r4, [%[a], #28]\n\t"
  9402. "str r3, [%[r], #36]\n\t"
  9403. "lsr r5, r4, #1\n\t"
  9404. "lsl r4, r4, %[n]\n\t"
  9405. "lsr r5, r5, r6\n\t"
  9406. "orr r2, r2, r5\n\t"
  9407. "ldr r3, [%[a], #24]\n\t"
  9408. "str r2, [%[r], #32]\n\t"
  9409. "lsr r5, r3, #1\n\t"
  9410. "lsl r3, r3, %[n]\n\t"
  9411. "lsr r5, r5, r6\n\t"
  9412. "orr r4, r4, r5\n\t"
  9413. "ldr r2, [%[a], #20]\n\t"
  9414. "str r4, [%[r], #28]\n\t"
  9415. "lsr r5, r2, #1\n\t"
  9416. "lsl r2, r2, %[n]\n\t"
  9417. "lsr r5, r5, r6\n\t"
  9418. "orr r3, r3, r5\n\t"
  9419. "ldr r4, [%[a], #16]\n\t"
  9420. "str r3, [%[r], #24]\n\t"
  9421. "lsr r5, r4, #1\n\t"
  9422. "lsl r4, r4, %[n]\n\t"
  9423. "lsr r5, r5, r6\n\t"
  9424. "orr r2, r2, r5\n\t"
  9425. "ldr r3, [%[a], #12]\n\t"
  9426. "str r2, [%[r], #20]\n\t"
  9427. "lsr r5, r3, #1\n\t"
  9428. "lsl r3, r3, %[n]\n\t"
  9429. "lsr r5, r5, r6\n\t"
  9430. "orr r4, r4, r5\n\t"
  9431. "ldr r2, [%[a], #8]\n\t"
  9432. "str r4, [%[r], #16]\n\t"
  9433. "lsr r5, r2, #1\n\t"
  9434. "lsl r2, r2, %[n]\n\t"
  9435. "lsr r5, r5, r6\n\t"
  9436. "orr r3, r3, r5\n\t"
  9437. "ldr r4, [%[a], #4]\n\t"
  9438. "str r3, [%[r], #12]\n\t"
  9439. "lsr r5, r4, #1\n\t"
  9440. "lsl r4, r4, %[n]\n\t"
  9441. "lsr r5, r5, r6\n\t"
  9442. "orr r2, r2, r5\n\t"
  9443. "ldr r3, [%[a], #0]\n\t"
  9444. "str r2, [%[r], #8]\n\t"
  9445. "lsr r5, r3, #1\n\t"
  9446. "lsl r3, r3, %[n]\n\t"
  9447. "lsr r5, r5, r6\n\t"
  9448. "orr r4, r4, r5\n\t"
  9449. "sub %[a], %[a], #64\n\t"
  9450. "sub %[r], %[r], #64\n\t"
  9451. "ldr r2, [%[a], #60]\n\t"
  9452. "str r4, [%[r], #68]\n\t"
  9453. "lsr r5, r2, #1\n\t"
  9454. "lsl r2, r2, %[n]\n\t"
  9455. "lsr r5, r5, r6\n\t"
  9456. "orr r3, r3, r5\n\t"
  9457. "ldr r4, [%[a], #56]\n\t"
  9458. "str r3, [%[r], #64]\n\t"
  9459. "lsr r5, r4, #1\n\t"
  9460. "lsl r4, r4, %[n]\n\t"
  9461. "lsr r5, r5, r6\n\t"
  9462. "orr r2, r2, r5\n\t"
  9463. "ldr r3, [%[a], #52]\n\t"
  9464. "str r2, [%[r], #60]\n\t"
  9465. "lsr r5, r3, #1\n\t"
  9466. "lsl r3, r3, %[n]\n\t"
  9467. "lsr r5, r5, r6\n\t"
  9468. "orr r4, r4, r5\n\t"
  9469. "ldr r2, [%[a], #48]\n\t"
  9470. "str r4, [%[r], #56]\n\t"
  9471. "lsr r5, r2, #1\n\t"
  9472. "lsl r2, r2, %[n]\n\t"
  9473. "lsr r5, r5, r6\n\t"
  9474. "orr r3, r3, r5\n\t"
  9475. "ldr r4, [%[a], #44]\n\t"
  9476. "str r3, [%[r], #52]\n\t"
  9477. "lsr r5, r4, #1\n\t"
  9478. "lsl r4, r4, %[n]\n\t"
  9479. "lsr r5, r5, r6\n\t"
  9480. "orr r2, r2, r5\n\t"
  9481. "ldr r3, [%[a], #40]\n\t"
  9482. "str r2, [%[r], #48]\n\t"
  9483. "lsr r5, r3, #1\n\t"
  9484. "lsl r3, r3, %[n]\n\t"
  9485. "lsr r5, r5, r6\n\t"
  9486. "orr r4, r4, r5\n\t"
  9487. "ldr r2, [%[a], #36]\n\t"
  9488. "str r4, [%[r], #44]\n\t"
  9489. "lsr r5, r2, #1\n\t"
  9490. "lsl r2, r2, %[n]\n\t"
  9491. "lsr r5, r5, r6\n\t"
  9492. "orr r3, r3, r5\n\t"
  9493. "ldr r4, [%[a], #32]\n\t"
  9494. "str r3, [%[r], #40]\n\t"
  9495. "lsr r5, r4, #1\n\t"
  9496. "lsl r4, r4, %[n]\n\t"
  9497. "lsr r5, r5, r6\n\t"
  9498. "orr r2, r2, r5\n\t"
  9499. "ldr r3, [%[a], #28]\n\t"
  9500. "str r2, [%[r], #36]\n\t"
  9501. "lsr r5, r3, #1\n\t"
  9502. "lsl r3, r3, %[n]\n\t"
  9503. "lsr r5, r5, r6\n\t"
  9504. "orr r4, r4, r5\n\t"
  9505. "ldr r2, [%[a], #24]\n\t"
  9506. "str r4, [%[r], #32]\n\t"
  9507. "lsr r5, r2, #1\n\t"
  9508. "lsl r2, r2, %[n]\n\t"
  9509. "lsr r5, r5, r6\n\t"
  9510. "orr r3, r3, r5\n\t"
  9511. "ldr r4, [%[a], #20]\n\t"
  9512. "str r3, [%[r], #28]\n\t"
  9513. "lsr r5, r4, #1\n\t"
  9514. "lsl r4, r4, %[n]\n\t"
  9515. "lsr r5, r5, r6\n\t"
  9516. "orr r2, r2, r5\n\t"
  9517. "ldr r3, [%[a], #16]\n\t"
  9518. "str r2, [%[r], #24]\n\t"
  9519. "lsr r5, r3, #1\n\t"
  9520. "lsl r3, r3, %[n]\n\t"
  9521. "lsr r5, r5, r6\n\t"
  9522. "orr r4, r4, r5\n\t"
  9523. "ldr r2, [%[a], #12]\n\t"
  9524. "str r4, [%[r], #20]\n\t"
  9525. "lsr r5, r2, #1\n\t"
  9526. "lsl r2, r2, %[n]\n\t"
  9527. "lsr r5, r5, r6\n\t"
  9528. "orr r3, r3, r5\n\t"
  9529. "ldr r4, [%[a], #8]\n\t"
  9530. "str r3, [%[r], #16]\n\t"
  9531. "lsr r5, r4, #1\n\t"
  9532. "lsl r4, r4, %[n]\n\t"
  9533. "lsr r5, r5, r6\n\t"
  9534. "orr r2, r2, r5\n\t"
  9535. "ldr r3, [%[a], #4]\n\t"
  9536. "str r2, [%[r], #12]\n\t"
  9537. "lsr r5, r3, #1\n\t"
  9538. "lsl r3, r3, %[n]\n\t"
  9539. "lsr r5, r5, r6\n\t"
  9540. "orr r4, r4, r5\n\t"
  9541. "ldr r2, [%[a], #0]\n\t"
  9542. "str r4, [%[r], #8]\n\t"
  9543. "lsr r5, r2, #1\n\t"
  9544. "lsl r2, r2, %[n]\n\t"
  9545. "lsr r5, r5, r6\n\t"
  9546. "orr r3, r3, r5\n\t"
  9547. "sub %[a], %[a], #64\n\t"
  9548. "sub %[r], %[r], #64\n\t"
  9549. "ldr r4, [%[a], #60]\n\t"
  9550. "str r3, [%[r], #68]\n\t"
  9551. "lsr r5, r4, #1\n\t"
  9552. "lsl r4, r4, %[n]\n\t"
  9553. "lsr r5, r5, r6\n\t"
  9554. "orr r2, r2, r5\n\t"
  9555. "ldr r3, [%[a], #56]\n\t"
  9556. "str r2, [%[r], #64]\n\t"
  9557. "lsr r5, r3, #1\n\t"
  9558. "lsl r3, r3, %[n]\n\t"
  9559. "lsr r5, r5, r6\n\t"
  9560. "orr r4, r4, r5\n\t"
  9561. "ldr r2, [%[a], #52]\n\t"
  9562. "str r4, [%[r], #60]\n\t"
  9563. "lsr r5, r2, #1\n\t"
  9564. "lsl r2, r2, %[n]\n\t"
  9565. "lsr r5, r5, r6\n\t"
  9566. "orr r3, r3, r5\n\t"
  9567. "ldr r4, [%[a], #48]\n\t"
  9568. "str r3, [%[r], #56]\n\t"
  9569. "lsr r5, r4, #1\n\t"
  9570. "lsl r4, r4, %[n]\n\t"
  9571. "lsr r5, r5, r6\n\t"
  9572. "orr r2, r2, r5\n\t"
  9573. "ldr r3, [%[a], #44]\n\t"
  9574. "str r2, [%[r], #52]\n\t"
  9575. "lsr r5, r3, #1\n\t"
  9576. "lsl r3, r3, %[n]\n\t"
  9577. "lsr r5, r5, r6\n\t"
  9578. "orr r4, r4, r5\n\t"
  9579. "ldr r2, [%[a], #40]\n\t"
  9580. "str r4, [%[r], #48]\n\t"
  9581. "lsr r5, r2, #1\n\t"
  9582. "lsl r2, r2, %[n]\n\t"
  9583. "lsr r5, r5, r6\n\t"
  9584. "orr r3, r3, r5\n\t"
  9585. "ldr r4, [%[a], #36]\n\t"
  9586. "str r3, [%[r], #44]\n\t"
  9587. "lsr r5, r4, #1\n\t"
  9588. "lsl r4, r4, %[n]\n\t"
  9589. "lsr r5, r5, r6\n\t"
  9590. "orr r2, r2, r5\n\t"
  9591. "ldr r3, [%[a], #32]\n\t"
  9592. "str r2, [%[r], #40]\n\t"
  9593. "lsr r5, r3, #1\n\t"
  9594. "lsl r3, r3, %[n]\n\t"
  9595. "lsr r5, r5, r6\n\t"
  9596. "orr r4, r4, r5\n\t"
  9597. "ldr r2, [%[a], #28]\n\t"
  9598. "str r4, [%[r], #36]\n\t"
  9599. "lsr r5, r2, #1\n\t"
  9600. "lsl r2, r2, %[n]\n\t"
  9601. "lsr r5, r5, r6\n\t"
  9602. "orr r3, r3, r5\n\t"
  9603. "ldr r4, [%[a], #24]\n\t"
  9604. "str r3, [%[r], #32]\n\t"
  9605. "lsr r5, r4, #1\n\t"
  9606. "lsl r4, r4, %[n]\n\t"
  9607. "lsr r5, r5, r6\n\t"
  9608. "orr r2, r2, r5\n\t"
  9609. "ldr r3, [%[a], #20]\n\t"
  9610. "str r2, [%[r], #28]\n\t"
  9611. "lsr r5, r3, #1\n\t"
  9612. "lsl r3, r3, %[n]\n\t"
  9613. "lsr r5, r5, r6\n\t"
  9614. "orr r4, r4, r5\n\t"
  9615. "ldr r2, [%[a], #16]\n\t"
  9616. "str r4, [%[r], #24]\n\t"
  9617. "lsr r5, r2, #1\n\t"
  9618. "lsl r2, r2, %[n]\n\t"
  9619. "lsr r5, r5, r6\n\t"
  9620. "orr r3, r3, r5\n\t"
  9621. "ldr r4, [%[a], #12]\n\t"
  9622. "str r3, [%[r], #20]\n\t"
  9623. "lsr r5, r4, #1\n\t"
  9624. "lsl r4, r4, %[n]\n\t"
  9625. "lsr r5, r5, r6\n\t"
  9626. "orr r2, r2, r5\n\t"
  9627. "ldr r3, [%[a], #8]\n\t"
  9628. "str r2, [%[r], #16]\n\t"
  9629. "lsr r5, r3, #1\n\t"
  9630. "lsl r3, r3, %[n]\n\t"
  9631. "lsr r5, r5, r6\n\t"
  9632. "orr r4, r4, r5\n\t"
  9633. "ldr r2, [%[a], #4]\n\t"
  9634. "str r4, [%[r], #12]\n\t"
  9635. "lsr r5, r2, #1\n\t"
  9636. "lsl r2, r2, %[n]\n\t"
  9637. "lsr r5, r5, r6\n\t"
  9638. "orr r3, r3, r5\n\t"
  9639. "ldr r4, [%[a], #0]\n\t"
  9640. "str r3, [%[r], #8]\n\t"
  9641. "lsr r5, r4, #1\n\t"
  9642. "lsl r4, r4, %[n]\n\t"
  9643. "lsr r5, r5, r6\n\t"
  9644. "orr r2, r2, r5\n\t"
  9645. "str r4, [%[r]]\n\t"
  9646. "str r2, [%[r], #4]\n\t"
  9647. :
  9648. : [r] "r" (r), [a] "r" (a), [n] "r" (n)
  9649. : "memory", "r2", "r3", "r4", "r5", "r6"
  9650. );
  9651. }
  9652. /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
  9653. *
  9654. * r A single precision number that is the result of the operation.
  9655. * e A single precision number that is the exponent.
  9656. * bits The number of bits in the exponent.
  9657. * m A single precision number that is the modulus.
  9658. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  9659. */
  9660. static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
  9661. const sp_digit* m)
  9662. {
  9663. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9664. sp_digit* td;
  9665. #else
  9666. sp_digit td[289];
  9667. #endif
  9668. sp_digit* norm;
  9669. sp_digit* tmp;
  9670. sp_digit mp = 1;
  9671. sp_digit n, o;
  9672. sp_digit mask;
  9673. int i;
  9674. int c, y;
  9675. int err = MP_OKAY;
  9676. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9677. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
  9678. DYNAMIC_TYPE_TMP_BUFFER);
  9679. if (td == NULL) {
  9680. err = MEMORY_E;
  9681. }
  9682. #endif
  9683. if (err == MP_OKAY) {
  9684. norm = td;
  9685. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9686. tmp = td + 192;
  9687. #else
  9688. tmp = &td[192];
  9689. #endif
  9690. sp_3072_mont_setup(m, &mp);
  9691. sp_3072_mont_norm_96(norm, m);
  9692. i = (bits - 1) / 32;
  9693. n = e[i--];
  9694. c = bits & 31;
  9695. if (c == 0) {
  9696. c = 32;
  9697. }
  9698. c -= bits % 5;
  9699. if (c == 32) {
  9700. c = 27;
  9701. }
  9702. if (c < 0) {
  9703. /* Number of bits in top word is less than number needed. */
  9704. c = -c;
  9705. y = (int)(n << c);
  9706. n = e[i--];
  9707. y |= (int)(n >> (64 - c));
  9708. n <<= c;
  9709. c = 64 - c;
  9710. }
  9711. else {
  9712. y = (int)(n >> c);
  9713. n <<= 32 - c;
  9714. }
  9715. sp_3072_lshift_96(r, norm, (byte)y);
  9716. for (; i>=0 || c>=5; ) {
  9717. if (c == 0) {
  9718. n = e[i--];
  9719. y = (int)(n >> 27);
  9720. n <<= 5;
  9721. c = 27;
  9722. }
  9723. else if (c < 5) {
  9724. y = (int)(n >> 27);
  9725. n = e[i--];
  9726. c = 5 - c;
  9727. y |= (int)(n >> (32 - c));
  9728. n <<= c;
  9729. c = 32 - c;
  9730. }
  9731. else {
  9732. y = (int)((n >> 27) & 0x1f);
  9733. n <<= 5;
  9734. c -= 5;
  9735. }
  9736. sp_3072_mont_sqr_96(r, r, m, mp);
  9737. sp_3072_mont_sqr_96(r, r, m, mp);
  9738. sp_3072_mont_sqr_96(r, r, m, mp);
  9739. sp_3072_mont_sqr_96(r, r, m, mp);
  9740. sp_3072_mont_sqr_96(r, r, m, mp);
  9741. sp_3072_lshift_96(r, r, (byte)y);
  9742. sp_3072_mul_d_96(tmp, norm, r[96]);
  9743. r[96] = 0;
  9744. o = sp_3072_add_96(r, r, tmp);
  9745. sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
  9746. }
  9747. XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
  9748. sp_3072_mont_reduce_96(r, m, mp);
  9749. mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
  9750. sp_3072_cond_sub_96(r, r, m, mask);
  9751. }
  9752. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9753. if (td != NULL) {
  9754. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  9755. }
  9756. #endif
  9757. return err;
  9758. }
  9759. #endif /* HAVE_FFDHE_3072 */
  9760. /* Perform the modular exponentiation for Diffie-Hellman.
  9761. *
  9762. * base Base.
  9763. * exp Array of bytes that is the exponent.
  9764. * expLen Length of data, in bytes, in exponent.
  9765. * mod Modulus.
  9766. * out Buffer to hold big-endian bytes of exponentiation result.
  9767. * Must be at least 384 bytes long.
  9768. * outLen Length, in bytes, of exponentiation result.
  9769. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  9770. * and MEMORY_E if memory allocation fails.
  9771. */
  9772. int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
  9773. mp_int* mod, byte* out, word32* outLen)
  9774. {
  9775. int err = MP_OKAY;
  9776. sp_digit b[192], e[96], m[96];
  9777. sp_digit* r = b;
  9778. word32 i;
  9779. if (mp_count_bits(base) > 3072) {
  9780. err = MP_READ_E;
  9781. }
  9782. else if (expLen > 384) {
  9783. err = MP_READ_E;
  9784. }
  9785. else if (mp_count_bits(mod) != 3072) {
  9786. err = MP_READ_E;
  9787. }
  9788. else if (mp_iseven(mod)) {
  9789. err = MP_VAL;
  9790. }
  9791. if (err == MP_OKAY) {
  9792. sp_3072_from_mp(b, 96, base);
  9793. sp_3072_from_bin(e, 96, exp, expLen);
  9794. sp_3072_from_mp(m, 96, mod);
  9795. #ifdef HAVE_FFDHE_3072
  9796. if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
  9797. err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
  9798. else
  9799. #endif
  9800. err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
  9801. }
  9802. if (err == MP_OKAY) {
  9803. sp_3072_to_bin(r, out);
  9804. *outLen = 384;
  9805. for (i=0; i<384 && out[i] == 0; i++) {
  9806. }
  9807. *outLen -= i;
  9808. XMEMMOVE(out, out + i, *outLen);
  9809. }
  9810. XMEMSET(e, 0, sizeof(e));
  9811. return err;
  9812. }
  9813. #endif /* WOLFSSL_HAVE_SP_DH */
  9814. /* Perform the modular exponentiation for Diffie-Hellman.
  9815. *
  9816. * base Base. MP integer.
  9817. * exp Exponent. MP integer.
  9818. * mod Modulus. MP integer.
  9819. * res Result. MP integer.
  9820. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  9821. * and MEMORY_E if memory allocation fails.
  9822. */
  9823. int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
  9824. {
  9825. int err = MP_OKAY;
  9826. sp_digit b[96], e[48], m[48];
  9827. sp_digit* r = b;
  9828. int expBits = mp_count_bits(exp);
  9829. if (mp_count_bits(base) > 1536) {
  9830. err = MP_READ_E;
  9831. }
  9832. else if (expBits > 1536) {
  9833. err = MP_READ_E;
  9834. }
  9835. else if (mp_count_bits(mod) != 1536) {
  9836. err = MP_READ_E;
  9837. }
  9838. else if (mp_iseven(mod)) {
  9839. err = MP_VAL;
  9840. }
  9841. if (err == MP_OKAY) {
  9842. sp_3072_from_mp(b, 48, base);
  9843. sp_3072_from_mp(e, 48, exp);
  9844. sp_3072_from_mp(m, 48, mod);
  9845. err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
  9846. }
  9847. if (err == MP_OKAY) {
  9848. XMEMSET(r + 48, 0, sizeof(*r) * 48U);
  9849. err = sp_3072_to_mp(r, res);
  9850. res->used = mod->used;
  9851. mp_clamp(res);
  9852. }
  9853. XMEMSET(e, 0, sizeof(e));
  9854. return err;
  9855. }
  9856. #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
  9857. #endif /* !WOLFSSL_SP_NO_3072 */
  9858. #ifdef WOLFSSL_SP_4096
  9859. /* Read big endian unsigned byte array into r.
  9860. *
  9861. * r A single precision integer.
  9862. * size Maximum number of bytes to convert
  9863. * a Byte array.
  9864. * n Number of bytes in array to read.
  9865. */
  9866. static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
  9867. {
  9868. int i, j = 0;
  9869. word32 s = 0;
  9870. r[0] = 0;
  9871. for (i = n-1; i >= 0; i--) {
  9872. r[j] |= (((sp_digit)a[i]) << s);
  9873. if (s >= 24U) {
  9874. r[j] &= 0xffffffff;
  9875. s = 32U - s;
  9876. if (j + 1 >= size) {
  9877. break;
  9878. }
  9879. r[++j] = (sp_digit)a[i] >> s;
  9880. s = 8U - s;
  9881. }
  9882. else {
  9883. s += 8U;
  9884. }
  9885. }
  9886. for (j++; j < size; j++) {
  9887. r[j] = 0;
  9888. }
  9889. }
  9890. /* Convert an mp_int to an array of sp_digit.
  9891. *
  9892. * r A single precision integer.
  9893. * size Maximum number of bytes to convert
  9894. * a A multi-precision integer.
  9895. */
  9896. static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
  9897. {
  9898. #if DIGIT_BIT == 32
  9899. int j;
  9900. XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
  9901. for (j = a->used; j < size; j++) {
  9902. r[j] = 0;
  9903. }
  9904. #elif DIGIT_BIT > 32
  9905. int i, j = 0;
  9906. word32 s = 0;
  9907. r[0] = 0;
  9908. for (i = 0; i < a->used && j < size; i++) {
  9909. r[j] |= ((sp_digit)a->dp[i] << s);
  9910. r[j] &= 0xffffffff;
  9911. s = 32U - s;
  9912. if (j + 1 >= size) {
  9913. break;
  9914. }
  9915. /* lint allow cast of mismatch word32 and mp_digit */
  9916. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  9917. while ((s + 32U) <= (word32)DIGIT_BIT) {
  9918. s += 32U;
  9919. r[j] &= 0xffffffff;
  9920. if (j + 1 >= size) {
  9921. break;
  9922. }
  9923. if (s < (word32)DIGIT_BIT) {
  9924. /* lint allow cast of mismatch word32 and mp_digit */
  9925. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  9926. }
  9927. else {
  9928. r[++j] = 0L;
  9929. }
  9930. }
  9931. s = (word32)DIGIT_BIT - s;
  9932. }
  9933. for (j++; j < size; j++) {
  9934. r[j] = 0;
  9935. }
  9936. #else
  9937. int i, j = 0, s = 0;
  9938. r[0] = 0;
  9939. for (i = 0; i < a->used && j < size; i++) {
  9940. r[j] |= ((sp_digit)a->dp[i]) << s;
  9941. if (s + DIGIT_BIT >= 32) {
  9942. r[j] &= 0xffffffff;
  9943. if (j + 1 >= size) {
  9944. break;
  9945. }
  9946. s = 32 - s;
  9947. if (s == DIGIT_BIT) {
  9948. r[++j] = 0;
  9949. s = 0;
  9950. }
  9951. else {
  9952. r[++j] = a->dp[i] >> s;
  9953. s = DIGIT_BIT - s;
  9954. }
  9955. }
  9956. else {
  9957. s += DIGIT_BIT;
  9958. }
  9959. }
  9960. for (j++; j < size; j++) {
  9961. r[j] = 0;
  9962. }
  9963. #endif
  9964. }
  9965. /* Write r as big endian to byte array.
  9966. * Fixed length number of bytes written: 512
  9967. *
  9968. * r A single precision integer.
  9969. * a Byte array.
  9970. */
  9971. static void sp_4096_to_bin(sp_digit* r, byte* a)
  9972. {
  9973. int i, j, s = 0, b;
  9974. j = 4096 / 8 - 1;
  9975. a[j] = 0;
  9976. for (i=0; i<128 && j>=0; i++) {
  9977. b = 0;
  9978. /* lint allow cast of mismatch sp_digit and int */
  9979. a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
  9980. b += 8 - s;
  9981. if (j < 0) {
  9982. break;
  9983. }
  9984. while (b < 32) {
  9985. a[j--] = (byte)(r[i] >> b);
  9986. b += 8;
  9987. if (j < 0) {
  9988. break;
  9989. }
  9990. }
  9991. s = 8 - (b - 32);
  9992. if (j >= 0) {
  9993. a[j] = 0;
  9994. }
  9995. if (s != 0) {
  9996. j++;
  9997. }
  9998. }
  9999. }
  10000. #ifndef WOLFSSL_SP_SMALL
  10001. /* Sub b from a into r. (r = a - b)
  10002. *
  10003. * r A single precision integer.
  10004. * a A single precision integer.
  10005. * b A single precision integer.
  10006. */
  10007. SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
  10008. const sp_digit* b)
  10009. {
  10010. sp_digit c = 0;
  10011. __asm__ __volatile__ (
  10012. "ldm %[a], {r3, r4}\n\t"
  10013. "ldm %[b]!, {r5, r6}\n\t"
  10014. "subs r3, r3, r5\n\t"
  10015. "sbcs r4, r4, r6\n\t"
  10016. "stm %[a]!, {r3, r4}\n\t"
  10017. "ldm %[a], {r3, r4}\n\t"
  10018. "ldm %[b]!, {r5, r6}\n\t"
  10019. "sbcs r3, r3, r5\n\t"
  10020. "sbcs r4, r4, r6\n\t"
  10021. "stm %[a]!, {r3, r4}\n\t"
  10022. "ldm %[a], {r3, r4}\n\t"
  10023. "ldm %[b]!, {r5, r6}\n\t"
  10024. "sbcs r3, r3, r5\n\t"
  10025. "sbcs r4, r4, r6\n\t"
  10026. "stm %[a]!, {r3, r4}\n\t"
  10027. "ldm %[a], {r3, r4}\n\t"
  10028. "ldm %[b]!, {r5, r6}\n\t"
  10029. "sbcs r3, r3, r5\n\t"
  10030. "sbcs r4, r4, r6\n\t"
  10031. "stm %[a]!, {r3, r4}\n\t"
  10032. "ldm %[a], {r3, r4}\n\t"
  10033. "ldm %[b]!, {r5, r6}\n\t"
  10034. "sbcs r3, r3, r5\n\t"
  10035. "sbcs r4, r4, r6\n\t"
  10036. "stm %[a]!, {r3, r4}\n\t"
  10037. "ldm %[a], {r3, r4}\n\t"
  10038. "ldm %[b]!, {r5, r6}\n\t"
  10039. "sbcs r3, r3, r5\n\t"
  10040. "sbcs r4, r4, r6\n\t"
  10041. "stm %[a]!, {r3, r4}\n\t"
  10042. "ldm %[a], {r3, r4}\n\t"
  10043. "ldm %[b]!, {r5, r6}\n\t"
  10044. "sbcs r3, r3, r5\n\t"
  10045. "sbcs r4, r4, r6\n\t"
  10046. "stm %[a]!, {r3, r4}\n\t"
  10047. "ldm %[a], {r3, r4}\n\t"
  10048. "ldm %[b]!, {r5, r6}\n\t"
  10049. "sbcs r3, r3, r5\n\t"
  10050. "sbcs r4, r4, r6\n\t"
  10051. "stm %[a]!, {r3, r4}\n\t"
  10052. "ldm %[a], {r3, r4}\n\t"
  10053. "ldm %[b]!, {r5, r6}\n\t"
  10054. "sbcs r3, r3, r5\n\t"
  10055. "sbcs r4, r4, r6\n\t"
  10056. "stm %[a]!, {r3, r4}\n\t"
  10057. "ldm %[a], {r3, r4}\n\t"
  10058. "ldm %[b]!, {r5, r6}\n\t"
  10059. "sbcs r3, r3, r5\n\t"
  10060. "sbcs r4, r4, r6\n\t"
  10061. "stm %[a]!, {r3, r4}\n\t"
  10062. "ldm %[a], {r3, r4}\n\t"
  10063. "ldm %[b]!, {r5, r6}\n\t"
  10064. "sbcs r3, r3, r5\n\t"
  10065. "sbcs r4, r4, r6\n\t"
  10066. "stm %[a]!, {r3, r4}\n\t"
  10067. "ldm %[a], {r3, r4}\n\t"
  10068. "ldm %[b]!, {r5, r6}\n\t"
  10069. "sbcs r3, r3, r5\n\t"
  10070. "sbcs r4, r4, r6\n\t"
  10071. "stm %[a]!, {r3, r4}\n\t"
  10072. "ldm %[a], {r3, r4}\n\t"
  10073. "ldm %[b]!, {r5, r6}\n\t"
  10074. "sbcs r3, r3, r5\n\t"
  10075. "sbcs r4, r4, r6\n\t"
  10076. "stm %[a]!, {r3, r4}\n\t"
  10077. "ldm %[a], {r3, r4}\n\t"
  10078. "ldm %[b]!, {r5, r6}\n\t"
  10079. "sbcs r3, r3, r5\n\t"
  10080. "sbcs r4, r4, r6\n\t"
  10081. "stm %[a]!, {r3, r4}\n\t"
  10082. "ldm %[a], {r3, r4}\n\t"
  10083. "ldm %[b]!, {r5, r6}\n\t"
  10084. "sbcs r3, r3, r5\n\t"
  10085. "sbcs r4, r4, r6\n\t"
  10086. "stm %[a]!, {r3, r4}\n\t"
  10087. "ldm %[a], {r3, r4}\n\t"
  10088. "ldm %[b]!, {r5, r6}\n\t"
  10089. "sbcs r3, r3, r5\n\t"
  10090. "sbcs r4, r4, r6\n\t"
  10091. "stm %[a]!, {r3, r4}\n\t"
  10092. "ldm %[a], {r3, r4}\n\t"
  10093. "ldm %[b]!, {r5, r6}\n\t"
  10094. "sbcs r3, r3, r5\n\t"
  10095. "sbcs r4, r4, r6\n\t"
  10096. "stm %[a]!, {r3, r4}\n\t"
  10097. "ldm %[a], {r3, r4}\n\t"
  10098. "ldm %[b]!, {r5, r6}\n\t"
  10099. "sbcs r3, r3, r5\n\t"
  10100. "sbcs r4, r4, r6\n\t"
  10101. "stm %[a]!, {r3, r4}\n\t"
  10102. "ldm %[a], {r3, r4}\n\t"
  10103. "ldm %[b]!, {r5, r6}\n\t"
  10104. "sbcs r3, r3, r5\n\t"
  10105. "sbcs r4, r4, r6\n\t"
  10106. "stm %[a]!, {r3, r4}\n\t"
  10107. "ldm %[a], {r3, r4}\n\t"
  10108. "ldm %[b]!, {r5, r6}\n\t"
  10109. "sbcs r3, r3, r5\n\t"
  10110. "sbcs r4, r4, r6\n\t"
  10111. "stm %[a]!, {r3, r4}\n\t"
  10112. "ldm %[a], {r3, r4}\n\t"
  10113. "ldm %[b]!, {r5, r6}\n\t"
  10114. "sbcs r3, r3, r5\n\t"
  10115. "sbcs r4, r4, r6\n\t"
  10116. "stm %[a]!, {r3, r4}\n\t"
  10117. "ldm %[a], {r3, r4}\n\t"
  10118. "ldm %[b]!, {r5, r6}\n\t"
  10119. "sbcs r3, r3, r5\n\t"
  10120. "sbcs r4, r4, r6\n\t"
  10121. "stm %[a]!, {r3, r4}\n\t"
  10122. "ldm %[a], {r3, r4}\n\t"
  10123. "ldm %[b]!, {r5, r6}\n\t"
  10124. "sbcs r3, r3, r5\n\t"
  10125. "sbcs r4, r4, r6\n\t"
  10126. "stm %[a]!, {r3, r4}\n\t"
  10127. "ldm %[a], {r3, r4}\n\t"
  10128. "ldm %[b]!, {r5, r6}\n\t"
  10129. "sbcs r3, r3, r5\n\t"
  10130. "sbcs r4, r4, r6\n\t"
  10131. "stm %[a]!, {r3, r4}\n\t"
  10132. "ldm %[a], {r3, r4}\n\t"
  10133. "ldm %[b]!, {r5, r6}\n\t"
  10134. "sbcs r3, r3, r5\n\t"
  10135. "sbcs r4, r4, r6\n\t"
  10136. "stm %[a]!, {r3, r4}\n\t"
  10137. "ldm %[a], {r3, r4}\n\t"
  10138. "ldm %[b]!, {r5, r6}\n\t"
  10139. "sbcs r3, r3, r5\n\t"
  10140. "sbcs r4, r4, r6\n\t"
  10141. "stm %[a]!, {r3, r4}\n\t"
  10142. "ldm %[a], {r3, r4}\n\t"
  10143. "ldm %[b]!, {r5, r6}\n\t"
  10144. "sbcs r3, r3, r5\n\t"
  10145. "sbcs r4, r4, r6\n\t"
  10146. "stm %[a]!, {r3, r4}\n\t"
  10147. "ldm %[a], {r3, r4}\n\t"
  10148. "ldm %[b]!, {r5, r6}\n\t"
  10149. "sbcs r3, r3, r5\n\t"
  10150. "sbcs r4, r4, r6\n\t"
  10151. "stm %[a]!, {r3, r4}\n\t"
  10152. "ldm %[a], {r3, r4}\n\t"
  10153. "ldm %[b]!, {r5, r6}\n\t"
  10154. "sbcs r3, r3, r5\n\t"
  10155. "sbcs r4, r4, r6\n\t"
  10156. "stm %[a]!, {r3, r4}\n\t"
  10157. "ldm %[a], {r3, r4}\n\t"
  10158. "ldm %[b]!, {r5, r6}\n\t"
  10159. "sbcs r3, r3, r5\n\t"
  10160. "sbcs r4, r4, r6\n\t"
  10161. "stm %[a]!, {r3, r4}\n\t"
  10162. "ldm %[a], {r3, r4}\n\t"
  10163. "ldm %[b]!, {r5, r6}\n\t"
  10164. "sbcs r3, r3, r5\n\t"
  10165. "sbcs r4, r4, r6\n\t"
  10166. "stm %[a]!, {r3, r4}\n\t"
  10167. "ldm %[a], {r3, r4}\n\t"
  10168. "ldm %[b]!, {r5, r6}\n\t"
  10169. "sbcs r3, r3, r5\n\t"
  10170. "sbcs r4, r4, r6\n\t"
  10171. "stm %[a]!, {r3, r4}\n\t"
  10172. "ldm %[a], {r3, r4}\n\t"
  10173. "ldm %[b]!, {r5, r6}\n\t"
  10174. "sbcs r3, r3, r5\n\t"
  10175. "sbcs r4, r4, r6\n\t"
  10176. "stm %[a]!, {r3, r4}\n\t"
  10177. "ldm %[a], {r3, r4}\n\t"
  10178. "ldm %[b]!, {r5, r6}\n\t"
  10179. "sbcs r3, r3, r5\n\t"
  10180. "sbcs r4, r4, r6\n\t"
  10181. "stm %[a]!, {r3, r4}\n\t"
  10182. "ldm %[a], {r3, r4}\n\t"
  10183. "ldm %[b]!, {r5, r6}\n\t"
  10184. "sbcs r3, r3, r5\n\t"
  10185. "sbcs r4, r4, r6\n\t"
  10186. "stm %[a]!, {r3, r4}\n\t"
  10187. "ldm %[a], {r3, r4}\n\t"
  10188. "ldm %[b]!, {r5, r6}\n\t"
  10189. "sbcs r3, r3, r5\n\t"
  10190. "sbcs r4, r4, r6\n\t"
  10191. "stm %[a]!, {r3, r4}\n\t"
  10192. "ldm %[a], {r3, r4}\n\t"
  10193. "ldm %[b]!, {r5, r6}\n\t"
  10194. "sbcs r3, r3, r5\n\t"
  10195. "sbcs r4, r4, r6\n\t"
  10196. "stm %[a]!, {r3, r4}\n\t"
  10197. "ldm %[a], {r3, r4}\n\t"
  10198. "ldm %[b]!, {r5, r6}\n\t"
  10199. "sbcs r3, r3, r5\n\t"
  10200. "sbcs r4, r4, r6\n\t"
  10201. "stm %[a]!, {r3, r4}\n\t"
  10202. "ldm %[a], {r3, r4}\n\t"
  10203. "ldm %[b]!, {r5, r6}\n\t"
  10204. "sbcs r3, r3, r5\n\t"
  10205. "sbcs r4, r4, r6\n\t"
  10206. "stm %[a]!, {r3, r4}\n\t"
  10207. "ldm %[a], {r3, r4}\n\t"
  10208. "ldm %[b]!, {r5, r6}\n\t"
  10209. "sbcs r3, r3, r5\n\t"
  10210. "sbcs r4, r4, r6\n\t"
  10211. "stm %[a]!, {r3, r4}\n\t"
  10212. "ldm %[a], {r3, r4}\n\t"
  10213. "ldm %[b]!, {r5, r6}\n\t"
  10214. "sbcs r3, r3, r5\n\t"
  10215. "sbcs r4, r4, r6\n\t"
  10216. "stm %[a]!, {r3, r4}\n\t"
  10217. "ldm %[a], {r3, r4}\n\t"
  10218. "ldm %[b]!, {r5, r6}\n\t"
  10219. "sbcs r3, r3, r5\n\t"
  10220. "sbcs r4, r4, r6\n\t"
  10221. "stm %[a]!, {r3, r4}\n\t"
  10222. "ldm %[a], {r3, r4}\n\t"
  10223. "ldm %[b]!, {r5, r6}\n\t"
  10224. "sbcs r3, r3, r5\n\t"
  10225. "sbcs r4, r4, r6\n\t"
  10226. "stm %[a]!, {r3, r4}\n\t"
  10227. "ldm %[a], {r3, r4}\n\t"
  10228. "ldm %[b]!, {r5, r6}\n\t"
  10229. "sbcs r3, r3, r5\n\t"
  10230. "sbcs r4, r4, r6\n\t"
  10231. "stm %[a]!, {r3, r4}\n\t"
  10232. "ldm %[a], {r3, r4}\n\t"
  10233. "ldm %[b]!, {r5, r6}\n\t"
  10234. "sbcs r3, r3, r5\n\t"
  10235. "sbcs r4, r4, r6\n\t"
  10236. "stm %[a]!, {r3, r4}\n\t"
  10237. "ldm %[a], {r3, r4}\n\t"
  10238. "ldm %[b]!, {r5, r6}\n\t"
  10239. "sbcs r3, r3, r5\n\t"
  10240. "sbcs r4, r4, r6\n\t"
  10241. "stm %[a]!, {r3, r4}\n\t"
  10242. "ldm %[a], {r3, r4}\n\t"
  10243. "ldm %[b]!, {r5, r6}\n\t"
  10244. "sbcs r3, r3, r5\n\t"
  10245. "sbcs r4, r4, r6\n\t"
  10246. "stm %[a]!, {r3, r4}\n\t"
  10247. "ldm %[a], {r3, r4}\n\t"
  10248. "ldm %[b]!, {r5, r6}\n\t"
  10249. "sbcs r3, r3, r5\n\t"
  10250. "sbcs r4, r4, r6\n\t"
  10251. "stm %[a]!, {r3, r4}\n\t"
  10252. "ldm %[a], {r3, r4}\n\t"
  10253. "ldm %[b]!, {r5, r6}\n\t"
  10254. "sbcs r3, r3, r5\n\t"
  10255. "sbcs r4, r4, r6\n\t"
  10256. "stm %[a]!, {r3, r4}\n\t"
  10257. "ldm %[a], {r3, r4}\n\t"
  10258. "ldm %[b]!, {r5, r6}\n\t"
  10259. "sbcs r3, r3, r5\n\t"
  10260. "sbcs r4, r4, r6\n\t"
  10261. "stm %[a]!, {r3, r4}\n\t"
  10262. "ldm %[a], {r3, r4}\n\t"
  10263. "ldm %[b]!, {r5, r6}\n\t"
  10264. "sbcs r3, r3, r5\n\t"
  10265. "sbcs r4, r4, r6\n\t"
  10266. "stm %[a]!, {r3, r4}\n\t"
  10267. "ldm %[a], {r3, r4}\n\t"
  10268. "ldm %[b]!, {r5, r6}\n\t"
  10269. "sbcs r3, r3, r5\n\t"
  10270. "sbcs r4, r4, r6\n\t"
  10271. "stm %[a]!, {r3, r4}\n\t"
  10272. "ldm %[a], {r3, r4}\n\t"
  10273. "ldm %[b]!, {r5, r6}\n\t"
  10274. "sbcs r3, r3, r5\n\t"
  10275. "sbcs r4, r4, r6\n\t"
  10276. "stm %[a]!, {r3, r4}\n\t"
  10277. "ldm %[a], {r3, r4}\n\t"
  10278. "ldm %[b]!, {r5, r6}\n\t"
  10279. "sbcs r3, r3, r5\n\t"
  10280. "sbcs r4, r4, r6\n\t"
  10281. "stm %[a]!, {r3, r4}\n\t"
  10282. "ldm %[a], {r3, r4}\n\t"
  10283. "ldm %[b]!, {r5, r6}\n\t"
  10284. "sbcs r3, r3, r5\n\t"
  10285. "sbcs r4, r4, r6\n\t"
  10286. "stm %[a]!, {r3, r4}\n\t"
  10287. "ldm %[a], {r3, r4}\n\t"
  10288. "ldm %[b]!, {r5, r6}\n\t"
  10289. "sbcs r3, r3, r5\n\t"
  10290. "sbcs r4, r4, r6\n\t"
  10291. "stm %[a]!, {r3, r4}\n\t"
  10292. "ldm %[a], {r3, r4}\n\t"
  10293. "ldm %[b]!, {r5, r6}\n\t"
  10294. "sbcs r3, r3, r5\n\t"
  10295. "sbcs r4, r4, r6\n\t"
  10296. "stm %[a]!, {r3, r4}\n\t"
  10297. "ldm %[a], {r3, r4}\n\t"
  10298. "ldm %[b]!, {r5, r6}\n\t"
  10299. "sbcs r3, r3, r5\n\t"
  10300. "sbcs r4, r4, r6\n\t"
  10301. "stm %[a]!, {r3, r4}\n\t"
  10302. "ldm %[a], {r3, r4}\n\t"
  10303. "ldm %[b]!, {r5, r6}\n\t"
  10304. "sbcs r3, r3, r5\n\t"
  10305. "sbcs r4, r4, r6\n\t"
  10306. "stm %[a]!, {r3, r4}\n\t"
  10307. "ldm %[a], {r3, r4}\n\t"
  10308. "ldm %[b]!, {r5, r6}\n\t"
  10309. "sbcs r3, r3, r5\n\t"
  10310. "sbcs r4, r4, r6\n\t"
  10311. "stm %[a]!, {r3, r4}\n\t"
  10312. "ldm %[a], {r3, r4}\n\t"
  10313. "ldm %[b]!, {r5, r6}\n\t"
  10314. "sbcs r3, r3, r5\n\t"
  10315. "sbcs r4, r4, r6\n\t"
  10316. "stm %[a]!, {r3, r4}\n\t"
  10317. "ldm %[a], {r3, r4}\n\t"
  10318. "ldm %[b]!, {r5, r6}\n\t"
  10319. "sbcs r3, r3, r5\n\t"
  10320. "sbcs r4, r4, r6\n\t"
  10321. "stm %[a]!, {r3, r4}\n\t"
  10322. "ldm %[a], {r3, r4}\n\t"
  10323. "ldm %[b]!, {r5, r6}\n\t"
  10324. "sbcs r3, r3, r5\n\t"
  10325. "sbcs r4, r4, r6\n\t"
  10326. "stm %[a]!, {r3, r4}\n\t"
  10327. "ldm %[a], {r3, r4}\n\t"
  10328. "ldm %[b]!, {r5, r6}\n\t"
  10329. "sbcs r3, r3, r5\n\t"
  10330. "sbcs r4, r4, r6\n\t"
  10331. "stm %[a]!, {r3, r4}\n\t"
  10332. "sbc %[c], %[c], %[c]\n\t"
  10333. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  10334. :
  10335. : "memory", "r3", "r4", "r5", "r6"
  10336. );
  10337. return c;
  10338. }
  10339. /* Add b to a into r. (r = a + b)
  10340. *
  10341. * r A single precision integer.
  10342. * a A single precision integer.
  10343. * b A single precision integer.
  10344. */
  10345. SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
  10346. const sp_digit* b)
  10347. {
  10348. sp_digit c = 0;
  10349. __asm__ __volatile__ (
  10350. "ldm %[a]!, {r4, r5}\n\t"
  10351. "ldm %[b]!, {r6, r8}\n\t"
  10352. "adds r4, r4, r6\n\t"
  10353. "adcs r5, r5, r8\n\t"
  10354. "stm %[r]!, {r4, r5}\n\t"
  10355. "ldm %[a]!, {r4, r5}\n\t"
  10356. "ldm %[b]!, {r6, r8}\n\t"
  10357. "adcs r4, r4, r6\n\t"
  10358. "adcs r5, r5, r8\n\t"
  10359. "stm %[r]!, {r4, r5}\n\t"
  10360. "ldm %[a]!, {r4, r5}\n\t"
  10361. "ldm %[b]!, {r6, r8}\n\t"
  10362. "adcs r4, r4, r6\n\t"
  10363. "adcs r5, r5, r8\n\t"
  10364. "stm %[r]!, {r4, r5}\n\t"
  10365. "ldm %[a]!, {r4, r5}\n\t"
  10366. "ldm %[b]!, {r6, r8}\n\t"
  10367. "adcs r4, r4, r6\n\t"
  10368. "adcs r5, r5, r8\n\t"
  10369. "stm %[r]!, {r4, r5}\n\t"
  10370. "ldm %[a]!, {r4, r5}\n\t"
  10371. "ldm %[b]!, {r6, r8}\n\t"
  10372. "adcs r4, r4, r6\n\t"
  10373. "adcs r5, r5, r8\n\t"
  10374. "stm %[r]!, {r4, r5}\n\t"
  10375. "ldm %[a]!, {r4, r5}\n\t"
  10376. "ldm %[b]!, {r6, r8}\n\t"
  10377. "adcs r4, r4, r6\n\t"
  10378. "adcs r5, r5, r8\n\t"
  10379. "stm %[r]!, {r4, r5}\n\t"
  10380. "ldm %[a]!, {r4, r5}\n\t"
  10381. "ldm %[b]!, {r6, r8}\n\t"
  10382. "adcs r4, r4, r6\n\t"
  10383. "adcs r5, r5, r8\n\t"
  10384. "stm %[r]!, {r4, r5}\n\t"
  10385. "ldm %[a]!, {r4, r5}\n\t"
  10386. "ldm %[b]!, {r6, r8}\n\t"
  10387. "adcs r4, r4, r6\n\t"
  10388. "adcs r5, r5, r8\n\t"
  10389. "stm %[r]!, {r4, r5}\n\t"
  10390. "ldm %[a]!, {r4, r5}\n\t"
  10391. "ldm %[b]!, {r6, r8}\n\t"
  10392. "adcs r4, r4, r6\n\t"
  10393. "adcs r5, r5, r8\n\t"
  10394. "stm %[r]!, {r4, r5}\n\t"
  10395. "ldm %[a]!, {r4, r5}\n\t"
  10396. "ldm %[b]!, {r6, r8}\n\t"
  10397. "adcs r4, r4, r6\n\t"
  10398. "adcs r5, r5, r8\n\t"
  10399. "stm %[r]!, {r4, r5}\n\t"
  10400. "ldm %[a]!, {r4, r5}\n\t"
  10401. "ldm %[b]!, {r6, r8}\n\t"
  10402. "adcs r4, r4, r6\n\t"
  10403. "adcs r5, r5, r8\n\t"
  10404. "stm %[r]!, {r4, r5}\n\t"
  10405. "ldm %[a]!, {r4, r5}\n\t"
  10406. "ldm %[b]!, {r6, r8}\n\t"
  10407. "adcs r4, r4, r6\n\t"
  10408. "adcs r5, r5, r8\n\t"
  10409. "stm %[r]!, {r4, r5}\n\t"
  10410. "ldm %[a]!, {r4, r5}\n\t"
  10411. "ldm %[b]!, {r6, r8}\n\t"
  10412. "adcs r4, r4, r6\n\t"
  10413. "adcs r5, r5, r8\n\t"
  10414. "stm %[r]!, {r4, r5}\n\t"
  10415. "ldm %[a]!, {r4, r5}\n\t"
  10416. "ldm %[b]!, {r6, r8}\n\t"
  10417. "adcs r4, r4, r6\n\t"
  10418. "adcs r5, r5, r8\n\t"
  10419. "stm %[r]!, {r4, r5}\n\t"
  10420. "ldm %[a]!, {r4, r5}\n\t"
  10421. "ldm %[b]!, {r6, r8}\n\t"
  10422. "adcs r4, r4, r6\n\t"
  10423. "adcs r5, r5, r8\n\t"
  10424. "stm %[r]!, {r4, r5}\n\t"
  10425. "ldm %[a]!, {r4, r5}\n\t"
  10426. "ldm %[b]!, {r6, r8}\n\t"
  10427. "adcs r4, r4, r6\n\t"
  10428. "adcs r5, r5, r8\n\t"
  10429. "stm %[r]!, {r4, r5}\n\t"
  10430. "ldm %[a]!, {r4, r5}\n\t"
  10431. "ldm %[b]!, {r6, r8}\n\t"
  10432. "adcs r4, r4, r6\n\t"
  10433. "adcs r5, r5, r8\n\t"
  10434. "stm %[r]!, {r4, r5}\n\t"
  10435. "ldm %[a]!, {r4, r5}\n\t"
  10436. "ldm %[b]!, {r6, r8}\n\t"
  10437. "adcs r4, r4, r6\n\t"
  10438. "adcs r5, r5, r8\n\t"
  10439. "stm %[r]!, {r4, r5}\n\t"
  10440. "ldm %[a]!, {r4, r5}\n\t"
  10441. "ldm %[b]!, {r6, r8}\n\t"
  10442. "adcs r4, r4, r6\n\t"
  10443. "adcs r5, r5, r8\n\t"
  10444. "stm %[r]!, {r4, r5}\n\t"
  10445. "ldm %[a]!, {r4, r5}\n\t"
  10446. "ldm %[b]!, {r6, r8}\n\t"
  10447. "adcs r4, r4, r6\n\t"
  10448. "adcs r5, r5, r8\n\t"
  10449. "stm %[r]!, {r4, r5}\n\t"
  10450. "ldm %[a]!, {r4, r5}\n\t"
  10451. "ldm %[b]!, {r6, r8}\n\t"
  10452. "adcs r4, r4, r6\n\t"
  10453. "adcs r5, r5, r8\n\t"
  10454. "stm %[r]!, {r4, r5}\n\t"
  10455. "ldm %[a]!, {r4, r5}\n\t"
  10456. "ldm %[b]!, {r6, r8}\n\t"
  10457. "adcs r4, r4, r6\n\t"
  10458. "adcs r5, r5, r8\n\t"
  10459. "stm %[r]!, {r4, r5}\n\t"
  10460. "ldm %[a]!, {r4, r5}\n\t"
  10461. "ldm %[b]!, {r6, r8}\n\t"
  10462. "adcs r4, r4, r6\n\t"
  10463. "adcs r5, r5, r8\n\t"
  10464. "stm %[r]!, {r4, r5}\n\t"
  10465. "ldm %[a]!, {r4, r5}\n\t"
  10466. "ldm %[b]!, {r6, r8}\n\t"
  10467. "adcs r4, r4, r6\n\t"
  10468. "adcs r5, r5, r8\n\t"
  10469. "stm %[r]!, {r4, r5}\n\t"
  10470. "ldm %[a]!, {r4, r5}\n\t"
  10471. "ldm %[b]!, {r6, r8}\n\t"
  10472. "adcs r4, r4, r6\n\t"
  10473. "adcs r5, r5, r8\n\t"
  10474. "stm %[r]!, {r4, r5}\n\t"
  10475. "ldm %[a]!, {r4, r5}\n\t"
  10476. "ldm %[b]!, {r6, r8}\n\t"
  10477. "adcs r4, r4, r6\n\t"
  10478. "adcs r5, r5, r8\n\t"
  10479. "stm %[r]!, {r4, r5}\n\t"
  10480. "ldm %[a]!, {r4, r5}\n\t"
  10481. "ldm %[b]!, {r6, r8}\n\t"
  10482. "adcs r4, r4, r6\n\t"
  10483. "adcs r5, r5, r8\n\t"
  10484. "stm %[r]!, {r4, r5}\n\t"
  10485. "ldm %[a]!, {r4, r5}\n\t"
  10486. "ldm %[b]!, {r6, r8}\n\t"
  10487. "adcs r4, r4, r6\n\t"
  10488. "adcs r5, r5, r8\n\t"
  10489. "stm %[r]!, {r4, r5}\n\t"
  10490. "ldm %[a]!, {r4, r5}\n\t"
  10491. "ldm %[b]!, {r6, r8}\n\t"
  10492. "adcs r4, r4, r6\n\t"
  10493. "adcs r5, r5, r8\n\t"
  10494. "stm %[r]!, {r4, r5}\n\t"
  10495. "ldm %[a]!, {r4, r5}\n\t"
  10496. "ldm %[b]!, {r6, r8}\n\t"
  10497. "adcs r4, r4, r6\n\t"
  10498. "adcs r5, r5, r8\n\t"
  10499. "stm %[r]!, {r4, r5}\n\t"
  10500. "ldm %[a]!, {r4, r5}\n\t"
  10501. "ldm %[b]!, {r6, r8}\n\t"
  10502. "adcs r4, r4, r6\n\t"
  10503. "adcs r5, r5, r8\n\t"
  10504. "stm %[r]!, {r4, r5}\n\t"
  10505. "ldm %[a]!, {r4, r5}\n\t"
  10506. "ldm %[b]!, {r6, r8}\n\t"
  10507. "adcs r4, r4, r6\n\t"
  10508. "adcs r5, r5, r8\n\t"
  10509. "stm %[r]!, {r4, r5}\n\t"
  10510. "ldm %[a]!, {r4, r5}\n\t"
  10511. "ldm %[b]!, {r6, r8}\n\t"
  10512. "adcs r4, r4, r6\n\t"
  10513. "adcs r5, r5, r8\n\t"
  10514. "stm %[r]!, {r4, r5}\n\t"
  10515. "ldm %[a]!, {r4, r5}\n\t"
  10516. "ldm %[b]!, {r6, r8}\n\t"
  10517. "adcs r4, r4, r6\n\t"
  10518. "adcs r5, r5, r8\n\t"
  10519. "stm %[r]!, {r4, r5}\n\t"
  10520. "ldm %[a]!, {r4, r5}\n\t"
  10521. "ldm %[b]!, {r6, r8}\n\t"
  10522. "adcs r4, r4, r6\n\t"
  10523. "adcs r5, r5, r8\n\t"
  10524. "stm %[r]!, {r4, r5}\n\t"
  10525. "ldm %[a]!, {r4, r5}\n\t"
  10526. "ldm %[b]!, {r6, r8}\n\t"
  10527. "adcs r4, r4, r6\n\t"
  10528. "adcs r5, r5, r8\n\t"
  10529. "stm %[r]!, {r4, r5}\n\t"
  10530. "ldm %[a]!, {r4, r5}\n\t"
  10531. "ldm %[b]!, {r6, r8}\n\t"
  10532. "adcs r4, r4, r6\n\t"
  10533. "adcs r5, r5, r8\n\t"
  10534. "stm %[r]!, {r4, r5}\n\t"
  10535. "ldm %[a]!, {r4, r5}\n\t"
  10536. "ldm %[b]!, {r6, r8}\n\t"
  10537. "adcs r4, r4, r6\n\t"
  10538. "adcs r5, r5, r8\n\t"
  10539. "stm %[r]!, {r4, r5}\n\t"
  10540. "ldm %[a]!, {r4, r5}\n\t"
  10541. "ldm %[b]!, {r6, r8}\n\t"
  10542. "adcs r4, r4, r6\n\t"
  10543. "adcs r5, r5, r8\n\t"
  10544. "stm %[r]!, {r4, r5}\n\t"
  10545. "ldm %[a]!, {r4, r5}\n\t"
  10546. "ldm %[b]!, {r6, r8}\n\t"
  10547. "adcs r4, r4, r6\n\t"
  10548. "adcs r5, r5, r8\n\t"
  10549. "stm %[r]!, {r4, r5}\n\t"
  10550. "ldm %[a]!, {r4, r5}\n\t"
  10551. "ldm %[b]!, {r6, r8}\n\t"
  10552. "adcs r4, r4, r6\n\t"
  10553. "adcs r5, r5, r8\n\t"
  10554. "stm %[r]!, {r4, r5}\n\t"
  10555. "ldm %[a]!, {r4, r5}\n\t"
  10556. "ldm %[b]!, {r6, r8}\n\t"
  10557. "adcs r4, r4, r6\n\t"
  10558. "adcs r5, r5, r8\n\t"
  10559. "stm %[r]!, {r4, r5}\n\t"
  10560. "ldm %[a]!, {r4, r5}\n\t"
  10561. "ldm %[b]!, {r6, r8}\n\t"
  10562. "adcs r4, r4, r6\n\t"
  10563. "adcs r5, r5, r8\n\t"
  10564. "stm %[r]!, {r4, r5}\n\t"
  10565. "ldm %[a]!, {r4, r5}\n\t"
  10566. "ldm %[b]!, {r6, r8}\n\t"
  10567. "adcs r4, r4, r6\n\t"
  10568. "adcs r5, r5, r8\n\t"
  10569. "stm %[r]!, {r4, r5}\n\t"
  10570. "ldm %[a]!, {r4, r5}\n\t"
  10571. "ldm %[b]!, {r6, r8}\n\t"
  10572. "adcs r4, r4, r6\n\t"
  10573. "adcs r5, r5, r8\n\t"
  10574. "stm %[r]!, {r4, r5}\n\t"
  10575. "ldm %[a]!, {r4, r5}\n\t"
  10576. "ldm %[b]!, {r6, r8}\n\t"
  10577. "adcs r4, r4, r6\n\t"
  10578. "adcs r5, r5, r8\n\t"
  10579. "stm %[r]!, {r4, r5}\n\t"
  10580. "ldm %[a]!, {r4, r5}\n\t"
  10581. "ldm %[b]!, {r6, r8}\n\t"
  10582. "adcs r4, r4, r6\n\t"
  10583. "adcs r5, r5, r8\n\t"
  10584. "stm %[r]!, {r4, r5}\n\t"
  10585. "ldm %[a]!, {r4, r5}\n\t"
  10586. "ldm %[b]!, {r6, r8}\n\t"
  10587. "adcs r4, r4, r6\n\t"
  10588. "adcs r5, r5, r8\n\t"
  10589. "stm %[r]!, {r4, r5}\n\t"
  10590. "ldm %[a]!, {r4, r5}\n\t"
  10591. "ldm %[b]!, {r6, r8}\n\t"
  10592. "adcs r4, r4, r6\n\t"
  10593. "adcs r5, r5, r8\n\t"
  10594. "stm %[r]!, {r4, r5}\n\t"
  10595. "ldm %[a]!, {r4, r5}\n\t"
  10596. "ldm %[b]!, {r6, r8}\n\t"
  10597. "adcs r4, r4, r6\n\t"
  10598. "adcs r5, r5, r8\n\t"
  10599. "stm %[r]!, {r4, r5}\n\t"
  10600. "ldm %[a]!, {r4, r5}\n\t"
  10601. "ldm %[b]!, {r6, r8}\n\t"
  10602. "adcs r4, r4, r6\n\t"
  10603. "adcs r5, r5, r8\n\t"
  10604. "stm %[r]!, {r4, r5}\n\t"
  10605. "ldm %[a]!, {r4, r5}\n\t"
  10606. "ldm %[b]!, {r6, r8}\n\t"
  10607. "adcs r4, r4, r6\n\t"
  10608. "adcs r5, r5, r8\n\t"
  10609. "stm %[r]!, {r4, r5}\n\t"
  10610. "ldm %[a]!, {r4, r5}\n\t"
  10611. "ldm %[b]!, {r6, r8}\n\t"
  10612. "adcs r4, r4, r6\n\t"
  10613. "adcs r5, r5, r8\n\t"
  10614. "stm %[r]!, {r4, r5}\n\t"
  10615. "ldm %[a]!, {r4, r5}\n\t"
  10616. "ldm %[b]!, {r6, r8}\n\t"
  10617. "adcs r4, r4, r6\n\t"
  10618. "adcs r5, r5, r8\n\t"
  10619. "stm %[r]!, {r4, r5}\n\t"
  10620. "ldm %[a]!, {r4, r5}\n\t"
  10621. "ldm %[b]!, {r6, r8}\n\t"
  10622. "adcs r4, r4, r6\n\t"
  10623. "adcs r5, r5, r8\n\t"
  10624. "stm %[r]!, {r4, r5}\n\t"
  10625. "ldm %[a]!, {r4, r5}\n\t"
  10626. "ldm %[b]!, {r6, r8}\n\t"
  10627. "adcs r4, r4, r6\n\t"
  10628. "adcs r5, r5, r8\n\t"
  10629. "stm %[r]!, {r4, r5}\n\t"
  10630. "ldm %[a]!, {r4, r5}\n\t"
  10631. "ldm %[b]!, {r6, r8}\n\t"
  10632. "adcs r4, r4, r6\n\t"
  10633. "adcs r5, r5, r8\n\t"
  10634. "stm %[r]!, {r4, r5}\n\t"
  10635. "ldm %[a]!, {r4, r5}\n\t"
  10636. "ldm %[b]!, {r6, r8}\n\t"
  10637. "adcs r4, r4, r6\n\t"
  10638. "adcs r5, r5, r8\n\t"
  10639. "stm %[r]!, {r4, r5}\n\t"
  10640. "ldm %[a]!, {r4, r5}\n\t"
  10641. "ldm %[b]!, {r6, r8}\n\t"
  10642. "adcs r4, r4, r6\n\t"
  10643. "adcs r5, r5, r8\n\t"
  10644. "stm %[r]!, {r4, r5}\n\t"
  10645. "ldm %[a]!, {r4, r5}\n\t"
  10646. "ldm %[b]!, {r6, r8}\n\t"
  10647. "adcs r4, r4, r6\n\t"
  10648. "adcs r5, r5, r8\n\t"
  10649. "stm %[r]!, {r4, r5}\n\t"
  10650. "ldm %[a]!, {r4, r5}\n\t"
  10651. "ldm %[b]!, {r6, r8}\n\t"
  10652. "adcs r4, r4, r6\n\t"
  10653. "adcs r5, r5, r8\n\t"
  10654. "stm %[r]!, {r4, r5}\n\t"
  10655. "ldm %[a]!, {r4, r5}\n\t"
  10656. "ldm %[b]!, {r6, r8}\n\t"
  10657. "adcs r4, r4, r6\n\t"
  10658. "adcs r5, r5, r8\n\t"
  10659. "stm %[r]!, {r4, r5}\n\t"
  10660. "ldm %[a]!, {r4, r5}\n\t"
  10661. "ldm %[b]!, {r6, r8}\n\t"
  10662. "adcs r4, r4, r6\n\t"
  10663. "adcs r5, r5, r8\n\t"
  10664. "stm %[r]!, {r4, r5}\n\t"
  10665. "ldm %[a]!, {r4, r5}\n\t"
  10666. "ldm %[b]!, {r6, r8}\n\t"
  10667. "adcs r4, r4, r6\n\t"
  10668. "adcs r5, r5, r8\n\t"
  10669. "stm %[r]!, {r4, r5}\n\t"
  10670. "mov %[c], #0\n\t"
  10671. "adc %[c], %[c], %[c]\n\t"
  10672. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  10673. :
  10674. : "memory", "r4", "r5", "r6", "r8"
  10675. );
  10676. return c;
  10677. }
  10678. /* Multiply a and b into r. (r = a * b)
  10679. *
  10680. * r A single precision integer.
  10681. * a A single precision integer.
  10682. * b A single precision integer.
  10683. */
  10684. SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
  10685. const sp_digit* b)
  10686. {
  10687. sp_digit* z0 = r;
  10688. sp_digit z1[128];
  10689. sp_digit a1[64];
  10690. sp_digit b1[64];
  10691. sp_digit z2[128];
  10692. sp_digit u, ca, cb;
  10693. ca = sp_2048_add_64(a1, a, &a[64]);
  10694. cb = sp_2048_add_64(b1, b, &b[64]);
  10695. u = ca & cb;
  10696. sp_2048_mul_64(z1, a1, b1);
  10697. sp_2048_mul_64(z2, &a[64], &b[64]);
  10698. sp_2048_mul_64(z0, a, b);
  10699. sp_2048_mask_64(r + 128, a1, 0 - cb);
  10700. sp_2048_mask_64(b1, b1, 0 - ca);
  10701. u += sp_2048_add_64(r + 128, r + 128, b1);
  10702. u += sp_4096_sub_in_place_128(z1, z2);
  10703. u += sp_4096_sub_in_place_128(z1, z0);
  10704. u += sp_4096_add_128(r + 64, r + 64, z1);
  10705. r[192] = u;
  10706. XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
  10707. (void)sp_4096_add_128(r + 128, r + 128, z2);
  10708. }
  10709. /* Square a and put result in r. (r = a * a)
  10710. *
  10711. * r A single precision integer.
  10712. * a A single precision integer.
  10713. */
  10714. SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
  10715. {
  10716. sp_digit* z0 = r;
  10717. sp_digit z2[128];
  10718. sp_digit z1[128];
  10719. sp_digit a1[64];
  10720. sp_digit u;
  10721. u = sp_2048_add_64(a1, a, &a[64]);
  10722. sp_2048_sqr_64(z1, a1);
  10723. sp_2048_sqr_64(z2, &a[64]);
  10724. sp_2048_sqr_64(z0, a);
  10725. sp_2048_mask_64(r + 128, a1, 0 - u);
  10726. u += sp_2048_add_64(r + 128, r + 128, r + 128);
  10727. u += sp_4096_sub_in_place_128(z1, z2);
  10728. u += sp_4096_sub_in_place_128(z1, z0);
  10729. u += sp_4096_add_128(r + 64, r + 64, z1);
  10730. r[192] = u;
  10731. XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
  10732. (void)sp_4096_add_128(r + 128, r + 128, z2);
  10733. }
  10734. #endif /* !WOLFSSL_SP_SMALL */
  10735. #ifdef WOLFSSL_SP_SMALL
  10736. /* Add b to a into r. (r = a + b)
  10737. *
  10738. * r A single precision integer.
  10739. * a A single precision integer.
  10740. * b A single precision integer.
  10741. */
  10742. SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
  10743. const sp_digit* b)
  10744. {
  10745. sp_digit c = 0;
  10746. __asm__ __volatile__ (
  10747. "mov r6, %[a]\n\t"
  10748. "mov r8, #0\n\t"
  10749. "add r6, r6, #512\n\t"
  10750. "sub r8, r8, #1\n\t"
  10751. "\n1:\n\t"
  10752. "adds %[c], %[c], r8\n\t"
  10753. "ldr r4, [%[a]]\n\t"
  10754. "ldr r5, [%[b]]\n\t"
  10755. "adcs r4, r4, r5\n\t"
  10756. "str r4, [%[r]]\n\t"
  10757. "mov %[c], #0\n\t"
  10758. "adc %[c], %[c], %[c]\n\t"
  10759. "add %[a], %[a], #4\n\t"
  10760. "add %[b], %[b], #4\n\t"
  10761. "add %[r], %[r], #4\n\t"
  10762. "cmp %[a], r6\n\t"
  10763. #ifdef __GNUC__
  10764. "bne 1b\n\t"
  10765. #else
  10766. "bne.n 1b\n\t"
  10767. #endif /* __GNUC__ */
  10768. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  10769. :
  10770. : "memory", "r4", "r5", "r6", "r8"
  10771. );
  10772. return c;
  10773. }
  10774. #endif /* WOLFSSL_SP_SMALL */
  10775. #ifdef WOLFSSL_SP_SMALL
  10776. /* Sub b from a into a. (a -= b)
  10777. *
  10778. * a A single precision integer.
  10779. * b A single precision integer.
  10780. */
  10781. SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
  10782. const sp_digit* b)
  10783. {
  10784. sp_digit c = 0;
  10785. __asm__ __volatile__ (
  10786. "mov r8, %[a]\n\t"
  10787. "add r8, r8, #512\n\t"
  10788. "\n1:\n\t"
  10789. "mov r5, #0\n\t"
  10790. "subs r5, r5, %[c]\n\t"
  10791. "ldr r3, [%[a]]\n\t"
  10792. "ldr r4, [%[a], #4]\n\t"
  10793. "ldr r5, [%[b]]\n\t"
  10794. "ldr r6, [%[b], #4]\n\t"
  10795. "sbcs r3, r3, r5\n\t"
  10796. "sbcs r4, r4, r6\n\t"
  10797. "str r3, [%[a]]\n\t"
  10798. "str r4, [%[a], #4]\n\t"
  10799. "sbc %[c], %[c], %[c]\n\t"
  10800. "add %[a], %[a], #8\n\t"
  10801. "add %[b], %[b], #8\n\t"
  10802. "cmp %[a], r8\n\t"
  10803. #ifdef __GNUC__
  10804. "bne 1b\n\t"
  10805. #else
  10806. "bne.n 1b\n\t"
  10807. #endif /* __GNUC__ */
  10808. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  10809. :
  10810. : "memory", "r3", "r4", "r5", "r6", "r8"
  10811. );
  10812. return c;
  10813. }
  10814. #endif /* WOLFSSL_SP_SMALL */
  10815. #ifdef WOLFSSL_SP_SMALL
  10816. /* Multiply a and b into r. (r = a * b)
  10817. *
  10818. * r A single precision integer.
  10819. * a A single precision integer.
  10820. * b A single precision integer.
  10821. */
  10822. SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
  10823. const sp_digit* b)
  10824. {
  10825. sp_digit tmp_arr[128 * 2];
  10826. sp_digit* tmp = tmp_arr;
  10827. __asm__ __volatile__ (
  10828. "mov r3, #0\n\t"
  10829. "mov r4, #0\n\t"
  10830. "mov r9, r3\n\t"
  10831. "mov r12, %[r]\n\t"
  10832. "mov r10, %[a]\n\t"
  10833. "mov r11, %[b]\n\t"
  10834. "mov r6, #2\n\t"
  10835. "lsl r6, r6, #8\n\t"
  10836. "add r6, r6, r10\n\t"
  10837. "mov r14, r6\n\t"
  10838. "\n1:\n\t"
  10839. "mov %[r], #0\n\t"
  10840. "mov r5, #0\n\t"
  10841. "mov r6, #1\n\t"
  10842. "lsl r6, r6, #8\n\t"
  10843. "add r6, r6, #252\n\t"
  10844. "mov %[a], r9\n\t"
  10845. "subs %[a], %[a], r6\n\t"
  10846. "sbc r6, r6, r6\n\t"
  10847. "mvn r6, r6\n\t"
  10848. "and %[a], %[a], r6\n\t"
  10849. "mov %[b], r9\n\t"
  10850. "sub %[b], %[b], %[a]\n\t"
  10851. "add %[a], %[a], r10\n\t"
  10852. "add %[b], %[b], r11\n\t"
  10853. "\n2:\n\t"
  10854. /* Multiply Start */
  10855. "ldr r6, [%[a]]\n\t"
  10856. "ldr r8, [%[b]]\n\t"
  10857. "umull r6, r8, r6, r8\n\t"
  10858. "adds r3, r3, r6\n\t"
  10859. "adcs r4, r4, r8\n\t"
  10860. "adc r5, r5, %[r]\n\t"
  10861. /* Multiply Done */
  10862. "add %[a], %[a], #4\n\t"
  10863. "sub %[b], %[b], #4\n\t"
  10864. "cmp %[a], r14\n\t"
  10865. #ifdef __GNUC__
  10866. "beq 3f\n\t"
  10867. #else
  10868. "beq.n 3f\n\t"
  10869. #endif /* __GNUC__ */
  10870. "mov r6, r9\n\t"
  10871. "add r6, r6, r10\n\t"
  10872. "cmp %[a], r6\n\t"
  10873. #ifdef __GNUC__
  10874. "ble 2b\n\t"
  10875. #else
  10876. "ble.n 2b\n\t"
  10877. #endif /* __GNUC__ */
  10878. "\n3:\n\t"
  10879. "mov %[r], r12\n\t"
  10880. "mov r8, r9\n\t"
  10881. "str r3, [%[r], r8]\n\t"
  10882. "mov r3, r4\n\t"
  10883. "mov r4, r5\n\t"
  10884. "add r8, r8, #4\n\t"
  10885. "mov r9, r8\n\t"
  10886. "mov r6, #3\n\t"
  10887. "lsl r6, r6, #8\n\t"
  10888. "add r6, r6, #248\n\t"
  10889. "cmp r8, r6\n\t"
  10890. #ifdef __GNUC__
  10891. "ble 1b\n\t"
  10892. #else
  10893. "ble.n 1b\n\t"
  10894. #endif /* __GNUC__ */
  10895. "str r3, [%[r], r8]\n\t"
  10896. "mov %[a], r10\n\t"
  10897. "mov %[b], r11\n\t"
  10898. :
  10899. : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
  10900. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  10901. );
  10902. XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
  10903. }
  10904. /* Square a and put result in r. (r = a * a)
  10905. *
  10906. * r A single precision integer.
  10907. * a A single precision integer.
  10908. */
  10909. SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
  10910. {
  10911. __asm__ __volatile__ (
  10912. "mov r3, #0\n\t"
  10913. "mov r4, #0\n\t"
  10914. "mov r5, #0\n\t"
  10915. "mov r9, r3\n\t"
  10916. "mov r12, %[r]\n\t"
  10917. "mov r6, #4\n\t"
  10918. "lsl r6, r6, #8\n\t"
  10919. "neg r6, r6\n\t"
  10920. "add sp, sp, r6\n\t"
  10921. "mov r11, sp\n\t"
  10922. "mov r10, %[a]\n\t"
  10923. "\n1:\n\t"
  10924. "mov %[r], #0\n\t"
  10925. "mov r6, #1\n\t"
  10926. "lsl r6, r6, #8\n\t"
  10927. "add r6, r6, #252\n\t"
  10928. "mov %[a], r9\n\t"
  10929. "subs %[a], %[a], r6\n\t"
  10930. "sbc r6, r6, r6\n\t"
  10931. "mvn r6, r6\n\t"
  10932. "and %[a], %[a], r6\n\t"
  10933. "mov r2, r9\n\t"
  10934. "sub r2, r2, %[a]\n\t"
  10935. "add %[a], %[a], r10\n\t"
  10936. "add r2, r2, r10\n\t"
  10937. "\n2:\n\t"
  10938. "cmp r2, %[a]\n\t"
  10939. #ifdef __GNUC__
  10940. "beq 4f\n\t"
  10941. #else
  10942. "beq.n 4f\n\t"
  10943. #endif /* __GNUC__ */
  10944. /* Multiply * 2: Start */
  10945. "ldr r6, [%[a]]\n\t"
  10946. "ldr r8, [r2]\n\t"
  10947. "umull r6, r8, r6, r8\n\t"
  10948. "adds r3, r3, r6\n\t"
  10949. "adcs r4, r4, r8\n\t"
  10950. "adc r5, r5, %[r]\n\t"
  10951. "adds r3, r3, r6\n\t"
  10952. "adcs r4, r4, r8\n\t"
  10953. "adc r5, r5, %[r]\n\t"
  10954. /* Multiply * 2: Done */
  10955. #ifdef __GNUC__
  10956. "bal 5f\n\t"
  10957. #else
  10958. "bal.n 5f\n\t"
  10959. #endif /* __GNUC__ */
  10960. "\n4:\n\t"
  10961. /* Square: Start */
  10962. "ldr r6, [%[a]]\n\t"
  10963. "umull r6, r8, r6, r6\n\t"
  10964. "adds r3, r3, r6\n\t"
  10965. "adcs r4, r4, r8\n\t"
  10966. "adc r5, r5, %[r]\n\t"
  10967. /* Square: Done */
  10968. "\n5:\n\t"
  10969. "add %[a], %[a], #4\n\t"
  10970. "sub r2, r2, #4\n\t"
  10971. "mov r6, #2\n\t"
  10972. "lsl r6, r6, #8\n\t"
  10973. "add r6, r6, r10\n\t"
  10974. "cmp %[a], r6\n\t"
  10975. #ifdef __GNUC__
  10976. "beq 3f\n\t"
  10977. #else
  10978. "beq.n 3f\n\t"
  10979. #endif /* __GNUC__ */
  10980. "cmp %[a], r2\n\t"
  10981. #ifdef __GNUC__
  10982. "bgt 3f\n\t"
  10983. #else
  10984. "bgt.n 3f\n\t"
  10985. #endif /* __GNUC__ */
  10986. "mov r8, r9\n\t"
  10987. "add r8, r8, r10\n\t"
  10988. "cmp %[a], r8\n\t"
  10989. #ifdef __GNUC__
  10990. "ble 2b\n\t"
  10991. #else
  10992. "ble.n 2b\n\t"
  10993. #endif /* __GNUC__ */
  10994. "\n3:\n\t"
  10995. "mov %[r], r11\n\t"
  10996. "mov r8, r9\n\t"
  10997. "str r3, [%[r], r8]\n\t"
  10998. "mov r3, r4\n\t"
  10999. "mov r4, r5\n\t"
  11000. "mov r5, #0\n\t"
  11001. "add r8, r8, #4\n\t"
  11002. "mov r9, r8\n\t"
  11003. "mov r6, #3\n\t"
  11004. "lsl r6, r6, #8\n\t"
  11005. "add r6, r6, #248\n\t"
  11006. "cmp r8, r6\n\t"
  11007. #ifdef __GNUC__
  11008. "ble 1b\n\t"
  11009. #else
  11010. "ble.n 1b\n\t"
  11011. #endif /* __GNUC__ */
  11012. "mov %[a], r10\n\t"
  11013. "str r3, [%[r], r8]\n\t"
  11014. "mov %[r], r12\n\t"
  11015. "mov %[a], r11\n\t"
  11016. "mov r3, #3\n\t"
  11017. "lsl r3, r3, #8\n\t"
  11018. "add r3, r3, #252\n\t"
  11019. "\n4:\n\t"
  11020. "ldr r6, [%[a], r3]\n\t"
  11021. "str r6, [%[r], r3]\n\t"
  11022. "subs r3, r3, #4\n\t"
  11023. #ifdef __GNUC__
  11024. "bge 4b\n\t"
  11025. #else
  11026. "bge.n 4b\n\t"
  11027. #endif /* __GNUC__ */
  11028. "mov r6, #4\n\t"
  11029. "lsl r6, r6, #8\n\t"
  11030. "add sp, sp, r6\n\t"
  11031. :
  11032. : [r] "r" (r), [a] "r" (a)
  11033. : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
  11034. );
  11035. }
  11036. #endif /* WOLFSSL_SP_SMALL */
  11037. /* Caclulate the bottom digit of -1/a mod 2^n.
  11038. *
  11039. * a A single precision number.
  11040. * rho Bottom word of inverse.
  11041. */
  11042. static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
  11043. {
  11044. sp_digit x, b;
  11045. b = a[0];
  11046. x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  11047. x *= 2 - b * x; /* here x*a==1 mod 2**8 */
  11048. x *= 2 - b * x; /* here x*a==1 mod 2**16 */
  11049. x *= 2 - b * x; /* here x*a==1 mod 2**32 */
  11050. /* rho = -1/m mod b */
  11051. *rho = -x;
  11052. }
  11053. /* Mul a by digit b into r. (r = a * b)
  11054. *
  11055. * r A single precision integer.
  11056. * a A single precision integer.
  11057. * b A single precision digit.
  11058. */
  11059. SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
  11060. sp_digit b)
  11061. {
  11062. __asm__ __volatile__ (
  11063. "add r9, %[a], #512\n\t"
  11064. /* A[0] * B */
  11065. "ldr r6, [%[a]], #4\n\t"
  11066. "umull r5, r3, r6, %[b]\n\t"
  11067. "mov r4, #0\n\t"
  11068. "str r5, [%[r]], #4\n\t"
  11069. /* A[0] * B - Done */
  11070. "\n1:\n\t"
  11071. "mov r5, #0\n\t"
  11072. /* A[] * B */
  11073. "ldr r6, [%[a]], #4\n\t"
  11074. "umull r6, r8, r6, %[b]\n\t"
  11075. "adds r3, r3, r6\n\t"
  11076. "adcs r4, r4, r8\n\t"
  11077. "adc r5, r5, #0\n\t"
  11078. /* A[] * B - Done */
  11079. "str r3, [%[r]], #4\n\t"
  11080. "mov r3, r4\n\t"
  11081. "mov r4, r5\n\t"
  11082. "cmp %[a], r9\n\t"
  11083. #ifdef __GNUC__
  11084. "blt 1b\n\t"
  11085. #else
  11086. "blt.n 1b\n\t"
  11087. #endif /* __GNUC__ */
  11088. "str r3, [%[r]]\n\t"
  11089. : [r] "+r" (r), [a] "+r" (a)
  11090. : [b] "r" (b)
  11091. : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
  11092. );
  11093. }
  11094. #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
  11095. /* r = 2^n mod m where n is the number of bits to reduce by.
  11096. * Given m must be 4096 bits, just need to subtract.
  11097. *
  11098. * r A single precision number.
  11099. * m A single precision number.
  11100. */
  11101. static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
  11102. {
  11103. XMEMSET(r, 0, sizeof(sp_digit) * 128);
  11104. /* r = 2^n mod m */
  11105. sp_4096_sub_in_place_128(r, m);
  11106. }
  11107. #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
  11108. /* Conditionally subtract b from a using the mask m.
  11109. * m is -1 to subtract and 0 when not copying.
  11110. *
  11111. * r A single precision number representing condition subtract result.
  11112. * a A single precision number to subtract from.
  11113. * b A single precision number to subtract.
  11114. * m Mask value to apply.
  11115. */
  11116. SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
  11117. const sp_digit* b, sp_digit m)
  11118. {
  11119. sp_digit c = 0;
  11120. __asm__ __volatile__ (
  11121. "mov r5, #2\n\t"
  11122. "lsl r5, r5, #8\n\t"
  11123. "mov r9, r5\n\t"
  11124. "mov r8, #0\n\t"
  11125. "\n1:\n\t"
  11126. "ldr r6, [%[b], r8]\n\t"
  11127. "and r6, r6, %[m]\n\t"
  11128. "mov r5, #0\n\t"
  11129. "subs r5, r5, %[c]\n\t"
  11130. "ldr r5, [%[a], r8]\n\t"
  11131. "sbcs r5, r5, r6\n\t"
  11132. "sbcs %[c], %[c], %[c]\n\t"
  11133. "str r5, [%[r], r8]\n\t"
  11134. "add r8, r8, #4\n\t"
  11135. "cmp r8, r9\n\t"
  11136. #ifdef __GNUC__
  11137. "blt 1b\n\t"
  11138. #else
  11139. "blt.n 1b\n\t"
  11140. #endif /* __GNUC__ */
  11141. : [c] "+r" (c)
  11142. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  11143. : "memory", "r5", "r6", "r8", "r9"
  11144. );
  11145. return c;
  11146. }
  11147. /* Reduce the number back to 4096 bits using Montgomery reduction.
  11148. *
  11149. * a A single precision number to reduce in place.
  11150. * m The single precision number representing the modulus.
  11151. * mp The digit representing the negative inverse of m mod 2^n.
  11152. */
  11153. SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
  11154. sp_digit mp)
  11155. {
  11156. sp_digit ca = 0;
  11157. __asm__ __volatile__ (
  11158. "mov r9, %[mp]\n\t"
  11159. "mov r12, %[m]\n\t"
  11160. "mov r10, %[a]\n\t"
  11161. "mov r4, #0\n\t"
  11162. "add r11, r10, #512\n\t"
  11163. "\n1:\n\t"
  11164. /* mu = a[i] * mp */
  11165. "mov %[mp], r9\n\t"
  11166. "ldr %[a], [r10]\n\t"
  11167. "mul %[mp], %[mp], %[a]\n\t"
  11168. "mov %[m], r12\n\t"
  11169. "add r14, r10, #504\n\t"
  11170. "\n2:\n\t"
  11171. /* a[i+j] += m[j] * mu */
  11172. "ldr %[a], [r10]\n\t"
  11173. "mov r5, #0\n\t"
  11174. /* Multiply m[j] and mu - Start */
  11175. "ldr r8, [%[m]], #4\n\t"
  11176. "umull r6, r8, %[mp], r8\n\t"
  11177. "adds %[a], %[a], r6\n\t"
  11178. "adc r5, r5, r8\n\t"
  11179. /* Multiply m[j] and mu - Done */
  11180. "adds r4, r4, %[a]\n\t"
  11181. "adc r5, r5, #0\n\t"
  11182. "str r4, [r10], #4\n\t"
  11183. /* a[i+j+1] += m[j+1] * mu */
  11184. "ldr %[a], [r10]\n\t"
  11185. "mov r4, #0\n\t"
  11186. /* Multiply m[j] and mu - Start */
  11187. "ldr r8, [%[m]], #4\n\t"
  11188. "umull r6, r8, %[mp], r8\n\t"
  11189. "adds %[a], %[a], r6\n\t"
  11190. "adc r4, r4, r8\n\t"
  11191. /* Multiply m[j] and mu - Done */
  11192. "adds r5, r5, %[a]\n\t"
  11193. "adc r4, r4, #0\n\t"
  11194. "str r5, [r10], #4\n\t"
  11195. "cmp r10, r14\n\t"
  11196. #ifdef __GNUC__
  11197. "blt 2b\n\t"
  11198. #else
  11199. "blt.n 2b\n\t"
  11200. #endif /* __GNUC__ */
  11201. /* a[i+126] += m[126] * mu */
  11202. "ldr %[a], [r10]\n\t"
  11203. "mov r5, #0\n\t"
  11204. /* Multiply m[j] and mu - Start */
  11205. "ldr r8, [%[m]], #4\n\t"
  11206. "umull r6, r8, %[mp], r8\n\t"
  11207. "adds %[a], %[a], r6\n\t"
  11208. "adc r5, r5, r8\n\t"
  11209. /* Multiply m[j] and mu - Done */
  11210. "adds r4, r4, %[a]\n\t"
  11211. "adc r5, r5, #0\n\t"
  11212. "str r4, [r10], #4\n\t"
  11213. /* a[i+127] += m[127] * mu */
  11214. "mov r4, %[ca]\n\t"
  11215. "mov %[ca], #0\n\t"
  11216. /* Multiply m[127] and mu - Start */
  11217. "ldr r8, [%[m]]\n\t"
  11218. "umull r6, r8, %[mp], r8\n\t"
  11219. "adds r5, r5, r6\n\t"
  11220. "adcs r4, r4, r8\n\t"
  11221. "adc %[ca], %[ca], #0\n\t"
  11222. /* Multiply m[127] and mu - Done */
  11223. "ldr r6, [r10]\n\t"
  11224. "ldr r8, [r10, #4]\n\t"
  11225. "adds r6, r6, r5\n\t"
  11226. "adcs r8, r8, r4\n\t"
  11227. "adc %[ca], %[ca], #0\n\t"
  11228. "str r6, [r10]\n\t"
  11229. "str r8, [r10, #4]\n\t"
  11230. /* Next word in a */
  11231. "sub r10, r10, #504\n\t"
  11232. "cmp r10, r11\n\t"
  11233. #ifdef __GNUC__
  11234. "blt 1b\n\t"
  11235. #else
  11236. "blt.n 1b\n\t"
  11237. #endif /* __GNUC__ */
  11238. "mov %[a], r10\n\t"
  11239. "mov %[m], r12\n\t"
  11240. : [ca] "+r" (ca), [a] "+r" (a)
  11241. : [m] "r" (m), [mp] "r" (mp)
  11242. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  11243. );
  11244. sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
  11245. }
  11246. /* Multiply two Montogmery form numbers mod the modulus (prime).
  11247. * (r = a * b mod m)
  11248. *
  11249. * r Result of multiplication.
  11250. * a First number to multiply in Montogmery form.
  11251. * b Second number to multiply in Montogmery form.
  11252. * m Modulus (prime).
  11253. * mp Montogmery mulitplier.
  11254. */
  11255. static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
  11256. const sp_digit* m, sp_digit mp)
  11257. {
  11258. sp_4096_mul_128(r, a, b);
  11259. sp_4096_mont_reduce_128(r, m, mp);
  11260. }
  11261. /* Square the Montgomery form number. (r = a * a mod m)
  11262. *
  11263. * r Result of squaring.
  11264. * a Number to square in Montogmery form.
  11265. * m Modulus (prime).
  11266. * mp Montogmery mulitplier.
  11267. */
  11268. static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
  11269. sp_digit mp)
  11270. {
  11271. sp_4096_sqr_128(r, a);
  11272. sp_4096_mont_reduce_128(r, m, mp);
  11273. }
  11274. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  11275. *
  11276. * d1 The high order half of the number to divide.
  11277. * d0 The low order half of the number to divide.
  11278. * div The dividend.
  11279. * returns the result of the division.
  11280. *
  11281. * Note that this is an approximate div. It may give an answer 1 larger.
  11282. */
  11283. SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
  11284. sp_digit div)
  11285. {
  11286. sp_digit r = 0;
  11287. __asm__ __volatile__ (
  11288. "lsr r6, %[div], #16\n\t"
  11289. "add r6, r6, #1\n\t"
  11290. "udiv r4, %[d1], r6\n\t"
  11291. "lsl r8, r4, #16\n\t"
  11292. "umull r4, r5, %[div], r8\n\t"
  11293. "subs %[d0], %[d0], r4\n\t"
  11294. "sbc %[d1], %[d1], r5\n\t"
  11295. "udiv r5, %[d1], r6\n\t"
  11296. "lsl r4, r5, #16\n\t"
  11297. "add r8, r8, r4\n\t"
  11298. "umull r4, r5, %[div], r4\n\t"
  11299. "subs %[d0], %[d0], r4\n\t"
  11300. "sbc %[d1], %[d1], r5\n\t"
  11301. "lsl r4, %[d1], #16\n\t"
  11302. "orr r4, r4, %[d0], lsr #16\n\t"
  11303. "udiv r4, r4, r6\n\t"
  11304. "add r8, r8, r4\n\t"
  11305. "umull r4, r5, %[div], r4\n\t"
  11306. "subs %[d0], %[d0], r4\n\t"
  11307. "sbc %[d1], %[d1], r5\n\t"
  11308. "lsl r4, %[d1], #16\n\t"
  11309. "orr r4, r4, %[d0], lsr #16\n\t"
  11310. "udiv r4, r4, r6\n\t"
  11311. "add r8, r8, r4\n\t"
  11312. "umull r4, r5, %[div], r4\n\t"
  11313. "subs %[d0], %[d0], r4\n\t"
  11314. "sbc %[d1], %[d1], r5\n\t"
  11315. "udiv r4, %[d0], %[div]\n\t"
  11316. "add r8, r8, r4\n\t"
  11317. "mov %[r], r8\n\t"
  11318. : [r] "+r" (r)
  11319. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  11320. : "r4", "r5", "r6", "r8"
  11321. );
  11322. return r;
  11323. }
  11324. /* AND m into each word of a and store in r.
  11325. *
  11326. * r A single precision integer.
  11327. * a A single precision integer.
  11328. * m Mask to AND against each digit.
  11329. */
  11330. static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
  11331. {
  11332. #ifdef WOLFSSL_SP_SMALL
  11333. int i;
  11334. for (i=0; i<128; i++) {
  11335. r[i] = a[i] & m;
  11336. }
  11337. #else
  11338. int i;
  11339. for (i = 0; i < 128; i += 8) {
  11340. r[i+0] = a[i+0] & m;
  11341. r[i+1] = a[i+1] & m;
  11342. r[i+2] = a[i+2] & m;
  11343. r[i+3] = a[i+3] & m;
  11344. r[i+4] = a[i+4] & m;
  11345. r[i+5] = a[i+5] & m;
  11346. r[i+6] = a[i+6] & m;
  11347. r[i+7] = a[i+7] & m;
  11348. }
  11349. #endif
  11350. }
  11351. /* Compare a with b in constant time.
  11352. *
  11353. * a A single precision integer.
  11354. * b A single precision integer.
  11355. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  11356. * respectively.
  11357. */
  11358. SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
  11359. {
  11360. sp_digit r = 0;
  11361. __asm__ __volatile__ (
  11362. "mov r3, #0\n\t"
  11363. "mvn r3, r3\n\t"
  11364. "mov r6, #1\n\t"
  11365. "lsl r6, r6, #8\n\t"
  11366. "add r6, r6, #252\n\t"
  11367. "\n1:\n\t"
  11368. "ldr r8, [%[a], r6]\n\t"
  11369. "ldr r5, [%[b], r6]\n\t"
  11370. "and r8, r8, r3\n\t"
  11371. "and r5, r5, r3\n\t"
  11372. "mov r4, r8\n\t"
  11373. "subs r8, r8, r5\n\t"
  11374. "sbc r8, r8, r8\n\t"
  11375. "add %[r], %[r], r8\n\t"
  11376. "mvn r8, r8\n\t"
  11377. "and r3, r3, r8\n\t"
  11378. "subs r5, r5, r4\n\t"
  11379. "sbc r8, r8, r8\n\t"
  11380. "sub %[r], %[r], r8\n\t"
  11381. "mvn r8, r8\n\t"
  11382. "and r3, r3, r8\n\t"
  11383. "sub r6, r6, #4\n\t"
  11384. "cmp r6, #0\n\t"
  11385. #ifdef __GNUC__
  11386. "bge 1b\n\t"
  11387. #else
  11388. "bge.n 1b\n\t"
  11389. #endif /* __GNUC__ */
  11390. : [r] "+r" (r)
  11391. : [a] "r" (a), [b] "r" (b)
  11392. : "r3", "r4", "r5", "r6", "r8"
  11393. );
  11394. return r;
  11395. }
  11396. /* Divide d in a and put remainder into r (m*d + r = a)
  11397. * m is not calculated as it is not needed at this time.
  11398. *
  11399. * a Number to be divided.
  11400. * d Number to divide with.
  11401. * m Multiplier result.
  11402. * r Remainder from the division.
  11403. * returns MP_OKAY indicating success.
  11404. */
  11405. static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
  11406. sp_digit* r)
  11407. {
  11408. sp_digit t1[256], t2[129];
  11409. sp_digit div, r1;
  11410. int i;
  11411. (void)m;
  11412. div = d[127];
  11413. XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
  11414. for (i=127; i>=0; i--) {
  11415. sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
  11416. r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
  11417. sp_4096_mul_d_128(t2, d, r1);
  11418. t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
  11419. t1[128 + i] -= t2[128];
  11420. sp_4096_mask_128(t2, d, t1[128 + i]);
  11421. t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
  11422. sp_4096_mask_128(t2, d, t1[128 + i]);
  11423. t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
  11424. }
  11425. r1 = sp_4096_cmp_128(t1, d) >= 0;
  11426. sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
  11427. return MP_OKAY;
  11428. }
  11429. /* Reduce a modulo m into r. (r = a mod m)
  11430. *
  11431. * r A single precision number that is the reduced result.
  11432. * a A single precision number that is to be reduced.
  11433. * m A single precision number that is the modulus to reduce with.
  11434. * returns MP_OKAY indicating success.
  11435. */
  11436. static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
  11437. {
  11438. return sp_4096_div_128(a, m, NULL, r);
  11439. }
  11440. /* Divide d in a and put remainder into r (m*d + r = a)
  11441. * m is not calculated as it is not needed at this time.
  11442. *
  11443. * a Number to be divided.
  11444. * d Number to divide with.
  11445. * m Multiplier result.
  11446. * r Remainder from the division.
  11447. * returns MP_OKAY indicating success.
  11448. */
  11449. static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
  11450. sp_digit* r)
  11451. {
  11452. sp_digit t1[256], t2[129];
  11453. sp_digit div, r1;
  11454. int i;
  11455. (void)m;
  11456. div = d[127];
  11457. XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
  11458. for (i=127; i>=0; i--) {
  11459. sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
  11460. r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
  11461. sp_4096_mul_d_128(t2, d, r1);
  11462. t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
  11463. t1[128 + i] -= t2[128];
  11464. if (t1[128 + i] != 0) {
  11465. t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
  11466. if (t1[128 + i] != 0)
  11467. t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
  11468. }
  11469. }
  11470. r1 = sp_4096_cmp_128(t1, d) >= 0;
  11471. sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
  11472. return MP_OKAY;
  11473. }
  11474. /* Reduce a modulo m into r. (r = a mod m)
  11475. *
  11476. * r A single precision number that is the reduced result.
  11477. * a A single precision number that is to be reduced.
  11478. * m A single precision number that is the modulus to reduce with.
  11479. * returns MP_OKAY indicating success.
  11480. */
  11481. static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
  11482. {
  11483. return sp_4096_div_128_cond(a, m, NULL, r);
  11484. }
  11485. #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  11486. defined(WOLFSSL_HAVE_SP_DH)
  11487. #ifdef WOLFSSL_SP_SMALL
  11488. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  11489. *
  11490. * r A single precision number that is the result of the operation.
  11491. * a A single precision number being exponentiated.
  11492. * e A single precision number that is the exponent.
  11493. * bits The number of bits in the exponent.
  11494. * m A single precision number that is the modulus.
  11495. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  11496. */
  11497. static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
  11498. int bits, const sp_digit* m, int reduceA)
  11499. {
  11500. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11501. sp_digit* td;
  11502. #else
  11503. sp_digit td[16 * 256];
  11504. #endif
  11505. sp_digit* t[16];
  11506. sp_digit* norm;
  11507. sp_digit mp = 1;
  11508. sp_digit n;
  11509. sp_digit mask;
  11510. int i;
  11511. int c, y;
  11512. int err = MP_OKAY;
  11513. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11514. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 256), NULL,
  11515. DYNAMIC_TYPE_TMP_BUFFER);
  11516. if (td == NULL) {
  11517. err = MEMORY_E;
  11518. }
  11519. #endif
  11520. if (err == MP_OKAY) {
  11521. norm = td;
  11522. for (i=0; i<16; i++) {
  11523. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11524. t[i] = td + i * 256;
  11525. #else
  11526. t[i] = &td[i * 256];
  11527. #endif
  11528. }
  11529. sp_4096_mont_setup(m, &mp);
  11530. sp_4096_mont_norm_128(norm, m);
  11531. XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
  11532. if (reduceA != 0) {
  11533. err = sp_4096_mod_128(t[1] + 128, a, m);
  11534. if (err == MP_OKAY) {
  11535. err = sp_4096_mod_128(t[1], t[1], m);
  11536. }
  11537. }
  11538. else {
  11539. XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
  11540. err = sp_4096_mod_128(t[1], t[1], m);
  11541. }
  11542. }
  11543. if (err == MP_OKAY) {
  11544. sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
  11545. sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
  11546. sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
  11547. sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
  11548. sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
  11549. sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
  11550. sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
  11551. sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
  11552. sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
  11553. sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
  11554. sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
  11555. sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
  11556. sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
  11557. sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
  11558. i = (bits - 1) / 32;
  11559. n = e[i--];
  11560. c = bits & 31;
  11561. if (c == 0) {
  11562. c = 32;
  11563. }
  11564. c -= bits % 4;
  11565. if (c == 32) {
  11566. c = 28;
  11567. }
  11568. if (c < 0) {
  11569. /* Number of bits in top word is less than number needed. */
  11570. c = -c;
  11571. y = (int)(n << c);
  11572. n = e[i--];
  11573. y |= (int)(n >> (64 - c));
  11574. n <<= c;
  11575. c = 64 - c;
  11576. }
  11577. else {
  11578. y = (int)(n >> c);
  11579. n <<= 32 - c;
  11580. }
  11581. XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
  11582. for (; i>=0 || c>=4; ) {
  11583. if (c == 0) {
  11584. n = e[i--];
  11585. y = (int)(n >> 28);
  11586. n <<= 4;
  11587. c = 28;
  11588. }
  11589. else if (c < 4) {
  11590. y = (int)(n >> 28);
  11591. n = e[i--];
  11592. c = 4 - c;
  11593. y |= (int)(n >> (32 - c));
  11594. n <<= c;
  11595. c = 32 - c;
  11596. }
  11597. else {
  11598. y = (int)((n >> 28) & 0xf);
  11599. n <<= 4;
  11600. c -= 4;
  11601. }
  11602. sp_4096_mont_sqr_128(r, r, m, mp);
  11603. sp_4096_mont_sqr_128(r, r, m, mp);
  11604. sp_4096_mont_sqr_128(r, r, m, mp);
  11605. sp_4096_mont_sqr_128(r, r, m, mp);
  11606. sp_4096_mont_mul_128(r, r, t[y], m, mp);
  11607. }
  11608. XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
  11609. sp_4096_mont_reduce_128(r, m, mp);
  11610. mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
  11611. sp_4096_cond_sub_128(r, r, m, mask);
  11612. }
  11613. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11614. if (td != NULL) {
  11615. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  11616. }
  11617. #endif
  11618. return err;
  11619. }
  11620. #else
  11621. /* Modular exponentiate a to the e mod m. (r = a^e mod m)
  11622. *
  11623. * r A single precision number that is the result of the operation.
  11624. * a A single precision number being exponentiated.
  11625. * e A single precision number that is the exponent.
  11626. * bits The number of bits in the exponent.
  11627. * m A single precision number that is the modulus.
  11628. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  11629. */
  11630. static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
  11631. int bits, const sp_digit* m, int reduceA)
  11632. {
  11633. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11634. sp_digit* td;
  11635. #else
  11636. sp_digit td[32 * 256];
  11637. #endif
  11638. sp_digit* t[32];
  11639. sp_digit* norm;
  11640. sp_digit mp = 1;
  11641. sp_digit n;
  11642. sp_digit mask;
  11643. int i;
  11644. int c, y;
  11645. int err = MP_OKAY;
  11646. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11647. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 256), NULL,
  11648. DYNAMIC_TYPE_TMP_BUFFER);
  11649. if (td == NULL) {
  11650. err = MEMORY_E;
  11651. }
  11652. #endif
  11653. if (err == MP_OKAY) {
  11654. norm = td;
  11655. for (i=0; i<32; i++) {
  11656. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11657. t[i] = td + i * 256;
  11658. #else
  11659. t[i] = &td[i * 256];
  11660. #endif
  11661. }
  11662. sp_4096_mont_setup(m, &mp);
  11663. sp_4096_mont_norm_128(norm, m);
  11664. XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
  11665. if (reduceA != 0) {
  11666. err = sp_4096_mod_128(t[1] + 128, a, m);
  11667. if (err == MP_OKAY) {
  11668. err = sp_4096_mod_128(t[1], t[1], m);
  11669. }
  11670. }
  11671. else {
  11672. XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
  11673. err = sp_4096_mod_128(t[1], t[1], m);
  11674. }
  11675. }
  11676. if (err == MP_OKAY) {
  11677. sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
  11678. sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
  11679. sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
  11680. sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
  11681. sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
  11682. sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
  11683. sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
  11684. sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
  11685. sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
  11686. sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
  11687. sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
  11688. sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
  11689. sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
  11690. sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
  11691. sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
  11692. sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
  11693. sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
  11694. sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
  11695. sp_4096_mont_sqr_128(t[20], t[10], m, mp);
  11696. sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
  11697. sp_4096_mont_sqr_128(t[22], t[11], m, mp);
  11698. sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
  11699. sp_4096_mont_sqr_128(t[24], t[12], m, mp);
  11700. sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
  11701. sp_4096_mont_sqr_128(t[26], t[13], m, mp);
  11702. sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
  11703. sp_4096_mont_sqr_128(t[28], t[14], m, mp);
  11704. sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
  11705. sp_4096_mont_sqr_128(t[30], t[15], m, mp);
  11706. sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
  11707. i = (bits - 1) / 32;
  11708. n = e[i--];
  11709. c = bits & 31;
  11710. if (c == 0) {
  11711. c = 32;
  11712. }
  11713. c -= bits % 5;
  11714. if (c == 32) {
  11715. c = 27;
  11716. }
  11717. if (c < 0) {
  11718. /* Number of bits in top word is less than number needed. */
  11719. c = -c;
  11720. y = (int)(n << c);
  11721. n = e[i--];
  11722. y |= (int)(n >> (64 - c));
  11723. n <<= c;
  11724. c = 64 - c;
  11725. }
  11726. else {
  11727. y = (int)(n >> c);
  11728. n <<= 32 - c;
  11729. }
  11730. XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
  11731. for (; i>=0 || c>=5; ) {
  11732. if (c == 0) {
  11733. n = e[i--];
  11734. y = (int)(n >> 27);
  11735. n <<= 5;
  11736. c = 27;
  11737. }
  11738. else if (c < 5) {
  11739. y = (int)(n >> 27);
  11740. n = e[i--];
  11741. c = 5 - c;
  11742. y |= (int)(n >> (32 - c));
  11743. n <<= c;
  11744. c = 32 - c;
  11745. }
  11746. else {
  11747. y = (int)((n >> 27) & 0x1f);
  11748. n <<= 5;
  11749. c -= 5;
  11750. }
  11751. sp_4096_mont_sqr_128(r, r, m, mp);
  11752. sp_4096_mont_sqr_128(r, r, m, mp);
  11753. sp_4096_mont_sqr_128(r, r, m, mp);
  11754. sp_4096_mont_sqr_128(r, r, m, mp);
  11755. sp_4096_mont_sqr_128(r, r, m, mp);
  11756. sp_4096_mont_mul_128(r, r, t[y], m, mp);
  11757. }
  11758. XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
  11759. sp_4096_mont_reduce_128(r, m, mp);
  11760. mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
  11761. sp_4096_cond_sub_128(r, r, m, mask);
  11762. }
  11763. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  11764. if (td != NULL) {
  11765. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  11766. }
  11767. #endif
  11768. return err;
  11769. }
  11770. #endif /* WOLFSSL_SP_SMALL */
  11771. #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
  11772. #ifdef WOLFSSL_HAVE_SP_RSA
  11773. /* RSA public key operation.
  11774. *
  11775. * in Array of bytes representing the number to exponentiate, base.
  11776. * inLen Number of bytes in base.
  11777. * em Public exponent.
  11778. * mm Modulus.
  11779. * out Buffer to hold big-endian bytes of exponentiation result.
  11780. * Must be at least 512 bytes long.
  11781. * outLen Number of bytes in result.
  11782. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  11783. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  11784. */
  11785. int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
  11786. byte* out, word32* outLen)
  11787. {
  11788. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  11789. sp_digit a[256], m[128], r[256];
  11790. #else
  11791. sp_digit* d = NULL;
  11792. sp_digit* a = NULL;
  11793. sp_digit* m = NULL;
  11794. sp_digit* r = NULL;
  11795. #endif
  11796. sp_digit *ah = NULL;
  11797. sp_digit e[1];
  11798. int err = MP_OKAY;
  11799. if (*outLen < 512) {
  11800. err = MP_TO_E;
  11801. }
  11802. else if (mp_count_bits(em) > 32 || inLen > 512 ||
  11803. mp_count_bits(mm) != 4096) {
  11804. err = MP_READ_E;
  11805. }
  11806. else if (mp_iseven(mm)) {
  11807. err = MP_VAL;
  11808. }
  11809. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  11810. if (err == MP_OKAY) {
  11811. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
  11812. DYNAMIC_TYPE_RSA);
  11813. if (d == NULL)
  11814. err = MEMORY_E;
  11815. }
  11816. if (err == MP_OKAY) {
  11817. a = d;
  11818. r = a + 128 * 2;
  11819. m = r + 128 * 2;
  11820. }
  11821. #endif
  11822. if (err == MP_OKAY) {
  11823. ah = a + 128;
  11824. sp_4096_from_bin(ah, 128, in, inLen);
  11825. #if DIGIT_BIT >= 32
  11826. e[0] = em->dp[0];
  11827. #else
  11828. e[0] = em->dp[0];
  11829. if (em->used > 1) {
  11830. e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
  11831. }
  11832. #endif
  11833. if (e[0] == 0) {
  11834. err = MP_EXPTMOD_E;
  11835. }
  11836. }
  11837. if (err == MP_OKAY) {
  11838. sp_4096_from_mp(m, 128, mm);
  11839. if (e[0] == 0x3) {
  11840. if (err == MP_OKAY) {
  11841. sp_4096_sqr_128(r, ah);
  11842. err = sp_4096_mod_128_cond(r, r, m);
  11843. }
  11844. if (err == MP_OKAY) {
  11845. sp_4096_mul_128(r, ah, r);
  11846. err = sp_4096_mod_128_cond(r, r, m);
  11847. }
  11848. }
  11849. else {
  11850. int i;
  11851. sp_digit mp;
  11852. sp_4096_mont_setup(m, &mp);
  11853. /* Convert to Montgomery form. */
  11854. XMEMSET(a, 0, sizeof(sp_digit) * 128);
  11855. err = sp_4096_mod_128_cond(a, a, m);
  11856. if (err == MP_OKAY) {
  11857. for (i = 31; i >= 0; i--) {
  11858. if (e[0] >> i) {
  11859. break;
  11860. }
  11861. }
  11862. XMEMCPY(r, a, sizeof(sp_digit) * 128);
  11863. for (i--; i>=0; i--) {
  11864. sp_4096_mont_sqr_128(r, r, m, mp);
  11865. if (((e[0] >> i) & 1) == 1) {
  11866. sp_4096_mont_mul_128(r, r, a, m, mp);
  11867. }
  11868. }
  11869. XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
  11870. sp_4096_mont_reduce_128(r, m, mp);
  11871. for (i = 127; i > 0; i--) {
  11872. if (r[i] != m[i]) {
  11873. break;
  11874. }
  11875. }
  11876. if (r[i] >= m[i]) {
  11877. sp_4096_sub_in_place_128(r, m);
  11878. }
  11879. }
  11880. }
  11881. }
  11882. if (err == MP_OKAY) {
  11883. sp_4096_to_bin(r, out);
  11884. *outLen = 512;
  11885. }
  11886. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  11887. if (d != NULL) {
  11888. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  11889. }
  11890. #endif
  11891. return err;
  11892. }
  11893. #ifndef WOLFSSL_RSA_PUBLIC_ONLY
  11894. /* Conditionally add a and b using the mask m.
  11895. * m is -1 to add and 0 when not.
  11896. *
  11897. * r A single precision number representing conditional add result.
  11898. * a A single precision number to add with.
  11899. * b A single precision number to add.
  11900. * m Mask value to apply.
  11901. */
  11902. SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
  11903. sp_digit m)
  11904. {
  11905. sp_digit c = 0;
  11906. __asm__ __volatile__ (
  11907. "mov r5, #1\n\t"
  11908. "lsl r5, r5, #8\n\t"
  11909. "mov r9, r5\n\t"
  11910. "mov r8, #0\n\t"
  11911. "\n1:\n\t"
  11912. "ldr r6, [%[b], r8]\n\t"
  11913. "and r6, r6, %[m]\n\t"
  11914. "adds r5, %[c], #-1\n\t"
  11915. "ldr r5, [%[a], r8]\n\t"
  11916. "adcs r5, r5, r6\n\t"
  11917. "mov %[c], #0\n\t"
  11918. "adcs %[c], %[c], %[c]\n\t"
  11919. "str r5, [%[r], r8]\n\t"
  11920. "add r8, r8, #4\n\t"
  11921. "cmp r8, r9\n\t"
  11922. #ifdef __GNUC__
  11923. "blt 1b\n\t"
  11924. #else
  11925. "blt.n 1b\n\t"
  11926. #endif /* __GNUC__ */
  11927. : [c] "+r" (c)
  11928. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  11929. : "memory", "r5", "r6", "r8", "r9"
  11930. );
  11931. return c;
  11932. }
  11933. /* RSA private key operation.
  11934. *
  11935. * in Array of bytes representing the number to exponentiate, base.
  11936. * inLen Number of bytes in base.
  11937. * dm Private exponent.
  11938. * pm First prime.
  11939. * qm Second prime.
  11940. * dpm First prime's CRT exponent.
  11941. * dqm Second prime's CRT exponent.
  11942. * qim Inverse of second prime mod p.
  11943. * mm Modulus.
  11944. * out Buffer to hold big-endian bytes of exponentiation result.
  11945. * Must be at least 512 bytes long.
  11946. * outLen Number of bytes in result.
  11947. * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
  11948. * an array is too long and MEMORY_E when dynamic memory allocation fails.
  11949. */
  11950. int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
  11951. mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
  11952. byte* out, word32* outLen)
  11953. {
  11954. #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
  11955. sp_digit* a = NULL;
  11956. sp_digit* d = NULL;
  11957. sp_digit* m = NULL;
  11958. sp_digit* r = NULL;
  11959. int err = MP_OKAY;
  11960. (void)pm;
  11961. (void)qm;
  11962. (void)dpm;
  11963. (void)dqm;
  11964. (void)qim;
  11965. if (*outLen < 512U) {
  11966. err = MP_TO_E;
  11967. }
  11968. if (err == MP_OKAY) {
  11969. if (mp_count_bits(dm) > 4096) {
  11970. err = MP_READ_E;
  11971. }
  11972. else if (inLen > 512) {
  11973. err = MP_READ_E;
  11974. }
  11975. else if (mp_count_bits(mm) != 4096) {
  11976. err = MP_READ_E;
  11977. }
  11978. else if (mp_iseven(mm)) {
  11979. err = MP_VAL;
  11980. }
  11981. }
  11982. if (err == MP_OKAY) {
  11983. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
  11984. DYNAMIC_TYPE_RSA);
  11985. if (d == NULL) {
  11986. err = MEMORY_E;
  11987. }
  11988. }
  11989. if (err == MP_OKAY) {
  11990. a = d + 128;
  11991. m = a + 256;
  11992. r = a;
  11993. sp_4096_from_bin(a, 128, in, inLen);
  11994. sp_4096_from_mp(d, 128, dm);
  11995. sp_4096_from_mp(m, 128, mm);
  11996. err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
  11997. }
  11998. if (err == MP_OKAY) {
  11999. sp_4096_to_bin(r, out);
  12000. *outLen = 512;
  12001. }
  12002. if (d != NULL) {
  12003. XMEMSET(d, 0, sizeof(sp_digit) * 128);
  12004. XFREE(d, NULL, DYNAMIC_TYPE_RSA);
  12005. }
  12006. return err;
  12007. #else
  12008. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  12009. sp_digit a[128 * 2];
  12010. sp_digit p[64], q[64], dp[64];
  12011. sp_digit tmpa[128], tmpb[128];
  12012. #else
  12013. sp_digit* t = NULL;
  12014. sp_digit* a = NULL;
  12015. sp_digit* p = NULL;
  12016. sp_digit* q = NULL;
  12017. sp_digit* dp = NULL;
  12018. sp_digit* tmpa = NULL;
  12019. sp_digit* tmpb = NULL;
  12020. #endif
  12021. sp_digit* r = NULL;
  12022. sp_digit* qi = NULL;
  12023. sp_digit* dq = NULL;
  12024. sp_digit c;
  12025. int err = MP_OKAY;
  12026. (void)dm;
  12027. (void)mm;
  12028. if (*outLen < 512) {
  12029. err = MP_TO_E;
  12030. }
  12031. else if (inLen > 512 || mp_count_bits(mm) != 4096) {
  12032. err = MP_READ_E;
  12033. }
  12034. else if (mp_iseven(mm)) {
  12035. err = MP_VAL;
  12036. }
  12037. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  12038. if (err == MP_OKAY) {
  12039. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
  12040. DYNAMIC_TYPE_RSA);
  12041. if (t == NULL)
  12042. err = MEMORY_E;
  12043. }
  12044. if (err == MP_OKAY) {
  12045. a = t;
  12046. p = a + 128 * 2;
  12047. q = p + 64;
  12048. qi = dq = dp = q + 64;
  12049. tmpa = qi + 64;
  12050. tmpb = tmpa + 128;
  12051. r = t + 128;
  12052. }
  12053. #else
  12054. #endif
  12055. if (err == MP_OKAY) {
  12056. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  12057. r = a;
  12058. qi = dq = dp;
  12059. #endif
  12060. sp_4096_from_bin(a, 128, in, inLen);
  12061. sp_4096_from_mp(p, 64, pm);
  12062. sp_4096_from_mp(q, 64, qm);
  12063. sp_4096_from_mp(dp, 64, dpm);
  12064. err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
  12065. }
  12066. if (err == MP_OKAY) {
  12067. sp_4096_from_mp(dq, 64, dqm);
  12068. err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
  12069. }
  12070. if (err == MP_OKAY) {
  12071. c = sp_2048_sub_in_place_64(tmpa, tmpb);
  12072. c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
  12073. sp_4096_cond_add_64(tmpa, tmpa, p, c);
  12074. sp_2048_from_mp(qi, 64, qim);
  12075. sp_2048_mul_64(tmpa, tmpa, qi);
  12076. err = sp_2048_mod_64(tmpa, tmpa, p);
  12077. }
  12078. if (err == MP_OKAY) {
  12079. sp_2048_mul_64(tmpa, q, tmpa);
  12080. XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
  12081. sp_4096_add_128(r, tmpb, tmpa);
  12082. sp_4096_to_bin(r, out);
  12083. *outLen = 512;
  12084. }
  12085. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  12086. if (t != NULL) {
  12087. XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
  12088. XFREE(t, NULL, DYNAMIC_TYPE_RSA);
  12089. }
  12090. #else
  12091. XMEMSET(tmpa, 0, sizeof(tmpa));
  12092. XMEMSET(tmpb, 0, sizeof(tmpb));
  12093. XMEMSET(p, 0, sizeof(p));
  12094. XMEMSET(q, 0, sizeof(q));
  12095. XMEMSET(dp, 0, sizeof(dp));
  12096. #endif
  12097. #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
  12098. return err;
  12099. }
  12100. #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
  12101. #endif /* WOLFSSL_HAVE_SP_RSA */
  12102. #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
  12103. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  12104. /* Convert an array of sp_digit to an mp_int.
  12105. *
  12106. * a A single precision integer.
  12107. * r A multi-precision integer.
  12108. */
  12109. static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
  12110. {
  12111. int err;
  12112. err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
  12113. if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
  12114. #if DIGIT_BIT == 32
  12115. XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
  12116. r->used = 128;
  12117. mp_clamp(r);
  12118. #elif DIGIT_BIT < 32
  12119. int i, j = 0, s = 0;
  12120. r->dp[0] = 0;
  12121. for (i = 0; i < 128; i++) {
  12122. r->dp[j] |= (mp_digit)(a[i] << s);
  12123. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  12124. s = DIGIT_BIT - s;
  12125. r->dp[++j] = (mp_digit)(a[i] >> s);
  12126. while (s + DIGIT_BIT <= 32) {
  12127. s += DIGIT_BIT;
  12128. r->dp[j++] &= (1L << DIGIT_BIT) - 1;
  12129. if (s == SP_WORD_SIZE) {
  12130. r->dp[j] = 0;
  12131. }
  12132. else {
  12133. r->dp[j] = (mp_digit)(a[i] >> s);
  12134. }
  12135. }
  12136. s = 32 - s;
  12137. }
  12138. r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
  12139. mp_clamp(r);
  12140. #else
  12141. int i, j = 0, s = 0;
  12142. r->dp[0] = 0;
  12143. for (i = 0; i < 128; i++) {
  12144. r->dp[j] |= ((mp_digit)a[i]) << s;
  12145. if (s + 32 >= DIGIT_BIT) {
  12146. #if DIGIT_BIT != 32 && DIGIT_BIT != 64
  12147. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  12148. #endif
  12149. s = DIGIT_BIT - s;
  12150. r->dp[++j] = a[i] >> s;
  12151. s = 32 - s;
  12152. }
  12153. else {
  12154. s += 32;
  12155. }
  12156. }
  12157. r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
  12158. mp_clamp(r);
  12159. #endif
  12160. }
  12161. return err;
  12162. }
  12163. /* Perform the modular exponentiation for Diffie-Hellman.
  12164. *
  12165. * base Base. MP integer.
  12166. * exp Exponent. MP integer.
  12167. * mod Modulus. MP integer.
  12168. * res Result. MP integer.
  12169. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  12170. * and MEMORY_E if memory allocation fails.
  12171. */
  12172. int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
  12173. {
  12174. int err = MP_OKAY;
  12175. sp_digit b[256], e[128], m[128];
  12176. sp_digit* r = b;
  12177. int expBits = mp_count_bits(exp);
  12178. if (mp_count_bits(base) > 4096) {
  12179. err = MP_READ_E;
  12180. }
  12181. else if (expBits > 4096) {
  12182. err = MP_READ_E;
  12183. }
  12184. else if (mp_count_bits(mod) != 4096) {
  12185. err = MP_READ_E;
  12186. }
  12187. else if (mp_iseven(mod)) {
  12188. err = MP_VAL;
  12189. }
  12190. if (err == MP_OKAY) {
  12191. sp_4096_from_mp(b, 128, base);
  12192. sp_4096_from_mp(e, 128, exp);
  12193. sp_4096_from_mp(m, 128, mod);
  12194. err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
  12195. }
  12196. if (err == MP_OKAY) {
  12197. err = sp_4096_to_mp(r, res);
  12198. }
  12199. XMEMSET(e, 0, sizeof(e));
  12200. return err;
  12201. }
  12202. #ifdef WOLFSSL_HAVE_SP_DH
  12203. #ifdef HAVE_FFDHE_4096
  /* Shift the 128 word number a left by n bits, writing 129 words to r.
   *
   * r  Result. 129 32-bit words are written (r[0]..r[128]).
   * a  Number to shift. 128 32-bit words are read (a[0]..a[127]).
   * n  Number of bits to shift left by. The bits carried into the next word
   *    are computed as (word >> 1) >> (31 - n), that is a right shift by
   *    32 - n, so n is expected to be in the range 0..31.
   *
   * Words are handled from the most significant down to the least
   * significant and every source word is loaded before the result word at the
   * same index is stored; callers in this file use it both with r == a and
   * with distinct buffers.
   *
   * NOTE(review): %[a] and %[r] are declared as input operands but are
   * adjusted inside the asm (+448, then seven times -64, so both hold their
   * entry values again on exit). GCC documents that input-only operands must
   * not be modified - consider "+r" constraints.
   */
  12204. static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
  12205. {
  12206. __asm__ __volatile__ (
  /* r6 = 31 - n: second half of the two step right shift by 32 - n. */
  12207. "mov r6, #31\n\t"
  12208. "sub r6, r6, %[n]\n\t"
  /* Point at the top 16 words (112..127); offsets below are relative. */
  12209. "add %[a], %[a], #448\n\t"
  12210. "add %[r], %[r], #448\n\t"
  /* Top word: r[128] receives only the bits shifted out of a[127]. */
  12211. "ldr r3, [%[a], #60]\n\t"
  12212. "lsr r4, r3, #1\n\t"
  12213. "lsl r3, r3, %[n]\n\t"
  12214. "lsr r4, r4, r6\n\t"
  /* Repeating pattern: load next lower word, store finished word above it,
   * then OR the lower word's carried-out bits into the pending word. */
  12215. "ldr r2, [%[a], #56]\n\t"
  12216. "str r4, [%[r], #64]\n\t"
  12217. "lsr r5, r2, #1\n\t"
  12218. "lsl r2, r2, %[n]\n\t"
  12219. "lsr r5, r5, r6\n\t"
  12220. "orr r3, r3, r5\n\t"
  12221. "ldr r4, [%[a], #52]\n\t"
  12222. "str r3, [%[r], #60]\n\t"
  12223. "lsr r5, r4, #1\n\t"
  12224. "lsl r4, r4, %[n]\n\t"
  12225. "lsr r5, r5, r6\n\t"
  12226. "orr r2, r2, r5\n\t"
  12227. "ldr r3, [%[a], #48]\n\t"
  12228. "str r2, [%[r], #56]\n\t"
  12229. "lsr r5, r3, #1\n\t"
  12230. "lsl r3, r3, %[n]\n\t"
  12231. "lsr r5, r5, r6\n\t"
  12232. "orr r4, r4, r5\n\t"
  12233. "ldr r2, [%[a], #44]\n\t"
  12234. "str r4, [%[r], #52]\n\t"
  12235. "lsr r5, r2, #1\n\t"
  12236. "lsl r2, r2, %[n]\n\t"
  12237. "lsr r5, r5, r6\n\t"
  12238. "orr r3, r3, r5\n\t"
  12239. "ldr r4, [%[a], #40]\n\t"
  12240. "str r3, [%[r], #48]\n\t"
  12241. "lsr r5, r4, #1\n\t"
  12242. "lsl r4, r4, %[n]\n\t"
  12243. "lsr r5, r5, r6\n\t"
  12244. "orr r2, r2, r5\n\t"
  12245. "ldr r3, [%[a], #36]\n\t"
  12246. "str r2, [%[r], #44]\n\t"
  12247. "lsr r5, r3, #1\n\t"
  12248. "lsl r3, r3, %[n]\n\t"
  12249. "lsr r5, r5, r6\n\t"
  12250. "orr r4, r4, r5\n\t"
  12251. "ldr r2, [%[a], #32]\n\t"
  12252. "str r4, [%[r], #40]\n\t"
  12253. "lsr r5, r2, #1\n\t"
  12254. "lsl r2, r2, %[n]\n\t"
  12255. "lsr r5, r5, r6\n\t"
  12256. "orr r3, r3, r5\n\t"
  12257. "ldr r4, [%[a], #28]\n\t"
  12258. "str r3, [%[r], #36]\n\t"
  12259. "lsr r5, r4, #1\n\t"
  12260. "lsl r4, r4, %[n]\n\t"
  12261. "lsr r5, r5, r6\n\t"
  12262. "orr r2, r2, r5\n\t"
  12263. "ldr r3, [%[a], #24]\n\t"
  12264. "str r2, [%[r], #32]\n\t"
  12265. "lsr r5, r3, #1\n\t"
  12266. "lsl r3, r3, %[n]\n\t"
  12267. "lsr r5, r5, r6\n\t"
  12268. "orr r4, r4, r5\n\t"
  12269. "ldr r2, [%[a], #20]\n\t"
  12270. "str r4, [%[r], #28]\n\t"
  12271. "lsr r5, r2, #1\n\t"
  12272. "lsl r2, r2, %[n]\n\t"
  12273. "lsr r5, r5, r6\n\t"
  12274. "orr r3, r3, r5\n\t"
  12275. "ldr r4, [%[a], #16]\n\t"
  12276. "str r3, [%[r], #24]\n\t"
  12277. "lsr r5, r4, #1\n\t"
  12278. "lsl r4, r4, %[n]\n\t"
  12279. "lsr r5, r5, r6\n\t"
  12280. "orr r2, r2, r5\n\t"
  12281. "ldr r3, [%[a], #12]\n\t"
  12282. "str r2, [%[r], #20]\n\t"
  12283. "lsr r5, r3, #1\n\t"
  12284. "lsl r3, r3, %[n]\n\t"
  12285. "lsr r5, r5, r6\n\t"
  12286. "orr r4, r4, r5\n\t"
  12287. "ldr r2, [%[a], #8]\n\t"
  12288. "str r4, [%[r], #16]\n\t"
  12289. "lsr r5, r2, #1\n\t"
  12290. "lsl r2, r2, %[n]\n\t"
  12291. "lsr r5, r5, r6\n\t"
  12292. "orr r3, r3, r5\n\t"
  12293. "ldr r4, [%[a], #4]\n\t"
  12294. "str r3, [%[r], #12]\n\t"
  12295. "lsr r5, r4, #1\n\t"
  12296. "lsl r4, r4, %[n]\n\t"
  12297. "lsr r5, r5, r6\n\t"
  12298. "orr r2, r2, r5\n\t"
  12299. "ldr r3, [%[a], #0]\n\t"
  12300. "str r2, [%[r], #8]\n\t"
  12301. "lsr r5, r3, #1\n\t"
  12302. "lsl r3, r3, %[n]\n\t"
  12303. "lsr r5, r5, r6\n\t"
  12304. "orr r4, r4, r5\n\t"
  /* Step both pointers down by 16 words (64 bytes) for the next group. */
  12305. "sub %[a], %[a], #64\n\t"
  12306. "sub %[r], %[r], #64\n\t"
  12307. "ldr r2, [%[a], #60]\n\t"
  12308. "str r4, [%[r], #68]\n\t"
  12309. "lsr r5, r2, #1\n\t"
  12310. "lsl r2, r2, %[n]\n\t"
  12311. "lsr r5, r5, r6\n\t"
  12312. "orr r3, r3, r5\n\t"
  12313. "ldr r4, [%[a], #56]\n\t"
  12314. "str r3, [%[r], #64]\n\t"
  12315. "lsr r5, r4, #1\n\t"
  12316. "lsl r4, r4, %[n]\n\t"
  12317. "lsr r5, r5, r6\n\t"
  12318. "orr r2, r2, r5\n\t"
  12319. "ldr r3, [%[a], #52]\n\t"
  12320. "str r2, [%[r], #60]\n\t"
  12321. "lsr r5, r3, #1\n\t"
  12322. "lsl r3, r3, %[n]\n\t"
  12323. "lsr r5, r5, r6\n\t"
  12324. "orr r4, r4, r5\n\t"
  12325. "ldr r2, [%[a], #48]\n\t"
  12326. "str r4, [%[r], #56]\n\t"
  12327. "lsr r5, r2, #1\n\t"
  12328. "lsl r2, r2, %[n]\n\t"
  12329. "lsr r5, r5, r6\n\t"
  12330. "orr r3, r3, r5\n\t"
  12331. "ldr r4, [%[a], #44]\n\t"
  12332. "str r3, [%[r], #52]\n\t"
  12333. "lsr r5, r4, #1\n\t"
  12334. "lsl r4, r4, %[n]\n\t"
  12335. "lsr r5, r5, r6\n\t"
  12336. "orr r2, r2, r5\n\t"
  12337. "ldr r3, [%[a], #40]\n\t"
  12338. "str r2, [%[r], #48]\n\t"
  12339. "lsr r5, r3, #1\n\t"
  12340. "lsl r3, r3, %[n]\n\t"
  12341. "lsr r5, r5, r6\n\t"
  12342. "orr r4, r4, r5\n\t"
  12343. "ldr r2, [%[a], #36]\n\t"
  12344. "str r4, [%[r], #44]\n\t"
  12345. "lsr r5, r2, #1\n\t"
  12346. "lsl r2, r2, %[n]\n\t"
  12347. "lsr r5, r5, r6\n\t"
  12348. "orr r3, r3, r5\n\t"
  12349. "ldr r4, [%[a], #32]\n\t"
  12350. "str r3, [%[r], #40]\n\t"
  12351. "lsr r5, r4, #1\n\t"
  12352. "lsl r4, r4, %[n]\n\t"
  12353. "lsr r5, r5, r6\n\t"
  12354. "orr r2, r2, r5\n\t"
  12355. "ldr r3, [%[a], #28]\n\t"
  12356. "str r2, [%[r], #36]\n\t"
  12357. "lsr r5, r3, #1\n\t"
  12358. "lsl r3, r3, %[n]\n\t"
  12359. "lsr r5, r5, r6\n\t"
  12360. "orr r4, r4, r5\n\t"
  12361. "ldr r2, [%[a], #24]\n\t"
  12362. "str r4, [%[r], #32]\n\t"
  12363. "lsr r5, r2, #1\n\t"
  12364. "lsl r2, r2, %[n]\n\t"
  12365. "lsr r5, r5, r6\n\t"
  12366. "orr r3, r3, r5\n\t"
  12367. "ldr r4, [%[a], #20]\n\t"
  12368. "str r3, [%[r], #28]\n\t"
  12369. "lsr r5, r4, #1\n\t"
  12370. "lsl r4, r4, %[n]\n\t"
  12371. "lsr r5, r5, r6\n\t"
  12372. "orr r2, r2, r5\n\t"
  12373. "ldr r3, [%[a], #16]\n\t"
  12374. "str r2, [%[r], #24]\n\t"
  12375. "lsr r5, r3, #1\n\t"
  12376. "lsl r3, r3, %[n]\n\t"
  12377. "lsr r5, r5, r6\n\t"
  12378. "orr r4, r4, r5\n\t"
  12379. "ldr r2, [%[a], #12]\n\t"
  12380. "str r4, [%[r], #20]\n\t"
  12381. "lsr r5, r2, #1\n\t"
  12382. "lsl r2, r2, %[n]\n\t"
  12383. "lsr r5, r5, r6\n\t"
  12384. "orr r3, r3, r5\n\t"
  12385. "ldr r4, [%[a], #8]\n\t"
  12386. "str r3, [%[r], #16]\n\t"
  12387. "lsr r5, r4, #1\n\t"
  12388. "lsl r4, r4, %[n]\n\t"
  12389. "lsr r5, r5, r6\n\t"
  12390. "orr r2, r2, r5\n\t"
  12391. "ldr r3, [%[a], #4]\n\t"
  12392. "str r2, [%[r], #12]\n\t"
  12393. "lsr r5, r3, #1\n\t"
  12394. "lsl r3, r3, %[n]\n\t"
  12395. "lsr r5, r5, r6\n\t"
  12396. "orr r4, r4, r5\n\t"
  12397. "ldr r2, [%[a], #0]\n\t"
  12398. "str r4, [%[r], #8]\n\t"
  12399. "lsr r5, r2, #1\n\t"
  12400. "lsl r2, r2, %[n]\n\t"
  12401. "lsr r5, r5, r6\n\t"
  12402. "orr r3, r3, r5\n\t"
  12403. "sub %[a], %[a], #64\n\t"
  12404. "sub %[r], %[r], #64\n\t"
  12405. "ldr r4, [%[a], #60]\n\t"
  12406. "str r3, [%[r], #68]\n\t"
  12407. "lsr r5, r4, #1\n\t"
  12408. "lsl r4, r4, %[n]\n\t"
  12409. "lsr r5, r5, r6\n\t"
  12410. "orr r2, r2, r5\n\t"
  12411. "ldr r3, [%[a], #56]\n\t"
  12412. "str r2, [%[r], #64]\n\t"
  12413. "lsr r5, r3, #1\n\t"
  12414. "lsl r3, r3, %[n]\n\t"
  12415. "lsr r5, r5, r6\n\t"
  12416. "orr r4, r4, r5\n\t"
  12417. "ldr r2, [%[a], #52]\n\t"
  12418. "str r4, [%[r], #60]\n\t"
  12419. "lsr r5, r2, #1\n\t"
  12420. "lsl r2, r2, %[n]\n\t"
  12421. "lsr r5, r5, r6\n\t"
  12422. "orr r3, r3, r5\n\t"
  12423. "ldr r4, [%[a], #48]\n\t"
  12424. "str r3, [%[r], #56]\n\t"
  12425. "lsr r5, r4, #1\n\t"
  12426. "lsl r4, r4, %[n]\n\t"
  12427. "lsr r5, r5, r6\n\t"
  12428. "orr r2, r2, r5\n\t"
  12429. "ldr r3, [%[a], #44]\n\t"
  12430. "str r2, [%[r], #52]\n\t"
  12431. "lsr r5, r3, #1\n\t"
  12432. "lsl r3, r3, %[n]\n\t"
  12433. "lsr r5, r5, r6\n\t"
  12434. "orr r4, r4, r5\n\t"
  12435. "ldr r2, [%[a], #40]\n\t"
  12436. "str r4, [%[r], #48]\n\t"
  12437. "lsr r5, r2, #1\n\t"
  12438. "lsl r2, r2, %[n]\n\t"
  12439. "lsr r5, r5, r6\n\t"
  12440. "orr r3, r3, r5\n\t"
  12441. "ldr r4, [%[a], #36]\n\t"
  12442. "str r3, [%[r], #44]\n\t"
  12443. "lsr r5, r4, #1\n\t"
  12444. "lsl r4, r4, %[n]\n\t"
  12445. "lsr r5, r5, r6\n\t"
  12446. "orr r2, r2, r5\n\t"
  12447. "ldr r3, [%[a], #32]\n\t"
  12448. "str r2, [%[r], #40]\n\t"
  12449. "lsr r5, r3, #1\n\t"
  12450. "lsl r3, r3, %[n]\n\t"
  12451. "lsr r5, r5, r6\n\t"
  12452. "orr r4, r4, r5\n\t"
  12453. "ldr r2, [%[a], #28]\n\t"
  12454. "str r4, [%[r], #36]\n\t"
  12455. "lsr r5, r2, #1\n\t"
  12456. "lsl r2, r2, %[n]\n\t"
  12457. "lsr r5, r5, r6\n\t"
  12458. "orr r3, r3, r5\n\t"
  12459. "ldr r4, [%[a], #24]\n\t"
  12460. "str r3, [%[r], #32]\n\t"
  12461. "lsr r5, r4, #1\n\t"
  12462. "lsl r4, r4, %[n]\n\t"
  12463. "lsr r5, r5, r6\n\t"
  12464. "orr r2, r2, r5\n\t"
  12465. "ldr r3, [%[a], #20]\n\t"
  12466. "str r2, [%[r], #28]\n\t"
  12467. "lsr r5, r3, #1\n\t"
  12468. "lsl r3, r3, %[n]\n\t"
  12469. "lsr r5, r5, r6\n\t"
  12470. "orr r4, r4, r5\n\t"
  12471. "ldr r2, [%[a], #16]\n\t"
  12472. "str r4, [%[r], #24]\n\t"
  12473. "lsr r5, r2, #1\n\t"
  12474. "lsl r2, r2, %[n]\n\t"
  12475. "lsr r5, r5, r6\n\t"
  12476. "orr r3, r3, r5\n\t"
  12477. "ldr r4, [%[a], #12]\n\t"
  12478. "str r3, [%[r], #20]\n\t"
  12479. "lsr r5, r4, #1\n\t"
  12480. "lsl r4, r4, %[n]\n\t"
  12481. "lsr r5, r5, r6\n\t"
  12482. "orr r2, r2, r5\n\t"
  12483. "ldr r3, [%[a], #8]\n\t"
  12484. "str r2, [%[r], #16]\n\t"
  12485. "lsr r5, r3, #1\n\t"
  12486. "lsl r3, r3, %[n]\n\t"
  12487. "lsr r5, r5, r6\n\t"
  12488. "orr r4, r4, r5\n\t"
  12489. "ldr r2, [%[a], #4]\n\t"
  12490. "str r4, [%[r], #12]\n\t"
  12491. "lsr r5, r2, #1\n\t"
  12492. "lsl r2, r2, %[n]\n\t"
  12493. "lsr r5, r5, r6\n\t"
  12494. "orr r3, r3, r5\n\t"
  12495. "ldr r4, [%[a], #0]\n\t"
  12496. "str r3, [%[r], #8]\n\t"
  12497. "lsr r5, r4, #1\n\t"
  12498. "lsl r4, r4, %[n]\n\t"
  12499. "lsr r5, r5, r6\n\t"
  12500. "orr r2, r2, r5\n\t"
  12501. "sub %[a], %[a], #64\n\t"
  12502. "sub %[r], %[r], #64\n\t"
  12503. "ldr r3, [%[a], #60]\n\t"
  12504. "str r2, [%[r], #68]\n\t"
  12505. "lsr r5, r3, #1\n\t"
  12506. "lsl r3, r3, %[n]\n\t"
  12507. "lsr r5, r5, r6\n\t"
  12508. "orr r4, r4, r5\n\t"
  12509. "ldr r2, [%[a], #56]\n\t"
  12510. "str r4, [%[r], #64]\n\t"
  12511. "lsr r5, r2, #1\n\t"
  12512. "lsl r2, r2, %[n]\n\t"
  12513. "lsr r5, r5, r6\n\t"
  12514. "orr r3, r3, r5\n\t"
  12515. "ldr r4, [%[a], #52]\n\t"
  12516. "str r3, [%[r], #60]\n\t"
  12517. "lsr r5, r4, #1\n\t"
  12518. "lsl r4, r4, %[n]\n\t"
  12519. "lsr r5, r5, r6\n\t"
  12520. "orr r2, r2, r5\n\t"
  12521. "ldr r3, [%[a], #48]\n\t"
  12522. "str r2, [%[r], #56]\n\t"
  12523. "lsr r5, r3, #1\n\t"
  12524. "lsl r3, r3, %[n]\n\t"
  12525. "lsr r5, r5, r6\n\t"
  12526. "orr r4, r4, r5\n\t"
  12527. "ldr r2, [%[a], #44]\n\t"
  12528. "str r4, [%[r], #52]\n\t"
  12529. "lsr r5, r2, #1\n\t"
  12530. "lsl r2, r2, %[n]\n\t"
  12531. "lsr r5, r5, r6\n\t"
  12532. "orr r3, r3, r5\n\t"
  12533. "ldr r4, [%[a], #40]\n\t"
  12534. "str r3, [%[r], #48]\n\t"
  12535. "lsr r5, r4, #1\n\t"
  12536. "lsl r4, r4, %[n]\n\t"
  12537. "lsr r5, r5, r6\n\t"
  12538. "orr r2, r2, r5\n\t"
  12539. "ldr r3, [%[a], #36]\n\t"
  12540. "str r2, [%[r], #44]\n\t"
  12541. "lsr r5, r3, #1\n\t"
  12542. "lsl r3, r3, %[n]\n\t"
  12543. "lsr r5, r5, r6\n\t"
  12544. "orr r4, r4, r5\n\t"
  12545. "ldr r2, [%[a], #32]\n\t"
  12546. "str r4, [%[r], #40]\n\t"
  12547. "lsr r5, r2, #1\n\t"
  12548. "lsl r2, r2, %[n]\n\t"
  12549. "lsr r5, r5, r6\n\t"
  12550. "orr r3, r3, r5\n\t"
  12551. "ldr r4, [%[a], #28]\n\t"
  12552. "str r3, [%[r], #36]\n\t"
  12553. "lsr r5, r4, #1\n\t"
  12554. "lsl r4, r4, %[n]\n\t"
  12555. "lsr r5, r5, r6\n\t"
  12556. "orr r2, r2, r5\n\t"
  12557. "ldr r3, [%[a], #24]\n\t"
  12558. "str r2, [%[r], #32]\n\t"
  12559. "lsr r5, r3, #1\n\t"
  12560. "lsl r3, r3, %[n]\n\t"
  12561. "lsr r5, r5, r6\n\t"
  12562. "orr r4, r4, r5\n\t"
  12563. "ldr r2, [%[a], #20]\n\t"
  12564. "str r4, [%[r], #28]\n\t"
  12565. "lsr r5, r2, #1\n\t"
  12566. "lsl r2, r2, %[n]\n\t"
  12567. "lsr r5, r5, r6\n\t"
  12568. "orr r3, r3, r5\n\t"
  12569. "ldr r4, [%[a], #16]\n\t"
  12570. "str r3, [%[r], #24]\n\t"
  12571. "lsr r5, r4, #1\n\t"
  12572. "lsl r4, r4, %[n]\n\t"
  12573. "lsr r5, r5, r6\n\t"
  12574. "orr r2, r2, r5\n\t"
  12575. "ldr r3, [%[a], #12]\n\t"
  12576. "str r2, [%[r], #20]\n\t"
  12577. "lsr r5, r3, #1\n\t"
  12578. "lsl r3, r3, %[n]\n\t"
  12579. "lsr r5, r5, r6\n\t"
  12580. "orr r4, r4, r5\n\t"
  12581. "ldr r2, [%[a], #8]\n\t"
  12582. "str r4, [%[r], #16]\n\t"
  12583. "lsr r5, r2, #1\n\t"
  12584. "lsl r2, r2, %[n]\n\t"
  12585. "lsr r5, r5, r6\n\t"
  12586. "orr r3, r3, r5\n\t"
  12587. "ldr r4, [%[a], #4]\n\t"
  12588. "str r3, [%[r], #12]\n\t"
  12589. "lsr r5, r4, #1\n\t"
  12590. "lsl r4, r4, %[n]\n\t"
  12591. "lsr r5, r5, r6\n\t"
  12592. "orr r2, r2, r5\n\t"
  12593. "ldr r3, [%[a], #0]\n\t"
  12594. "str r2, [%[r], #8]\n\t"
  12595. "lsr r5, r3, #1\n\t"
  12596. "lsl r3, r3, %[n]\n\t"
  12597. "lsr r5, r5, r6\n\t"
  12598. "orr r4, r4, r5\n\t"
  12599. "sub %[a], %[a], #64\n\t"
  12600. "sub %[r], %[r], #64\n\t"
  12601. "ldr r2, [%[a], #60]\n\t"
  12602. "str r4, [%[r], #68]\n\t"
  12603. "lsr r5, r2, #1\n\t"
  12604. "lsl r2, r2, %[n]\n\t"
  12605. "lsr r5, r5, r6\n\t"
  12606. "orr r3, r3, r5\n\t"
  12607. "ldr r4, [%[a], #56]\n\t"
  12608. "str r3, [%[r], #64]\n\t"
  12609. "lsr r5, r4, #1\n\t"
  12610. "lsl r4, r4, %[n]\n\t"
  12611. "lsr r5, r5, r6\n\t"
  12612. "orr r2, r2, r5\n\t"
  12613. "ldr r3, [%[a], #52]\n\t"
  12614. "str r2, [%[r], #60]\n\t"
  12615. "lsr r5, r3, #1\n\t"
  12616. "lsl r3, r3, %[n]\n\t"
  12617. "lsr r5, r5, r6\n\t"
  12618. "orr r4, r4, r5\n\t"
  12619. "ldr r2, [%[a], #48]\n\t"
  12620. "str r4, [%[r], #56]\n\t"
  12621. "lsr r5, r2, #1\n\t"
  12622. "lsl r2, r2, %[n]\n\t"
  12623. "lsr r5, r5, r6\n\t"
  12624. "orr r3, r3, r5\n\t"
  12625. "ldr r4, [%[a], #44]\n\t"
  12626. "str r3, [%[r], #52]\n\t"
  12627. "lsr r5, r4, #1\n\t"
  12628. "lsl r4, r4, %[n]\n\t"
  12629. "lsr r5, r5, r6\n\t"
  12630. "orr r2, r2, r5\n\t"
  12631. "ldr r3, [%[a], #40]\n\t"
  12632. "str r2, [%[r], #48]\n\t"
  12633. "lsr r5, r3, #1\n\t"
  12634. "lsl r3, r3, %[n]\n\t"
  12635. "lsr r5, r5, r6\n\t"
  12636. "orr r4, r4, r5\n\t"
  12637. "ldr r2, [%[a], #36]\n\t"
  12638. "str r4, [%[r], #44]\n\t"
  12639. "lsr r5, r2, #1\n\t"
  12640. "lsl r2, r2, %[n]\n\t"
  12641. "lsr r5, r5, r6\n\t"
  12642. "orr r3, r3, r5\n\t"
  12643. "ldr r4, [%[a], #32]\n\t"
  12644. "str r3, [%[r], #40]\n\t"
  12645. "lsr r5, r4, #1\n\t"
  12646. "lsl r4, r4, %[n]\n\t"
  12647. "lsr r5, r5, r6\n\t"
  12648. "orr r2, r2, r5\n\t"
  12649. "ldr r3, [%[a], #28]\n\t"
  12650. "str r2, [%[r], #36]\n\t"
  12651. "lsr r5, r3, #1\n\t"
  12652. "lsl r3, r3, %[n]\n\t"
  12653. "lsr r5, r5, r6\n\t"
  12654. "orr r4, r4, r5\n\t"
  12655. "ldr r2, [%[a], #24]\n\t"
  12656. "str r4, [%[r], #32]\n\t"
  12657. "lsr r5, r2, #1\n\t"
  12658. "lsl r2, r2, %[n]\n\t"
  12659. "lsr r5, r5, r6\n\t"
  12660. "orr r3, r3, r5\n\t"
  12661. "ldr r4, [%[a], #20]\n\t"
  12662. "str r3, [%[r], #28]\n\t"
  12663. "lsr r5, r4, #1\n\t"
  12664. "lsl r4, r4, %[n]\n\t"
  12665. "lsr r5, r5, r6\n\t"
  12666. "orr r2, r2, r5\n\t"
  12667. "ldr r3, [%[a], #16]\n\t"
  12668. "str r2, [%[r], #24]\n\t"
  12669. "lsr r5, r3, #1\n\t"
  12670. "lsl r3, r3, %[n]\n\t"
  12671. "lsr r5, r5, r6\n\t"
  12672. "orr r4, r4, r5\n\t"
  12673. "ldr r2, [%[a], #12]\n\t"
  12674. "str r4, [%[r], #20]\n\t"
  12675. "lsr r5, r2, #1\n\t"
  12676. "lsl r2, r2, %[n]\n\t"
  12677. "lsr r5, r5, r6\n\t"
  12678. "orr r3, r3, r5\n\t"
  12679. "ldr r4, [%[a], #8]\n\t"
  12680. "str r3, [%[r], #16]\n\t"
  12681. "lsr r5, r4, #1\n\t"
  12682. "lsl r4, r4, %[n]\n\t"
  12683. "lsr r5, r5, r6\n\t"
  12684. "orr r2, r2, r5\n\t"
  12685. "ldr r3, [%[a], #4]\n\t"
  12686. "str r2, [%[r], #12]\n\t"
  12687. "lsr r5, r3, #1\n\t"
  12688. "lsl r3, r3, %[n]\n\t"
  12689. "lsr r5, r5, r6\n\t"
  12690. "orr r4, r4, r5\n\t"
  12691. "ldr r2, [%[a], #0]\n\t"
  12692. "str r4, [%[r], #8]\n\t"
  12693. "lsr r5, r2, #1\n\t"
  12694. "lsl r2, r2, %[n]\n\t"
  12695. "lsr r5, r5, r6\n\t"
  12696. "orr r3, r3, r5\n\t"
  12697. "sub %[a], %[a], #64\n\t"
  12698. "sub %[r], %[r], #64\n\t"
  12699. "ldr r4, [%[a], #60]\n\t"
  12700. "str r3, [%[r], #68]\n\t"
  12701. "lsr r5, r4, #1\n\t"
  12702. "lsl r4, r4, %[n]\n\t"
  12703. "lsr r5, r5, r6\n\t"
  12704. "orr r2, r2, r5\n\t"
  12705. "ldr r3, [%[a], #56]\n\t"
  12706. "str r2, [%[r], #64]\n\t"
  12707. "lsr r5, r3, #1\n\t"
  12708. "lsl r3, r3, %[n]\n\t"
  12709. "lsr r5, r5, r6\n\t"
  12710. "orr r4, r4, r5\n\t"
  12711. "ldr r2, [%[a], #52]\n\t"
  12712. "str r4, [%[r], #60]\n\t"
  12713. "lsr r5, r2, #1\n\t"
  12714. "lsl r2, r2, %[n]\n\t"
  12715. "lsr r5, r5, r6\n\t"
  12716. "orr r3, r3, r5\n\t"
  12717. "ldr r4, [%[a], #48]\n\t"
  12718. "str r3, [%[r], #56]\n\t"
  12719. "lsr r5, r4, #1\n\t"
  12720. "lsl r4, r4, %[n]\n\t"
  12721. "lsr r5, r5, r6\n\t"
  12722. "orr r2, r2, r5\n\t"
  12723. "ldr r3, [%[a], #44]\n\t"
  12724. "str r2, [%[r], #52]\n\t"
  12725. "lsr r5, r3, #1\n\t"
  12726. "lsl r3, r3, %[n]\n\t"
  12727. "lsr r5, r5, r6\n\t"
  12728. "orr r4, r4, r5\n\t"
  12729. "ldr r2, [%[a], #40]\n\t"
  12730. "str r4, [%[r], #48]\n\t"
  12731. "lsr r5, r2, #1\n\t"
  12732. "lsl r2, r2, %[n]\n\t"
  12733. "lsr r5, r5, r6\n\t"
  12734. "orr r3, r3, r5\n\t"
  12735. "ldr r4, [%[a], #36]\n\t"
  12736. "str r3, [%[r], #44]\n\t"
  12737. "lsr r5, r4, #1\n\t"
  12738. "lsl r4, r4, %[n]\n\t"
  12739. "lsr r5, r5, r6\n\t"
  12740. "orr r2, r2, r5\n\t"
  12741. "ldr r3, [%[a], #32]\n\t"
  12742. "str r2, [%[r], #40]\n\t"
  12743. "lsr r5, r3, #1\n\t"
  12744. "lsl r3, r3, %[n]\n\t"
  12745. "lsr r5, r5, r6\n\t"
  12746. "orr r4, r4, r5\n\t"
  12747. "ldr r2, [%[a], #28]\n\t"
  12748. "str r4, [%[r], #36]\n\t"
  12749. "lsr r5, r2, #1\n\t"
  12750. "lsl r2, r2, %[n]\n\t"
  12751. "lsr r5, r5, r6\n\t"
  12752. "orr r3, r3, r5\n\t"
  12753. "ldr r4, [%[a], #24]\n\t"
  12754. "str r3, [%[r], #32]\n\t"
  12755. "lsr r5, r4, #1\n\t"
  12756. "lsl r4, r4, %[n]\n\t"
  12757. "lsr r5, r5, r6\n\t"
  12758. "orr r2, r2, r5\n\t"
  12759. "ldr r3, [%[a], #20]\n\t"
  12760. "str r2, [%[r], #28]\n\t"
  12761. "lsr r5, r3, #1\n\t"
  12762. "lsl r3, r3, %[n]\n\t"
  12763. "lsr r5, r5, r6\n\t"
  12764. "orr r4, r4, r5\n\t"
  12765. "ldr r2, [%[a], #16]\n\t"
  12766. "str r4, [%[r], #24]\n\t"
  12767. "lsr r5, r2, #1\n\t"
  12768. "lsl r2, r2, %[n]\n\t"
  12769. "lsr r5, r5, r6\n\t"
  12770. "orr r3, r3, r5\n\t"
  12771. "ldr r4, [%[a], #12]\n\t"
  12772. "str r3, [%[r], #20]\n\t"
  12773. "lsr r5, r4, #1\n\t"
  12774. "lsl r4, r4, %[n]\n\t"
  12775. "lsr r5, r5, r6\n\t"
  12776. "orr r2, r2, r5\n\t"
  12777. "ldr r3, [%[a], #8]\n\t"
  12778. "str r2, [%[r], #16]\n\t"
  12779. "lsr r5, r3, #1\n\t"
  12780. "lsl r3, r3, %[n]\n\t"
  12781. "lsr r5, r5, r6\n\t"
  12782. "orr r4, r4, r5\n\t"
  12783. "ldr r2, [%[a], #4]\n\t"
  12784. "str r4, [%[r], #12]\n\t"
  12785. "lsr r5, r2, #1\n\t"
  12786. "lsl r2, r2, %[n]\n\t"
  12787. "lsr r5, r5, r6\n\t"
  12788. "orr r3, r3, r5\n\t"
  12789. "ldr r4, [%[a], #0]\n\t"
  12790. "str r3, [%[r], #8]\n\t"
  12791. "lsr r5, r4, #1\n\t"
  12792. "lsl r4, r4, %[n]\n\t"
  12793. "lsr r5, r5, r6\n\t"
  12794. "orr r2, r2, r5\n\t"
  12795. "sub %[a], %[a], #64\n\t"
  12796. "sub %[r], %[r], #64\n\t"
  12797. "ldr r3, [%[a], #60]\n\t"
  12798. "str r2, [%[r], #68]\n\t"
  12799. "lsr r5, r3, #1\n\t"
  12800. "lsl r3, r3, %[n]\n\t"
  12801. "lsr r5, r5, r6\n\t"
  12802. "orr r4, r4, r5\n\t"
  12803. "ldr r2, [%[a], #56]\n\t"
  12804. "str r4, [%[r], #64]\n\t"
  12805. "lsr r5, r2, #1\n\t"
  12806. "lsl r2, r2, %[n]\n\t"
  12807. "lsr r5, r5, r6\n\t"
  12808. "orr r3, r3, r5\n\t"
  12809. "ldr r4, [%[a], #52]\n\t"
  12810. "str r3, [%[r], #60]\n\t"
  12811. "lsr r5, r4, #1\n\t"
  12812. "lsl r4, r4, %[n]\n\t"
  12813. "lsr r5, r5, r6\n\t"
  12814. "orr r2, r2, r5\n\t"
  12815. "ldr r3, [%[a], #48]\n\t"
  12816. "str r2, [%[r], #56]\n\t"
  12817. "lsr r5, r3, #1\n\t"
  12818. "lsl r3, r3, %[n]\n\t"
  12819. "lsr r5, r5, r6\n\t"
  12820. "orr r4, r4, r5\n\t"
  12821. "ldr r2, [%[a], #44]\n\t"
  12822. "str r4, [%[r], #52]\n\t"
  12823. "lsr r5, r2, #1\n\t"
  12824. "lsl r2, r2, %[n]\n\t"
  12825. "lsr r5, r5, r6\n\t"
  12826. "orr r3, r3, r5\n\t"
  12827. "ldr r4, [%[a], #40]\n\t"
  12828. "str r3, [%[r], #48]\n\t"
  12829. "lsr r5, r4, #1\n\t"
  12830. "lsl r4, r4, %[n]\n\t"
  12831. "lsr r5, r5, r6\n\t"
  12832. "orr r2, r2, r5\n\t"
  12833. "ldr r3, [%[a], #36]\n\t"
  12834. "str r2, [%[r], #44]\n\t"
  12835. "lsr r5, r3, #1\n\t"
  12836. "lsl r3, r3, %[n]\n\t"
  12837. "lsr r5, r5, r6\n\t"
  12838. "orr r4, r4, r5\n\t"
  12839. "ldr r2, [%[a], #32]\n\t"
  12840. "str r4, [%[r], #40]\n\t"
  12841. "lsr r5, r2, #1\n\t"
  12842. "lsl r2, r2, %[n]\n\t"
  12843. "lsr r5, r5, r6\n\t"
  12844. "orr r3, r3, r5\n\t"
  12845. "ldr r4, [%[a], #28]\n\t"
  12846. "str r3, [%[r], #36]\n\t"
  12847. "lsr r5, r4, #1\n\t"
  12848. "lsl r4, r4, %[n]\n\t"
  12849. "lsr r5, r5, r6\n\t"
  12850. "orr r2, r2, r5\n\t"
  12851. "ldr r3, [%[a], #24]\n\t"
  12852. "str r2, [%[r], #32]\n\t"
  12853. "lsr r5, r3, #1\n\t"
  12854. "lsl r3, r3, %[n]\n\t"
  12855. "lsr r5, r5, r6\n\t"
  12856. "orr r4, r4, r5\n\t"
  12857. "ldr r2, [%[a], #20]\n\t"
  12858. "str r4, [%[r], #28]\n\t"
  12859. "lsr r5, r2, #1\n\t"
  12860. "lsl r2, r2, %[n]\n\t"
  12861. "lsr r5, r5, r6\n\t"
  12862. "orr r3, r3, r5\n\t"
  12863. "ldr r4, [%[a], #16]\n\t"
  12864. "str r3, [%[r], #24]\n\t"
  12865. "lsr r5, r4, #1\n\t"
  12866. "lsl r4, r4, %[n]\n\t"
  12867. "lsr r5, r5, r6\n\t"
  12868. "orr r2, r2, r5\n\t"
  12869. "ldr r3, [%[a], #12]\n\t"
  12870. "str r2, [%[r], #20]\n\t"
  12871. "lsr r5, r3, #1\n\t"
  12872. "lsl r3, r3, %[n]\n\t"
  12873. "lsr r5, r5, r6\n\t"
  12874. "orr r4, r4, r5\n\t"
  12875. "ldr r2, [%[a], #8]\n\t"
  12876. "str r4, [%[r], #16]\n\t"
  12877. "lsr r5, r2, #1\n\t"
  12878. "lsl r2, r2, %[n]\n\t"
  12879. "lsr r5, r5, r6\n\t"
  12880. "orr r3, r3, r5\n\t"
  12881. "ldr r4, [%[a], #4]\n\t"
  12882. "str r3, [%[r], #12]\n\t"
  12883. "lsr r5, r4, #1\n\t"
  12884. "lsl r4, r4, %[n]\n\t"
  12885. "lsr r5, r5, r6\n\t"
  12886. "orr r2, r2, r5\n\t"
  12887. "ldr r3, [%[a], #0]\n\t"
  12888. "str r2, [%[r], #8]\n\t"
  12889. "lsr r5, r3, #1\n\t"
  12890. "lsl r3, r3, %[n]\n\t"
  12891. "lsr r5, r5, r6\n\t"
  12892. "orr r4, r4, r5\n\t"
  12893. "sub %[a], %[a], #64\n\t"
  12894. "sub %[r], %[r], #64\n\t"
  12895. "ldr r2, [%[a], #60]\n\t"
  12896. "str r4, [%[r], #68]\n\t"
  12897. "lsr r5, r2, #1\n\t"
  12898. "lsl r2, r2, %[n]\n\t"
  12899. "lsr r5, r5, r6\n\t"
  12900. "orr r3, r3, r5\n\t"
  12901. "ldr r4, [%[a], #56]\n\t"
  12902. "str r3, [%[r], #64]\n\t"
  12903. "lsr r5, r4, #1\n\t"
  12904. "lsl r4, r4, %[n]\n\t"
  12905. "lsr r5, r5, r6\n\t"
  12906. "orr r2, r2, r5\n\t"
  12907. "ldr r3, [%[a], #52]\n\t"
  12908. "str r2, [%[r], #60]\n\t"
  12909. "lsr r5, r3, #1\n\t"
  12910. "lsl r3, r3, %[n]\n\t"
  12911. "lsr r5, r5, r6\n\t"
  12912. "orr r4, r4, r5\n\t"
  12913. "ldr r2, [%[a], #48]\n\t"
  12914. "str r4, [%[r], #56]\n\t"
  12915. "lsr r5, r2, #1\n\t"
  12916. "lsl r2, r2, %[n]\n\t"
  12917. "lsr r5, r5, r6\n\t"
  12918. "orr r3, r3, r5\n\t"
  12919. "ldr r4, [%[a], #44]\n\t"
  12920. "str r3, [%[r], #52]\n\t"
  12921. "lsr r5, r4, #1\n\t"
  12922. "lsl r4, r4, %[n]\n\t"
  12923. "lsr r5, r5, r6\n\t"
  12924. "orr r2, r2, r5\n\t"
  12925. "ldr r3, [%[a], #40]\n\t"
  12926. "str r2, [%[r], #48]\n\t"
  12927. "lsr r5, r3, #1\n\t"
  12928. "lsl r3, r3, %[n]\n\t"
  12929. "lsr r5, r5, r6\n\t"
  12930. "orr r4, r4, r5\n\t"
  12931. "ldr r2, [%[a], #36]\n\t"
  12932. "str r4, [%[r], #44]\n\t"
  12933. "lsr r5, r2, #1\n\t"
  12934. "lsl r2, r2, %[n]\n\t"
  12935. "lsr r5, r5, r6\n\t"
  12936. "orr r3, r3, r5\n\t"
  12937. "ldr r4, [%[a], #32]\n\t"
  12938. "str r3, [%[r], #40]\n\t"
  12939. "lsr r5, r4, #1\n\t"
  12940. "lsl r4, r4, %[n]\n\t"
  12941. "lsr r5, r5, r6\n\t"
  12942. "orr r2, r2, r5\n\t"
  12943. "ldr r3, [%[a], #28]\n\t"
  12944. "str r2, [%[r], #36]\n\t"
  12945. "lsr r5, r3, #1\n\t"
  12946. "lsl r3, r3, %[n]\n\t"
  12947. "lsr r5, r5, r6\n\t"
  12948. "orr r4, r4, r5\n\t"
  12949. "ldr r2, [%[a], #24]\n\t"
  12950. "str r4, [%[r], #32]\n\t"
  12951. "lsr r5, r2, #1\n\t"
  12952. "lsl r2, r2, %[n]\n\t"
  12953. "lsr r5, r5, r6\n\t"
  12954. "orr r3, r3, r5\n\t"
  12955. "ldr r4, [%[a], #20]\n\t"
  12956. "str r3, [%[r], #28]\n\t"
  12957. "lsr r5, r4, #1\n\t"
  12958. "lsl r4, r4, %[n]\n\t"
  12959. "lsr r5, r5, r6\n\t"
  12960. "orr r2, r2, r5\n\t"
  12961. "ldr r3, [%[a], #16]\n\t"
  12962. "str r2, [%[r], #24]\n\t"
  12963. "lsr r5, r3, #1\n\t"
  12964. "lsl r3, r3, %[n]\n\t"
  12965. "lsr r5, r5, r6\n\t"
  12966. "orr r4, r4, r5\n\t"
  12967. "ldr r2, [%[a], #12]\n\t"
  12968. "str r4, [%[r], #20]\n\t"
  12969. "lsr r5, r2, #1\n\t"
  12970. "lsl r2, r2, %[n]\n\t"
  12971. "lsr r5, r5, r6\n\t"
  12972. "orr r3, r3, r5\n\t"
  12973. "ldr r4, [%[a], #8]\n\t"
  12974. "str r3, [%[r], #16]\n\t"
  12975. "lsr r5, r4, #1\n\t"
  12976. "lsl r4, r4, %[n]\n\t"
  12977. "lsr r5, r5, r6\n\t"
  12978. "orr r2, r2, r5\n\t"
  12979. "ldr r3, [%[a], #4]\n\t"
  12980. "str r2, [%[r], #12]\n\t"
  12981. "lsr r5, r3, #1\n\t"
  12982. "lsl r3, r3, %[n]\n\t"
  12983. "lsr r5, r5, r6\n\t"
  12984. "orr r4, r4, r5\n\t"
  12985. "ldr r2, [%[a], #0]\n\t"
  12986. "str r4, [%[r], #8]\n\t"
  12987. "lsr r5, r2, #1\n\t"
  12988. "lsl r2, r2, %[n]\n\t"
  12989. "lsr r5, r5, r6\n\t"
  12990. "orr r3, r3, r5\n\t"
  /* Bottom two words: r[0] has nothing carried in from below. */
  12991. "str r2, [%[r]]\n\t"
  12992. "str r3, [%[r], #4]\n\t"
  12993. :
  12994. : [r] "r" (r), [a] "r" (a), [n] "r" (n)
  12995. : "memory", "r2", "r3", "r4", "r5", "r6"
  12996. );
  12997. }
  12998. /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
  12999. *
  13000. * r A single precision number that is the result of the operation.
  13001. * e A single precision number that is the exponent.
  13002. * bits The number of bits in the exponent.
  13003. * m A single precision number that is the modulus.
  13004. * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
  13005. */
  13006. static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
  13007. const sp_digit* m)
  13008. {
  13009. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  13010. sp_digit* td;
  13011. #else
  13012. sp_digit td[385];
  13013. #endif
  13014. sp_digit* norm;
  13015. sp_digit* tmp;
  13016. sp_digit mp = 1;
  13017. sp_digit n, o;
  13018. sp_digit mask;
  13019. int i;
  13020. int c, y;
  13021. int err = MP_OKAY;
  13022. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  13023. td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
  13024. DYNAMIC_TYPE_TMP_BUFFER);
  13025. if (td == NULL) {
  13026. err = MEMORY_E;
  13027. }
  13028. #endif
  13029. if (err == MP_OKAY) {
  13030. norm = td;
  13031. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  13032. tmp = td + 256;
  13033. #else
  13034. tmp = &td[256];
  13035. #endif
  13036. sp_4096_mont_setup(m, &mp);
  13037. sp_4096_mont_norm_128(norm, m);
  13038. i = (bits - 1) / 32;
  13039. n = e[i--];
  13040. c = bits & 31;
  13041. if (c == 0) {
  13042. c = 32;
  13043. }
  13044. c -= bits % 5;
  13045. if (c == 32) {
  13046. c = 27;
  13047. }
  13048. if (c < 0) {
  13049. /* Number of bits in top word is less than number needed. */
  13050. c = -c;
  13051. y = (int)(n << c);
  13052. n = e[i--];
  13053. y |= (int)(n >> (64 - c));
  13054. n <<= c;
  13055. c = 64 - c;
  13056. }
  13057. else {
  13058. y = (int)(n >> c);
  13059. n <<= 32 - c;
  13060. }
  13061. sp_4096_lshift_128(r, norm, (byte)y);
  13062. for (; i>=0 || c>=5; ) {
  13063. if (c == 0) {
  13064. n = e[i--];
  13065. y = (int)(n >> 27);
  13066. n <<= 5;
  13067. c = 27;
  13068. }
  13069. else if (c < 5) {
  13070. y = (int)(n >> 27);
  13071. n = e[i--];
  13072. c = 5 - c;
  13073. y |= (int)(n >> (32 - c));
  13074. n <<= c;
  13075. c = 32 - c;
  13076. }
  13077. else {
  13078. y = (int)((n >> 27) & 0x1f);
  13079. n <<= 5;
  13080. c -= 5;
  13081. }
  13082. sp_4096_mont_sqr_128(r, r, m, mp);
  13083. sp_4096_mont_sqr_128(r, r, m, mp);
  13084. sp_4096_mont_sqr_128(r, r, m, mp);
  13085. sp_4096_mont_sqr_128(r, r, m, mp);
  13086. sp_4096_mont_sqr_128(r, r, m, mp);
  13087. sp_4096_lshift_128(r, r, (byte)y);
  13088. sp_4096_mul_d_128(tmp, norm, r[128]);
  13089. r[128] = 0;
  13090. o = sp_4096_add_128(r, r, tmp);
  13091. sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
  13092. }
  13093. XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
  13094. sp_4096_mont_reduce_128(r, m, mp);
  13095. mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
  13096. sp_4096_cond_sub_128(r, r, m, mask);
  13097. }
  13098. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  13099. if (td != NULL) {
  13100. XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  13101. }
  13102. #endif
  13103. return err;
  13104. }
  13105. #endif /* HAVE_FFDHE_4096 */
  13106. /* Perform the modular exponentiation for Diffie-Hellman.
  13107. *
  13108. * base Base.
  13109. * exp Array of bytes that is the exponent.
  13110. * expLen Length of data, in bytes, in exponent.
  13111. * mod Modulus.
  13112. * out Buffer to hold big-endian bytes of exponentiation result.
  13113. * Must be at least 512 bytes long.
  13114. * outLen Length, in bytes, of exponentiation result.
  13115. * returns 0 on success, MP_READ_E if there are too many bytes in an array
  13116. * and MEMORY_E if memory allocation fails.
  13117. */
  13118. int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
  13119. mp_int* mod, byte* out, word32* outLen)
  13120. {
  13121. int err = MP_OKAY;
  13122. sp_digit b[256], e[128], m[128];
  13123. sp_digit* r = b;
  13124. word32 i;
  13125. if (mp_count_bits(base) > 4096) {
  13126. err = MP_READ_E;
  13127. }
  13128. else if (expLen > 512) {
  13129. err = MP_READ_E;
  13130. }
  13131. else if (mp_count_bits(mod) != 4096) {
  13132. err = MP_READ_E;
  13133. }
  13134. else if (mp_iseven(mod)) {
  13135. err = MP_VAL;
  13136. }
  13137. if (err == MP_OKAY) {
  13138. sp_4096_from_mp(b, 128, base);
  13139. sp_4096_from_bin(e, 128, exp, expLen);
  13140. sp_4096_from_mp(m, 128, mod);
  13141. #ifdef HAVE_FFDHE_4096
  13142. if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
  13143. err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
  13144. else
  13145. #endif
  13146. err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
  13147. }
  13148. if (err == MP_OKAY) {
  13149. sp_4096_to_bin(r, out);
  13150. *outLen = 512;
  13151. for (i=0; i<512 && out[i] == 0; i++) {
  13152. }
  13153. *outLen -= i;
  13154. XMEMMOVE(out, out + i, *outLen);
  13155. }
  13156. XMEMSET(e, 0, sizeof(e));
  13157. return err;
  13158. }
  13159. #endif /* WOLFSSL_HAVE_SP_DH */
  13160. #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
  13161. #endif /* WOLFSSL_SP_4096 */
  13162. #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
  13163. #ifdef WOLFSSL_HAVE_SP_ECC
  13164. #ifndef WOLFSSL_SP_NO_256
  13165. /* Point structure to use. */
  13166. typedef struct sp_point_256 {
  13167. sp_digit x[2 * 8];
  13168. sp_digit y[2 * 8];
  13169. sp_digit z[2 * 8];
  13170. int infinity;
  13171. } sp_point_256;
  13172. /* The modulus (prime) of the curve P256. */
  13173. static const sp_digit p256_mod[8] = {
  13174. 0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
  13175. 0x00000001,0xffffffff
  13176. };
  13177. /* The Montogmery normalizer for modulus of the curve P256. */
  13178. static const sp_digit p256_norm_mod[8] = {
  13179. 0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
  13180. 0xfffffffe,0x00000000
  13181. };
  13182. /* The Montogmery multiplier for modulus of the curve P256. */
  13183. static const sp_digit p256_mp_mod = 0x00000001;
  13184. #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
  13185. defined(HAVE_ECC_VERIFY)
  13186. /* The order of the curve P256. */
  13187. static const sp_digit p256_order[8] = {
  13188. 0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
  13189. 0x00000000,0xffffffff
  13190. };
  13191. #endif
  13192. /* The order of the curve P256 minus 2. */
  13193. static const sp_digit p256_order2[8] = {
  13194. 0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
  13195. 0x00000000,0xffffffff
  13196. };
  13197. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  13198. /* The Montogmery normalizer for order of the curve P256. */
  13199. static const sp_digit p256_norm_order[8] = {
  13200. 0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
  13201. 0xffffffff,0x00000000
  13202. };
  13203. #endif
  13204. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  13205. /* The Montogmery multiplier for order of the curve P256. */
  13206. static const sp_digit p256_mp_order = 0xee00bc4f;
  13207. #endif
  13208. /* The base point of curve P256. */
  13209. static const sp_point_256 p256_base = {
  13210. /* X ordinate */
  13211. {
  13212. 0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
  13213. 0xe12c4247,0x6b17d1f2,
  13214. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  13215. },
  13216. /* Y ordinate */
  13217. {
  13218. 0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
  13219. 0xfe1a7f9b,0x4fe342e2,
  13220. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  13221. },
  13222. /* Z ordinate */
  13223. {
  13224. 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
  13225. 0x00000000,0x00000000,
  13226. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  13227. },
  13228. /* infinity */
  13229. 0
  13230. };
  13231. #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
  13232. static const sp_digit p256_b[8] = {
  13233. 0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
  13234. 0xaa3a93e7,0x5ac635d8
  13235. };
  13236. #endif
  13237. static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
  13238. {
  13239. int ret = MP_OKAY;
  13240. (void)heap;
  13241. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  13242. (void)sp;
  13243. *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
  13244. #else
  13245. *p = sp;
  13246. #endif
  13247. if (*p == NULL) {
  13248. ret = MEMORY_E;
  13249. }
  13250. return ret;
  13251. }
  13252. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  13253. /* Allocate memory for point and return error. */
  13254. #define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
  13255. #else
  13256. /* Set pointer to data and return no error. */
  13257. #define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
  13258. #endif
  13259. static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
  13260. {
  13261. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  13262. /* If valid pointer then clear point data if requested and free data. */
  13263. if (p != NULL) {
  13264. if (clear != 0) {
  13265. XMEMSET(p, 0, sizeof(*p));
  13266. }
  13267. XFREE(p, heap, DYNAMIC_TYPE_ECC);
  13268. }
  13269. #else
  13270. /* Clear point data if requested. */
  13271. if (clear != 0) {
  13272. XMEMSET(p, 0, sizeof(*p));
  13273. }
  13274. #endif
  13275. (void)heap;
  13276. }
  13277. /* Multiply a number by Montogmery normalizer mod modulus (prime).
  13278. *
  13279. * r The resulting Montgomery form number.
  13280. * a The number to convert.
  13281. * m The modulus (prime).
  13282. */
  13283. static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  13284. {
  13285. (void)m;
  13286. __asm__ __volatile__ (
  13287. "sub sp, sp, #24\n\t"
  13288. "ldr r2, [%[a], #0]\n\t"
  13289. "ldr r3, [%[a], #4]\n\t"
  13290. "ldr r4, [%[a], #8]\n\t"
  13291. "ldr r5, [%[a], #12]\n\t"
  13292. "ldr r6, [%[a], #16]\n\t"
  13293. "ldr r8, [%[a], #20]\n\t"
  13294. "ldr r9, [%[a], #24]\n\t"
  13295. "ldr r10, [%[a], #28]\n\t"
  13296. /* Clear overflow and underflow */
  13297. "mov r14, #0\n\t"
  13298. "mov r12, #0\n\t"
  13299. /* t[0] = 1 1 0 -1 -1 -1 -1 0 */
  13300. "adds r11, r2, r3\n\t"
  13301. "adc r14, r14, #0\n\t"
  13302. "subs r11, r11, r5\n\t"
  13303. "sbc r12, r12, #0\n\t"
  13304. "subs r11, r11, r6\n\t"
  13305. "sbc r12, r12, #0\n\t"
  13306. "subs r11, r11, r8\n\t"
  13307. "sbc r12, r12, #0\n\t"
  13308. "subs r11, r11, r9\n\t"
  13309. "sbc r12, r12, #0\n\t"
  13310. /* Store t[0] */
  13311. "str r11, [sp, #0]\n\t"
  13312. "neg r12, r12\n\t"
  13313. "mov r11, #0\n\t"
  13314. /* t[1] = 0 1 1 0 -1 -1 -1 -1 */
  13315. "adds r14, r14, r3\n\t"
  13316. "adc r11, r11, #0\n\t"
  13317. "adds r14, r14, r4\n\t"
  13318. "adc r11, r11, #0\n\t"
  13319. "subs r14, r14, r12\n\t"
  13320. "mov r12, #0\n\t"
  13321. "sbc r12, r12, #0\n\t"
  13322. "subs r14, r14, r6\n\t"
  13323. "sbc r12, r12, #0\n\t"
  13324. "subs r14, r14, r8\n\t"
  13325. "sbc r12, r12, #0\n\t"
  13326. "subs r14, r14, r9\n\t"
  13327. "sbc r12, r12, #0\n\t"
  13328. "subs r14, r14, r10\n\t"
  13329. "sbc r12, r12, #0\n\t"
  13330. /* Store t[1] */
  13331. "str r14, [sp, #4]\n\t"
  13332. "neg r12, r12\n\t"
  13333. "mov r14, #0\n\t"
  13334. /* t[2] = 0 0 1 1 0 -1 -1 -1 */
  13335. "adds r11, r11, r4\n\t"
  13336. "adc r14, r14, #0\n\t"
  13337. "adds r11, r11, r5\n\t"
  13338. "adc r14, r14, #0\n\t"
  13339. "subs r11, r11, r12\n\t"
  13340. "mov r12, #0\n\t"
  13341. "sbc r12, r12, #0\n\t"
  13342. "subs r11, r11, r8\n\t"
  13343. "sbc r12, r12, #0\n\t"
  13344. "subs r11, r11, r9\n\t"
  13345. "sbc r12, r12, #0\n\t"
  13346. "subs r11, r11, r10\n\t"
  13347. "sbc r12, r12, #0\n\t"
  13348. /* Store t[2] */
  13349. "str r11, [sp, #8]\n\t"
  13350. "neg r12, r12\n\t"
  13351. "mov r11, #0\n\t"
  13352. /* t[3] = -1 -1 0 2 2 1 0 -1 */
  13353. "adds r14, r14, r5\n\t"
  13354. "adc r11, r11, #0\n\t"
  13355. "adds r14, r14, r5\n\t"
  13356. "adc r11, r11, #0\n\t"
  13357. "adds r14, r14, r6\n\t"
  13358. "adc r11, r11, #0\n\t"
  13359. "adds r14, r14, r6\n\t"
  13360. "adc r11, r11, #0\n\t"
  13361. "adds r14, r14, r8\n\t"
  13362. "adc r11, r11, #0\n\t"
  13363. "subs r14, r14, r12\n\t"
  13364. "mov r12, #0\n\t"
  13365. "sbc r12, r12, #0\n\t"
  13366. "subs r14, r14, r2\n\t"
  13367. "sbc r12, r12, #0\n\t"
  13368. "subs r14, r14, r3\n\t"
  13369. "sbc r12, r12, #0\n\t"
  13370. "subs r14, r14, r10\n\t"
  13371. "sbc r12, r12, #0\n\t"
  13372. /* Store t[3] */
  13373. "str r14, [sp, #12]\n\t"
  13374. "neg r12, r12\n\t"
  13375. "mov r14, #0\n\t"
  13376. /* t[4] = 0 -1 -1 0 2 2 1 0 */
  13377. "adds r11, r11, r6\n\t"
  13378. "adc r14, r14, #0\n\t"
  13379. "adds r11, r11, r6\n\t"
  13380. "adc r14, r14, #0\n\t"
  13381. "adds r11, r11, r8\n\t"
  13382. "adc r14, r14, #0\n\t"
  13383. "adds r11, r11, r8\n\t"
  13384. "adc r14, r14, #0\n\t"
  13385. "adds r11, r11, r9\n\t"
  13386. "adc r14, r14, #0\n\t"
  13387. "subs r11, r11, r12\n\t"
  13388. "mov r12, #0\n\t"
  13389. "sbc r12, r12, #0\n\t"
  13390. "subs r11, r11, r3\n\t"
  13391. "sbc r12, r12, #0\n\t"
  13392. "subs r11, r11, r4\n\t"
  13393. "sbc r12, r12, #0\n\t"
  13394. /* Store t[4] */
  13395. "str r11, [sp, #16]\n\t"
  13396. "neg r12, r12\n\t"
  13397. "mov r11, #0\n\t"
  13398. /* t[5] = 0 0 -1 -1 0 2 2 1 */
  13399. "adds r14, r14, r8\n\t"
  13400. "adc r11, r11, #0\n\t"
  13401. "adds r14, r14, r8\n\t"
  13402. "adc r11, r11, #0\n\t"
  13403. "adds r14, r14, r9\n\t"
  13404. "adc r11, r11, #0\n\t"
  13405. "adds r14, r14, r9\n\t"
  13406. "adc r11, r11, #0\n\t"
  13407. "adds r14, r14, r10\n\t"
  13408. "adc r11, r11, #0\n\t"
  13409. "subs r14, r14, r12\n\t"
  13410. "mov r12, #0\n\t"
  13411. "sbc r12, r12, #0\n\t"
  13412. "subs r14, r14, r4\n\t"
  13413. "sbc r12, r12, #0\n\t"
  13414. "subs r14, r14, r5\n\t"
  13415. "sbc r12, r12, #0\n\t"
  13416. /* Store t[5] */
  13417. "str r14, [sp, #20]\n\t"
  13418. "neg r12, r12\n\t"
  13419. "mov r14, #0\n\t"
  13420. /* t[6] = -1 -1 0 0 0 1 3 2 */
  13421. "adds r11, r11, r8\n\t"
  13422. "adc r14, r14, #0\n\t"
  13423. "adds r11, r11, r9\n\t"
  13424. "adc r14, r14, #0\n\t"
  13425. "adds r11, r11, r9\n\t"
  13426. "adc r14, r14, #0\n\t"
  13427. "adds r11, r11, r9\n\t"
  13428. "adc r14, r14, #0\n\t"
  13429. "adds r11, r11, r10\n\t"
  13430. "adc r14, r14, #0\n\t"
  13431. "adds r11, r11, r10\n\t"
  13432. "adc r14, r14, #0\n\t"
  13433. "subs r11, r11, r12\n\t"
  13434. "mov r12, #0\n\t"
  13435. "sbc r12, r12, #0\n\t"
  13436. "subs r11, r11, r2\n\t"
  13437. "sbc r12, r12, #0\n\t"
  13438. "subs r11, r11, r3\n\t"
  13439. "sbc r12, r12, #0\n\t"
  13440. /* Store t[6] */
  13441. "mov r9, r11\n\t"
  13442. "neg r12, r12\n\t"
  13443. "mov r11, #0\n\t"
  13444. /* t[7] = 1 0 -1 -1 -1 -1 0 3 */
  13445. "adds r14, r14, r2\n\t"
  13446. "adc r11, r11, #0\n\t"
  13447. "adds r14, r14, r10\n\t"
  13448. "adc r11, r11, #0\n\t"
  13449. "adds r14, r14, r10\n\t"
  13450. "adc r11, r11, #0\n\t"
  13451. "adds r14, r14, r10\n\t"
  13452. "adc r11, r11, #0\n\t"
  13453. "subs r14, r14, r12\n\t"
  13454. "mov r12, #0\n\t"
  13455. "sbc r12, r12, #0\n\t"
  13456. "subs r14, r14, r4\n\t"
  13457. "sbc r12, r12, #0\n\t"
  13458. "subs r14, r14, r5\n\t"
  13459. "sbc r12, r12, #0\n\t"
  13460. "subs r14, r14, r6\n\t"
  13461. "sbc r12, r12, #0\n\t"
  13462. "subs r14, r14, r8\n\t"
  13463. "sbc r12, r12, #0\n\t"
  13464. /* Store t[7] */
  13465. /* Load intermediate */
  13466. "ldr r2, [sp, #0]\n\t"
  13467. "ldr r3, [sp, #4]\n\t"
  13468. "ldr r4, [sp, #8]\n\t"
  13469. "ldr r5, [sp, #12]\n\t"
  13470. "ldr r6, [sp, #16]\n\t"
  13471. "ldr r8, [sp, #20]\n\t"
  13472. "neg r12, r12\n\t"
  13473. /* Add overflow */
  13474. /* Subtract underflow - add neg underflow */
  13475. "adds r2, r2, r11\n\t"
  13476. "adcs r3, r3, #0\n\t"
  13477. "adcs r4, r4, #0\n\t"
  13478. "adds r5, r5, r12\n\t"
  13479. "adcs r6, r6, #0\n\t"
  13480. "adcs r8, r8, #0\n\t"
  13481. "adcs r9, r9, r12\n\t"
  13482. "adc r14, r14, r11\n\t"
  13483. /* Subtract overflow */
  13484. /* Add underflow - subtract neg underflow */
  13485. "subs r2, r2, r12\n\t"
  13486. "sbcs r3, r3, #0\n\t"
  13487. "sbcs r4, r4, #0\n\t"
  13488. "subs r5, r5, r11\n\t"
  13489. "sbcs r6, r6, #0\n\t"
  13490. "sbcs r8, r8, #0\n\t"
  13491. "sbcs r9, r9, r11\n\t"
  13492. "sbc r14, r14, r12\n\t"
  13493. /* Store result */
  13494. "str r2, [%[r], #0]\n\t"
  13495. "str r3, [%[r], #4]\n\t"
  13496. "str r4, [%[r], #8]\n\t"
  13497. "str r5, [%[r], #12]\n\t"
  13498. "str r6, [%[r], #16]\n\t"
  13499. "str r8, [%[r], #20]\n\t"
  13500. "str r9, [%[r], #24]\n\t"
  13501. "str r14, [%[r], #28]\n\t"
  13502. "add sp, sp, #24\n\t"
  13503. :
  13504. : [r] "r" (r), [a] "r" (a)
  13505. : "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r12"
  13506. );
  13507. return MP_OKAY;
  13508. }
  13509. /* Convert an mp_int to an array of sp_digit.
  13510. *
  13511. * r A single precision integer.
  13512. * size Maximum number of bytes to convert
  13513. * a A multi-precision integer.
  13514. */
  13515. static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
  13516. {
  13517. #if DIGIT_BIT == 32
  13518. int j;
  13519. XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
  13520. for (j = a->used; j < size; j++) {
  13521. r[j] = 0;
  13522. }
  13523. #elif DIGIT_BIT > 32
  13524. int i, j = 0;
  13525. word32 s = 0;
  13526. r[0] = 0;
  13527. for (i = 0; i < a->used && j < size; i++) {
  13528. r[j] |= ((sp_digit)a->dp[i] << s);
  13529. r[j] &= 0xffffffff;
  13530. s = 32U - s;
  13531. if (j + 1 >= size) {
  13532. break;
  13533. }
  13534. /* lint allow cast of mismatch word32 and mp_digit */
  13535. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  13536. while ((s + 32U) <= (word32)DIGIT_BIT) {
  13537. s += 32U;
  13538. r[j] &= 0xffffffff;
  13539. if (j + 1 >= size) {
  13540. break;
  13541. }
  13542. if (s < (word32)DIGIT_BIT) {
  13543. /* lint allow cast of mismatch word32 and mp_digit */
  13544. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  13545. }
  13546. else {
  13547. r[++j] = 0L;
  13548. }
  13549. }
  13550. s = (word32)DIGIT_BIT - s;
  13551. }
  13552. for (j++; j < size; j++) {
  13553. r[j] = 0;
  13554. }
  13555. #else
  13556. int i, j = 0, s = 0;
  13557. r[0] = 0;
  13558. for (i = 0; i < a->used && j < size; i++) {
  13559. r[j] |= ((sp_digit)a->dp[i]) << s;
  13560. if (s + DIGIT_BIT >= 32) {
  13561. r[j] &= 0xffffffff;
  13562. if (j + 1 >= size) {
  13563. break;
  13564. }
  13565. s = 32 - s;
  13566. if (s == DIGIT_BIT) {
  13567. r[++j] = 0;
  13568. s = 0;
  13569. }
  13570. else {
  13571. r[++j] = a->dp[i] >> s;
  13572. s = DIGIT_BIT - s;
  13573. }
  13574. }
  13575. else {
  13576. s += DIGIT_BIT;
  13577. }
  13578. }
  13579. for (j++; j < size; j++) {
  13580. r[j] = 0;
  13581. }
  13582. #endif
  13583. }
  13584. /* Convert a point of type ecc_point to type sp_point_256.
  13585. *
  13586. * p Point of type sp_point_256 (result).
  13587. * pm Point of type ecc_point.
  13588. */
  13589. static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
  13590. {
  13591. XMEMSET(p->x, 0, sizeof(p->x));
  13592. XMEMSET(p->y, 0, sizeof(p->y));
  13593. XMEMSET(p->z, 0, sizeof(p->z));
  13594. sp_256_from_mp(p->x, 8, pm->x);
  13595. sp_256_from_mp(p->y, 8, pm->y);
  13596. sp_256_from_mp(p->z, 8, pm->z);
  13597. p->infinity = 0;
  13598. }
  13599. /* Convert an array of sp_digit to an mp_int.
  13600. *
  13601. * a A single precision integer.
  13602. * r A multi-precision integer.
  13603. */
  13604. static int sp_256_to_mp(const sp_digit* a, mp_int* r)
  13605. {
  13606. int err;
  13607. err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
  13608. if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
  13609. #if DIGIT_BIT == 32
  13610. XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
  13611. r->used = 8;
  13612. mp_clamp(r);
  13613. #elif DIGIT_BIT < 32
  13614. int i, j = 0, s = 0;
  13615. r->dp[0] = 0;
  13616. for (i = 0; i < 8; i++) {
  13617. r->dp[j] |= (mp_digit)(a[i] << s);
  13618. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  13619. s = DIGIT_BIT - s;
  13620. r->dp[++j] = (mp_digit)(a[i] >> s);
  13621. while (s + DIGIT_BIT <= 32) {
  13622. s += DIGIT_BIT;
  13623. r->dp[j++] &= (1L << DIGIT_BIT) - 1;
  13624. if (s == SP_WORD_SIZE) {
  13625. r->dp[j] = 0;
  13626. }
  13627. else {
  13628. r->dp[j] = (mp_digit)(a[i] >> s);
  13629. }
  13630. }
  13631. s = 32 - s;
  13632. }
  13633. r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
  13634. mp_clamp(r);
  13635. #else
  13636. int i, j = 0, s = 0;
  13637. r->dp[0] = 0;
  13638. for (i = 0; i < 8; i++) {
  13639. r->dp[j] |= ((mp_digit)a[i]) << s;
  13640. if (s + 32 >= DIGIT_BIT) {
  13641. #if DIGIT_BIT != 32 && DIGIT_BIT != 64
  13642. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  13643. #endif
  13644. s = DIGIT_BIT - s;
  13645. r->dp[++j] = a[i] >> s;
  13646. s = 32 - s;
  13647. }
  13648. else {
  13649. s += 32;
  13650. }
  13651. }
  13652. r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
  13653. mp_clamp(r);
  13654. #endif
  13655. }
  13656. return err;
  13657. }
  13658. /* Convert a point of type sp_point_256 to type ecc_point.
  13659. *
  13660. * p Point of type sp_point_256.
  13661. * pm Point of type ecc_point (result).
  13662. * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  13663. * MP_OKAY.
  13664. */
  13665. static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
  13666. {
  13667. int err;
  13668. err = sp_256_to_mp(p->x, pm->x);
  13669. if (err == MP_OKAY) {
  13670. err = sp_256_to_mp(p->y, pm->y);
  13671. }
  13672. if (err == MP_OKAY) {
  13673. err = sp_256_to_mp(p->z, pm->z);
  13674. }
  13675. return err;
  13676. }
/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
  13686. SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
  13687. const sp_digit* m, sp_digit mp)
  13688. {
  13689. (void)mp;
  13690. (void)m;
  13691. __asm__ __volatile__ (
  13692. "sub sp, sp, #68\n\t"
  13693. "mov r5, #0\n\t"
  13694. /* A[0] * B[0] */
  13695. "ldr r6, [%[a], #0]\n\t"
  13696. "ldr r8, [%[b], #0]\n\t"
  13697. "umull r9, r10, r6, r8\n\t"
  13698. "str r9, [sp, #0]\n\t"
  13699. /* A[0] * B[1] */
  13700. "ldr r6, [%[a], #0]\n\t"
  13701. "ldr r8, [%[b], #4]\n\t"
  13702. "umull r3, r4, r6, r8\n\t"
  13703. "adds r10, r3, r10\n\t"
  13704. "adc r11, r4, #0\n\t"
  13705. /* A[1] * B[0] */
  13706. "ldr r6, [%[a], #4]\n\t"
  13707. "ldr r8, [%[b], #0]\n\t"
  13708. "umull r3, r4, r6, r8\n\t"
  13709. "adds r10, r3, r10\n\t"
  13710. "adcs r11, r4, r11\n\t"
  13711. "adc r14, r5, #0\n\t"
  13712. "str r10, [sp, #4]\n\t"
  13713. /* A[0] * B[2] */
  13714. "ldr r6, [%[a], #0]\n\t"
  13715. "ldr r8, [%[b], #8]\n\t"
  13716. "umull r3, r4, r6, r8\n\t"
  13717. "adds r11, r3, r11\n\t"
  13718. "adc r14, r4, r14\n\t"
  13719. /* A[1] * B[1] */
  13720. "ldr r6, [%[a], #4]\n\t"
  13721. "ldr r8, [%[b], #4]\n\t"
  13722. "umull r3, r4, r6, r8\n\t"
  13723. "adds r11, r3, r11\n\t"
  13724. "adcs r14, r4, r14\n\t"
  13725. "adc r9, r5, #0\n\t"
  13726. /* A[2] * B[0] */
  13727. "ldr r6, [%[a], #8]\n\t"
  13728. "ldr r8, [%[b], #0]\n\t"
  13729. "umull r3, r4, r6, r8\n\t"
  13730. "adds r11, r3, r11\n\t"
  13731. "adcs r14, r4, r14\n\t"
  13732. "adc r9, r5, r9\n\t"
  13733. "str r11, [sp, #8]\n\t"
  13734. /* A[0] * B[3] */
  13735. "ldr r6, [%[a], #0]\n\t"
  13736. "ldr r8, [%[b], #12]\n\t"
  13737. "umull r3, r4, r6, r8\n\t"
  13738. "adds r14, r3, r14\n\t"
  13739. "adcs r9, r4, r9\n\t"
  13740. "adc r10, r5, #0\n\t"
  13741. /* A[1] * B[2] */
  13742. "ldr r6, [%[a], #4]\n\t"
  13743. "ldr r8, [%[b], #8]\n\t"
  13744. "umull r3, r4, r6, r8\n\t"
  13745. "adds r14, r3, r14\n\t"
  13746. "adcs r9, r4, r9\n\t"
  13747. "adc r10, r5, r10\n\t"
  13748. /* A[2] * B[1] */
  13749. "ldr r6, [%[a], #8]\n\t"
  13750. "ldr r8, [%[b], #4]\n\t"
  13751. "umull r3, r4, r6, r8\n\t"
  13752. "adds r14, r3, r14\n\t"
  13753. "adcs r9, r4, r9\n\t"
  13754. "adc r10, r5, r10\n\t"
  13755. /* A[3] * B[0] */
  13756. "ldr r6, [%[a], #12]\n\t"
  13757. "ldr r8, [%[b], #0]\n\t"
  13758. "umull r3, r4, r6, r8\n\t"
  13759. "adds r14, r3, r14\n\t"
  13760. "adcs r9, r4, r9\n\t"
  13761. "adc r10, r5, r10\n\t"
  13762. "str r14, [sp, #12]\n\t"
  13763. /* A[0] * B[4] */
  13764. "ldr r6, [%[a], #0]\n\t"
  13765. "ldr r8, [%[b], #16]\n\t"
  13766. "umull r3, r4, r6, r8\n\t"
  13767. "adds r9, r3, r9\n\t"
  13768. "adcs r10, r4, r10\n\t"
  13769. "adc r11, r5, #0\n\t"
  13770. /* A[1] * B[3] */
  13771. "ldr r6, [%[a], #4]\n\t"
  13772. "ldr r8, [%[b], #12]\n\t"
  13773. "umull r3, r4, r6, r8\n\t"
  13774. "adds r9, r3, r9\n\t"
  13775. "adcs r10, r4, r10\n\t"
  13776. "adc r11, r5, r11\n\t"
  13777. /* A[2] * B[2] */
  13778. "ldr r6, [%[a], #8]\n\t"
  13779. "ldr r8, [%[b], #8]\n\t"
  13780. "umull r3, r4, r6, r8\n\t"
  13781. "adds r9, r3, r9\n\t"
  13782. "adcs r10, r4, r10\n\t"
  13783. "adc r11, r5, r11\n\t"
  13784. /* A[3] * B[1] */
  13785. "ldr r6, [%[a], #12]\n\t"
  13786. "ldr r8, [%[b], #4]\n\t"
  13787. "umull r3, r4, r6, r8\n\t"
  13788. "adds r9, r3, r9\n\t"
  13789. "adcs r10, r4, r10\n\t"
  13790. "adc r11, r5, r11\n\t"
  13791. /* A[4] * B[0] */
  13792. "ldr r6, [%[a], #16]\n\t"
  13793. "ldr r8, [%[b], #0]\n\t"
  13794. "umull r3, r4, r6, r8\n\t"
  13795. "adds r9, r3, r9\n\t"
  13796. "adcs r10, r4, r10\n\t"
  13797. "adc r11, r5, r11\n\t"
  13798. "str r9, [sp, #16]\n\t"
  13799. /* A[0] * B[5] */
  13800. "ldr r6, [%[a], #0]\n\t"
  13801. "ldr r8, [%[b], #20]\n\t"
  13802. "umull r3, r4, r6, r8\n\t"
  13803. "adds r10, r3, r10\n\t"
  13804. "adcs r11, r4, r11\n\t"
  13805. "adc r14, r5, #0\n\t"
  13806. /* A[1] * B[4] */
  13807. "ldr r6, [%[a], #4]\n\t"
  13808. "ldr r8, [%[b], #16]\n\t"
  13809. "umull r3, r4, r6, r8\n\t"
  13810. "adds r10, r3, r10\n\t"
  13811. "adcs r11, r4, r11\n\t"
  13812. "adc r14, r5, r14\n\t"
  13813. /* A[2] * B[3] */
  13814. "ldr r6, [%[a], #8]\n\t"
  13815. "ldr r8, [%[b], #12]\n\t"
  13816. "umull r3, r4, r6, r8\n\t"
  13817. "adds r10, r3, r10\n\t"
  13818. "adcs r11, r4, r11\n\t"
  13819. "adc r14, r5, r14\n\t"
  13820. /* A[3] * B[2] */
  13821. "ldr r6, [%[a], #12]\n\t"
  13822. "ldr r8, [%[b], #8]\n\t"
  13823. "umull r3, r4, r6, r8\n\t"
  13824. "adds r10, r3, r10\n\t"
  13825. "adcs r11, r4, r11\n\t"
  13826. "adc r14, r5, r14\n\t"
  13827. /* A[4] * B[1] */
  13828. "ldr r6, [%[a], #16]\n\t"
  13829. "ldr r8, [%[b], #4]\n\t"
  13830. "umull r3, r4, r6, r8\n\t"
  13831. "adds r10, r3, r10\n\t"
  13832. "adcs r11, r4, r11\n\t"
  13833. "adc r14, r5, r14\n\t"
  13834. /* A[5] * B[0] */
  13835. "ldr r6, [%[a], #20]\n\t"
  13836. "ldr r8, [%[b], #0]\n\t"
  13837. "umull r3, r4, r6, r8\n\t"
  13838. "adds r10, r3, r10\n\t"
  13839. "adcs r11, r4, r11\n\t"
  13840. "adc r14, r5, r14\n\t"
  13841. "str r10, [sp, #20]\n\t"
  13842. /* A[0] * B[6] */
  13843. "ldr r6, [%[a], #0]\n\t"
  13844. "ldr r8, [%[b], #24]\n\t"
  13845. "umull r3, r4, r6, r8\n\t"
  13846. "adds r11, r3, r11\n\t"
  13847. "adcs r14, r4, r14\n\t"
  13848. "adc r9, r5, #0\n\t"
  13849. /* A[1] * B[5] */
  13850. "ldr r6, [%[a], #4]\n\t"
  13851. "ldr r8, [%[b], #20]\n\t"
  13852. "umull r3, r4, r6, r8\n\t"
  13853. "adds r11, r3, r11\n\t"
  13854. "adcs r14, r4, r14\n\t"
  13855. "adc r9, r5, r9\n\t"
  13856. /* A[2] * B[4] */
  13857. "ldr r6, [%[a], #8]\n\t"
  13858. "ldr r8, [%[b], #16]\n\t"
  13859. "umull r3, r4, r6, r8\n\t"
  13860. "adds r11, r3, r11\n\t"
  13861. "adcs r14, r4, r14\n\t"
  13862. "adc r9, r5, r9\n\t"
  13863. /* A[3] * B[3] */
  13864. "ldr r6, [%[a], #12]\n\t"
  13865. "ldr r8, [%[b], #12]\n\t"
  13866. "umull r3, r4, r6, r8\n\t"
  13867. "adds r11, r3, r11\n\t"
  13868. "adcs r14, r4, r14\n\t"
  13869. "adc r9, r5, r9\n\t"
  13870. /* A[4] * B[2] */
  13871. "ldr r6, [%[a], #16]\n\t"
  13872. "ldr r8, [%[b], #8]\n\t"
  13873. "umull r3, r4, r6, r8\n\t"
  13874. "adds r11, r3, r11\n\t"
  13875. "adcs r14, r4, r14\n\t"
  13876. "adc r9, r5, r9\n\t"
  13877. /* A[5] * B[1] */
  13878. "ldr r6, [%[a], #20]\n\t"
  13879. "ldr r8, [%[b], #4]\n\t"
  13880. "umull r3, r4, r6, r8\n\t"
  13881. "adds r11, r3, r11\n\t"
  13882. "adcs r14, r4, r14\n\t"
  13883. "adc r9, r5, r9\n\t"
  13884. /* A[6] * B[0] */
  13885. "ldr r6, [%[a], #24]\n\t"
  13886. "ldr r8, [%[b], #0]\n\t"
  13887. "umull r3, r4, r6, r8\n\t"
  13888. "adds r11, r3, r11\n\t"
  13889. "adcs r14, r4, r14\n\t"
  13890. "adc r9, r5, r9\n\t"
  13891. "str r11, [sp, #24]\n\t"
  13892. /* A[0] * B[7] */
  13893. "ldr r6, [%[a], #0]\n\t"
  13894. "ldr r8, [%[b], #28]\n\t"
  13895. "umull r3, r4, r6, r8\n\t"
  13896. "adds r14, r3, r14\n\t"
  13897. "adcs r9, r4, r9\n\t"
  13898. "adc r10, r5, #0\n\t"
  13899. /* A[1] * B[6] */
  13900. "ldr r6, [%[a], #4]\n\t"
  13901. "ldr r8, [%[b], #24]\n\t"
  13902. "umull r3, r4, r6, r8\n\t"
  13903. "adds r14, r3, r14\n\t"
  13904. "adcs r9, r4, r9\n\t"
  13905. "adc r10, r5, r10\n\t"
  13906. /* A[2] * B[5] */
  13907. "ldr r6, [%[a], #8]\n\t"
  13908. "ldr r8, [%[b], #20]\n\t"
  13909. "umull r3, r4, r6, r8\n\t"
  13910. "adds r14, r3, r14\n\t"
  13911. "adcs r9, r4, r9\n\t"
  13912. "adc r10, r5, r10\n\t"
  13913. /* A[3] * B[4] */
  13914. "ldr r6, [%[a], #12]\n\t"
  13915. "ldr r8, [%[b], #16]\n\t"
  13916. "umull r3, r4, r6, r8\n\t"
  13917. "adds r14, r3, r14\n\t"
  13918. "adcs r9, r4, r9\n\t"
  13919. "adc r10, r5, r10\n\t"
  13920. /* A[4] * B[3] */
  13921. "ldr r6, [%[a], #16]\n\t"
  13922. "ldr r8, [%[b], #12]\n\t"
  13923. "umull r3, r4, r6, r8\n\t"
  13924. "adds r14, r3, r14\n\t"
  13925. "adcs r9, r4, r9\n\t"
  13926. "adc r10, r5, r10\n\t"
  13927. /* A[5] * B[2] */
  13928. "ldr r6, [%[a], #20]\n\t"
  13929. "ldr r8, [%[b], #8]\n\t"
  13930. "umull r3, r4, r6, r8\n\t"
  13931. "adds r14, r3, r14\n\t"
  13932. "adcs r9, r4, r9\n\t"
  13933. "adc r10, r5, r10\n\t"
  13934. /* A[6] * B[1] */
  13935. "ldr r6, [%[a], #24]\n\t"
  13936. "ldr r8, [%[b], #4]\n\t"
  13937. "umull r3, r4, r6, r8\n\t"
  13938. "adds r14, r3, r14\n\t"
  13939. "adcs r9, r4, r9\n\t"
  13940. "adc r10, r5, r10\n\t"
  13941. /* A[7] * B[0] */
  13942. "ldr r6, [%[a], #28]\n\t"
  13943. "ldr r8, [%[b], #0]\n\t"
  13944. "umull r3, r4, r6, r8\n\t"
  13945. "adds r14, r3, r14\n\t"
  13946. "adcs r9, r4, r9\n\t"
  13947. "adc r10, r5, r10\n\t"
  13948. "str r14, [sp, #28]\n\t"
  13949. /* A[1] * B[7] */
  13950. "ldr r6, [%[a], #4]\n\t"
  13951. "ldr r8, [%[b], #28]\n\t"
  13952. "umull r3, r4, r6, r8\n\t"
  13953. "adds r9, r3, r9\n\t"
  13954. "adcs r10, r4, r10\n\t"
  13955. "adc r11, r5, #0\n\t"
  13956. /* A[2] * B[6] */
  13957. "ldr r6, [%[a], #8]\n\t"
  13958. "ldr r8, [%[b], #24]\n\t"
  13959. "umull r3, r4, r6, r8\n\t"
  13960. "adds r9, r3, r9\n\t"
  13961. "adcs r10, r4, r10\n\t"
  13962. "adc r11, r5, r11\n\t"
  13963. /* A[3] * B[5] */
  13964. "ldr r6, [%[a], #12]\n\t"
  13965. "ldr r8, [%[b], #20]\n\t"
  13966. "umull r3, r4, r6, r8\n\t"
  13967. "adds r9, r3, r9\n\t"
  13968. "adcs r10, r4, r10\n\t"
  13969. "adc r11, r5, r11\n\t"
  13970. /* A[4] * B[4] */
  13971. "ldr r6, [%[a], #16]\n\t"
  13972. "ldr r8, [%[b], #16]\n\t"
  13973. "umull r3, r4, r6, r8\n\t"
  13974. "adds r9, r3, r9\n\t"
  13975. "adcs r10, r4, r10\n\t"
  13976. "adc r11, r5, r11\n\t"
  13977. /* A[5] * B[3] */
  13978. "ldr r6, [%[a], #20]\n\t"
  13979. "ldr r8, [%[b], #12]\n\t"
  13980. "umull r3, r4, r6, r8\n\t"
  13981. "adds r9, r3, r9\n\t"
  13982. "adcs r10, r4, r10\n\t"
  13983. "adc r11, r5, r11\n\t"
  13984. /* A[6] * B[2] */
  13985. "ldr r6, [%[a], #24]\n\t"
  13986. "ldr r8, [%[b], #8]\n\t"
  13987. "umull r3, r4, r6, r8\n\t"
  13988. "adds r9, r3, r9\n\t"
  13989. "adcs r10, r4, r10\n\t"
  13990. "adc r11, r5, r11\n\t"
  13991. /* A[7] * B[1] */
  13992. "ldr r6, [%[a], #28]\n\t"
  13993. "ldr r8, [%[b], #4]\n\t"
  13994. "umull r3, r4, r6, r8\n\t"
  13995. "adds r9, r3, r9\n\t"
  13996. "adcs r10, r4, r10\n\t"
  13997. "adc r11, r5, r11\n\t"
  13998. "str r9, [sp, #32]\n\t"
  13999. /* A[2] * B[7] */
  14000. "ldr r6, [%[a], #8]\n\t"
  14001. "ldr r8, [%[b], #28]\n\t"
  14002. "umull r3, r4, r6, r8\n\t"
  14003. "adds r10, r3, r10\n\t"
  14004. "adcs r11, r4, r11\n\t"
  14005. "adc r14, r5, #0\n\t"
  14006. /* A[3] * B[6] */
  14007. "ldr r6, [%[a], #12]\n\t"
  14008. "ldr r8, [%[b], #24]\n\t"
  14009. "umull r3, r4, r6, r8\n\t"
  14010. "adds r10, r3, r10\n\t"
  14011. "adcs r11, r4, r11\n\t"
  14012. "adc r14, r5, r14\n\t"
  14013. /* A[4] * B[5] */
  14014. "ldr r6, [%[a], #16]\n\t"
  14015. "ldr r8, [%[b], #20]\n\t"
  14016. "umull r3, r4, r6, r8\n\t"
  14017. "adds r10, r3, r10\n\t"
  14018. "adcs r11, r4, r11\n\t"
  14019. "adc r14, r5, r14\n\t"
  14020. /* A[5] * B[4] */
  14021. "ldr r6, [%[a], #20]\n\t"
  14022. "ldr r8, [%[b], #16]\n\t"
  14023. "umull r3, r4, r6, r8\n\t"
  14024. "adds r10, r3, r10\n\t"
  14025. "adcs r11, r4, r11\n\t"
  14026. "adc r14, r5, r14\n\t"
  14027. /* A[6] * B[3] */
  14028. "ldr r6, [%[a], #24]\n\t"
  14029. "ldr r8, [%[b], #12]\n\t"
  14030. "umull r3, r4, r6, r8\n\t"
  14031. "adds r10, r3, r10\n\t"
  14032. "adcs r11, r4, r11\n\t"
  14033. "adc r14, r5, r14\n\t"
  14034. /* A[7] * B[2] */
  14035. "ldr r6, [%[a], #28]\n\t"
  14036. "ldr r8, [%[b], #8]\n\t"
  14037. "umull r3, r4, r6, r8\n\t"
  14038. "adds r10, r3, r10\n\t"
  14039. "adcs r11, r4, r11\n\t"
  14040. "adc r14, r5, r14\n\t"
  14041. "str r10, [sp, #36]\n\t"
  14042. /* A[3] * B[7] */
  14043. "ldr r6, [%[a], #12]\n\t"
  14044. "ldr r8, [%[b], #28]\n\t"
  14045. "umull r3, r4, r6, r8\n\t"
  14046. "adds r11, r3, r11\n\t"
  14047. "adcs r14, r4, r14\n\t"
  14048. "adc r9, r5, #0\n\t"
  14049. /* A[4] * B[6] */
  14050. "ldr r6, [%[a], #16]\n\t"
  14051. "ldr r8, [%[b], #24]\n\t"
  14052. "umull r3, r4, r6, r8\n\t"
  14053. "adds r11, r3, r11\n\t"
  14054. "adcs r14, r4, r14\n\t"
  14055. "adc r9, r5, r9\n\t"
  14056. /* A[5] * B[5] */
  14057. "ldr r6, [%[a], #20]\n\t"
  14058. "ldr r8, [%[b], #20]\n\t"
  14059. "umull r3, r4, r6, r8\n\t"
  14060. "adds r11, r3, r11\n\t"
  14061. "adcs r14, r4, r14\n\t"
  14062. "adc r9, r5, r9\n\t"
  14063. /* A[6] * B[4] */
  14064. "ldr r6, [%[a], #24]\n\t"
  14065. "ldr r8, [%[b], #16]\n\t"
  14066. "umull r3, r4, r6, r8\n\t"
  14067. "adds r11, r3, r11\n\t"
  14068. "adcs r14, r4, r14\n\t"
  14069. "adc r9, r5, r9\n\t"
  14070. /* A[7] * B[3] */
  14071. "ldr r6, [%[a], #28]\n\t"
  14072. "ldr r8, [%[b], #12]\n\t"
  14073. "umull r3, r4, r6, r8\n\t"
  14074. "adds r11, r3, r11\n\t"
  14075. "adcs r14, r4, r14\n\t"
  14076. "adc r9, r5, r9\n\t"
  14077. "str r11, [sp, #40]\n\t"
  14078. /* A[4] * B[7] */
  14079. "ldr r6, [%[a], #16]\n\t"
  14080. "ldr r8, [%[b], #28]\n\t"
  14081. "umull r3, r4, r6, r8\n\t"
  14082. "adds r14, r3, r14\n\t"
  14083. "adcs r9, r4, r9\n\t"
  14084. "adc r10, r5, #0\n\t"
  14085. /* A[5] * B[6] */
  14086. "ldr r6, [%[a], #20]\n\t"
  14087. "ldr r8, [%[b], #24]\n\t"
  14088. "umull r3, r4, r6, r8\n\t"
  14089. "adds r14, r3, r14\n\t"
  14090. "adcs r9, r4, r9\n\t"
  14091. "adc r10, r5, r10\n\t"
  14092. /* A[6] * B[5] */
  14093. "ldr r6, [%[a], #24]\n\t"
  14094. "ldr r8, [%[b], #20]\n\t"
  14095. "umull r3, r4, r6, r8\n\t"
  14096. "adds r14, r3, r14\n\t"
  14097. "adcs r9, r4, r9\n\t"
  14098. "adc r10, r5, r10\n\t"
  14099. /* A[7] * B[4] */
  14100. "ldr r6, [%[a], #28]\n\t"
  14101. "ldr r8, [%[b], #16]\n\t"
  14102. "umull r3, r4, r6, r8\n\t"
  14103. "adds r14, r3, r14\n\t"
  14104. "adcs r9, r4, r9\n\t"
  14105. "adc r10, r5, r10\n\t"
  14106. "str r14, [sp, #44]\n\t"
  14107. /* A[5] * B[7] */
  14108. "ldr r6, [%[a], #20]\n\t"
  14109. "ldr r8, [%[b], #28]\n\t"
  14110. "umull r3, r4, r6, r8\n\t"
  14111. "adds r9, r3, r9\n\t"
  14112. "adcs r10, r4, r10\n\t"
  14113. "adc r11, r5, #0\n\t"
  14114. /* A[6] * B[6] */
  14115. "ldr r6, [%[a], #24]\n\t"
  14116. "ldr r8, [%[b], #24]\n\t"
  14117. "umull r3, r4, r6, r8\n\t"
  14118. "adds r9, r3, r9\n\t"
  14119. "adcs r10, r4, r10\n\t"
  14120. "adc r11, r5, r11\n\t"
  14121. /* A[7] * B[5] */
  14122. "ldr r6, [%[a], #28]\n\t"
  14123. "ldr r8, [%[b], #20]\n\t"
  14124. "umull r3, r4, r6, r8\n\t"
  14125. "adds r9, r3, r9\n\t"
  14126. "adcs r10, r4, r10\n\t"
  14127. "adc r11, r5, r11\n\t"
  14128. /* A[6] * B[7] */
  14129. "ldr r6, [%[a], #24]\n\t"
  14130. "ldr r8, [%[b], #28]\n\t"
  14131. "umull r3, r4, r6, r8\n\t"
  14132. "adds r10, r3, r10\n\t"
  14133. "adcs r11, r4, r11\n\t"
  14134. "adc r14, r5, #0\n\t"
  14135. /* A[7] * B[6] */
  14136. "ldr r6, [%[a], #28]\n\t"
  14137. "ldr r8, [%[b], #24]\n\t"
  14138. "umull r3, r4, r6, r8\n\t"
  14139. "adds r10, r3, r10\n\t"
  14140. "adcs r11, r4, r11\n\t"
  14141. "adc r14, r5, r14\n\t"
  14142. /* A[7] * B[7] */
  14143. "ldr r6, [%[a], #28]\n\t"
  14144. "ldr r8, [%[b], #28]\n\t"
  14145. "umull r3, r4, r6, r8\n\t"
  14146. "adds r11, r3, r11\n\t"
  14147. "adc r14, r4, r14\n\t"
  14148. "str r9, [sp, #48]\n\t"
  14149. "str r10, [sp, #52]\n\t"
  14150. "str r11, [sp, #56]\n\t"
  14151. "str r14, [sp, #60]\n\t"
  14152. /* Start Reduction */
  14153. "ldr r4, [sp, #0]\n\t"
  14154. "ldr r5, [sp, #4]\n\t"
  14155. "ldr r6, [sp, #8]\n\t"
  14156. "ldr r8, [sp, #12]\n\t"
  14157. "ldr r9, [sp, #16]\n\t"
  14158. "ldr r10, [sp, #20]\n\t"
  14159. "ldr r11, [sp, #24]\n\t"
  14160. "ldr r14, [sp, #28]\n\t"
  14161. /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */
  14162. /* - a[0] << 224 */
  14163. /* + (a[0]-a[1] * 2) << (6 * 32) */
  14164. "adds r11, r11, r4\n\t"
  14165. "adc r14, r14, r5\n\t"
  14166. "adds r11, r11, r4\n\t"
  14167. "adc r14, r14, r5\n\t"
  14168. /* - a[0] << (7 * 32) */
  14169. "sub r14, r14, r4\n\t"
  14170. /* + a[0]-a[4] << (3 * 32) */
  14171. "mov %[a], r8\n\t"
  14172. "mov %[b], r9\n\t"
  14173. "adds r8, r8, r4\n\t"
  14174. "adcs r9, r9, r5\n\t"
  14175. "adcs r10, r10, r6\n\t"
  14176. "adcs r11, r11, %[a]\n\t"
  14177. "adc r14, r14, %[b]\n\t"
  14178. "str r4, [sp, #0]\n\t"
  14179. "str r5, [sp, #4]\n\t"
  14180. "str r6, [sp, #8]\n\t"
  14181. "str r8, [sp, #12]\n\t"
  14182. "str r9, [sp, #16]\n\t"
  14183. "str r10, [sp, #20]\n\t"
  14184. /* a += mu * m */
  14185. /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */
  14186. "mov %[a], #0\n\t"
  14187. /* a[6] += t[0] + t[3] */
  14188. "ldr r3, [sp, #24]\n\t"
  14189. "adds r3, r3, r4\n\t"
  14190. "adc %[b], %[a], #0\n\t"
  14191. "adds r3, r3, r8\n\t"
  14192. "adc %[b], %[b], #0\n\t"
  14193. "str r11, [sp, #24]\n\t"
  14194. /* a[7] += t[1] + t[4] */
  14195. "ldr r3, [sp, #28]\n\t"
  14196. "adds r3, r3, %[b]\n\t"
  14197. "adc %[b], %[a], #0\n\t"
  14198. "adds r3, r3, r5\n\t"
  14199. "adc %[b], %[b], #0\n\t"
  14200. "adds r3, r3, r9\n\t"
  14201. "adc %[b], %[b], #0\n\t"
  14202. "str r14, [sp, #28]\n\t"
  14203. "str r3, [sp, #64]\n\t"
  14204. /* a[8] += t[0] + t[2] + t[5] */
  14205. "ldr r3, [sp, #32]\n\t"
  14206. "adds r3, r3, %[b]\n\t"
  14207. "adc %[b], %[a], #0\n\t"
  14208. "adds r3, r3, r4\n\t"
  14209. "adc %[b], %[b], #0\n\t"
  14210. "adds r3, r3, r6\n\t"
  14211. "adc %[b], %[b], #0\n\t"
  14212. "adds r3, r3, r10\n\t"
  14213. "adc %[b], %[b], #0\n\t"
  14214. "str r3, [sp, #32]\n\t"
  14215. /* a[9] += t[1] + t[3] + t[6] */
  14216. /* a[10] += t[2] + t[4] + t[7] */
  14217. "ldr r3, [sp, #36]\n\t"
  14218. "ldr r4, [sp, #40]\n\t"
  14219. "adds r3, r3, %[b]\n\t"
  14220. "adcs r4, r4, #0\n\t"
  14221. "adc %[b], %[a], #0\n\t"
  14222. "adds r3, r3, r5\n\t"
  14223. "adcs r4, r4, r6\n\t"
  14224. "adc %[b], %[b], #0\n\t"
  14225. "adds r3, r3, r8\n\t"
  14226. "adcs r4, r4, r9\n\t"
  14227. "adc %[b], %[b], #0\n\t"
  14228. "adds r3, r3, r11\n\t"
  14229. "adcs r4, r4, r14\n\t"
  14230. "adc %[b], %[b], #0\n\t"
  14231. "str r3, [sp, #36]\n\t"
  14232. "str r4, [sp, #40]\n\t"
  14233. /* a[11] += t[3] + t[5] */
  14234. /* a[12] += t[4] + t[6] */
  14235. /* a[13] += t[5] + t[7] */
  14236. /* a[14] += t[6] */
  14237. "ldr r3, [sp, #44]\n\t"
  14238. "ldr r4, [sp, #48]\n\t"
  14239. "ldr r5, [sp, #52]\n\t"
  14240. "ldr r6, [sp, #56]\n\t"
  14241. "adds r3, r3, %[b]\n\t"
  14242. "adcs r4, r4, #0\n\t"
  14243. "adcs r5, r5, #0\n\t"
  14244. "adcs r6, r6, #0\n\t"
  14245. "adc %[b], %[a], #0\n\t"
  14246. "adds r3, r3, r8\n\t"
  14247. "adcs r4, r4, r9\n\t"
  14248. "adcs r5, r5, r10\n\t"
  14249. "adcs r6, r6, r11\n\t"
  14250. "adc %[b], %[b], #0\n\t"
  14251. "adds r3, r3, r10\n\t"
  14252. "adcs r4, r4, r11\n\t"
  14253. "adcs r5, r5, r14\n\t"
  14254. "adcs r6, r6, #0\n\t"
  14255. "adc %[b], %[b], #0\n\t"
  14256. "str r3, [sp, #44]\n\t"
  14257. "str r4, [sp, #48]\n\t"
  14258. "str r5, [sp, #52]\n\t"
  14259. "str r6, [sp, #56]\n\t"
  14260. /* a[15] += t[7] */
  14261. "ldr r3, [sp, #60]\n\t"
  14262. "adds r3, r3, %[b]\n\t"
  14263. "adc %[b], %[a], #0\n\t"
  14264. "adds r3, r3, r14\n\t"
  14265. "adc %[b], %[b], #0\n\t"
  14266. "str r3, [sp, #60]\n\t"
  14267. "ldr r3, [sp, #64]\n\t"
  14268. "ldr r4, [sp, #32]\n\t"
  14269. "ldr r5, [sp, #36]\n\t"
  14270. "ldr r6, [sp, #40]\n\t"
  14271. "ldr r9, [sp, #0]\n\t"
  14272. "ldr r10, [sp, #4]\n\t"
  14273. "ldr r11, [sp, #8]\n\t"
  14274. "ldr r14, [sp, #12]\n\t"
  14275. "subs r3, r3, r9\n\t"
  14276. "sbcs r4, r4, r10\n\t"
  14277. "sbcs r5, r5, r11\n\t"
  14278. "sbcs r6, r6, r14\n\t"
  14279. "str r4, [sp, #32]\n\t"
  14280. "str r5, [sp, #36]\n\t"
  14281. "str r6, [sp, #40]\n\t"
  14282. "ldr r3, [sp, #44]\n\t"
  14283. "ldr r4, [sp, #48]\n\t"
  14284. "ldr r5, [sp, #52]\n\t"
  14285. "ldr r6, [sp, #56]\n\t"
  14286. "ldr r8, [sp, #60]\n\t"
  14287. "ldr r9, [sp, #16]\n\t"
  14288. "ldr r10, [sp, #20]\n\t"
  14289. "ldr r11, [sp, #24]\n\t"
  14290. "ldr r14, [sp, #28]\n\t"
  14291. "sbcs r3, r3, r9\n\t"
  14292. "sbcs r4, r4, r10\n\t"
  14293. "sbcs r5, r5, r11\n\t"
  14294. "sbcs r6, r6, r14\n\t"
  14295. "sbc r8, r8, #0\n\t"
  14296. "str r3, [sp, #44]\n\t"
  14297. "str r4, [sp, #48]\n\t"
  14298. "str r5, [sp, #52]\n\t"
  14299. "str r6, [sp, #56]\n\t"
  14300. "str r8, [sp, #60]\n\t"
  14301. /* mask m and sub from result if overflow */
  14302. "sub %[b], %[a], %[b]\n\t"
  14303. "and %[a], %[b], #1\n\t"
  14304. "ldr r3, [sp, #32]\n\t"
  14305. "ldr r4, [sp, #36]\n\t"
  14306. "ldr r5, [sp, #40]\n\t"
  14307. "ldr r6, [sp, #44]\n\t"
  14308. "ldr r8, [sp, #48]\n\t"
  14309. "ldr r9, [sp, #52]\n\t"
  14310. "ldr r10, [sp, #56]\n\t"
  14311. "ldr r11, [sp, #60]\n\t"
  14312. "subs r3, r3, %[b]\n\t"
  14313. "sbcs r4, r4, %[b]\n\t"
  14314. "sbcs r5, r5, %[b]\n\t"
  14315. "sbcs r6, r6, #0\n\t"
  14316. "sbcs r8, r8, #0\n\t"
  14317. "sbcs r9, r9, #0\n\t"
  14318. "sbcs r10, r10, %[a]\n\t"
  14319. "sbc r11, r11, %[b]\n\t"
  14320. "str r3, [%[r], #0]\n\t"
  14321. "str r4, [%[r], #4]\n\t"
  14322. "str r5, [%[r], #8]\n\t"
  14323. "str r6, [%[r], #12]\n\t"
  14324. "str r8, [%[r], #16]\n\t"
  14325. "str r9, [%[r], #20]\n\t"
  14326. "str r10, [%[r], #24]\n\t"
  14327. "str r11, [%[r], #28]\n\t"
  14328. "add sp, sp, #68\n\t"
  14329. : [a] "+r" (a), [b] "+r" (b)
  14330. : [r] "r" (r)
  14331. : "memory", "r9", "r10", "r11", "r14", "r3", "r4", "r5", "r6", "r8"
  14332. );
  14333. }
  14334. /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  14335. *
  14336. * r Result of squaring.
  14337. * a Number to square in Montogmery form.
  14338. * m Modulus (prime).
  14339. * mp Montogmery mulitplier.
  14340. */
  14341. SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
  14342. sp_digit mp)
  14343. {
  14344. (void)mp;
  14345. (void)m;
  14346. __asm__ __volatile__ (
  14347. "sub sp, sp, #68\n\t"
  14348. "mov r5, #0\n\t"
  14349. /* A[0] * A[1] */
  14350. "ldr r6, [%[a], #0]\n\t"
  14351. "ldr r8, [%[a], #4]\n\t"
  14352. "umull r10, r11, r6, r8\n\t"
  14353. "str r10, [sp, #4]\n\t"
  14354. /* A[0] * A[2] */
  14355. "ldr r6, [%[a], #0]\n\t"
  14356. "ldr r8, [%[a], #8]\n\t"
  14357. "umull r3, r4, r6, r8\n\t"
  14358. "adds r11, r3, r11\n\t"
  14359. "adc r14, r4, #0\n\t"
  14360. "str r11, [sp, #8]\n\t"
  14361. /* A[0] * A[3] */
  14362. "ldr r6, [%[a], #0]\n\t"
  14363. "ldr r8, [%[a], #12]\n\t"
  14364. "umull r3, r4, r6, r8\n\t"
  14365. "adds r14, r3, r14\n\t"
  14366. "adc r9, r4, #0\n\t"
  14367. /* A[1] * A[2] */
  14368. "ldr r6, [%[a], #4]\n\t"
  14369. "ldr r8, [%[a], #8]\n\t"
  14370. "umull r3, r4, r6, r8\n\t"
  14371. "adds r14, r3, r14\n\t"
  14372. "adcs r9, r4, r9\n\t"
  14373. "adc r10, r5, #0\n\t"
  14374. "str r14, [sp, #12]\n\t"
  14375. /* A[0] * A[4] */
  14376. "ldr r6, [%[a], #0]\n\t"
  14377. "ldr r8, [%[a], #16]\n\t"
  14378. "umull r3, r4, r6, r8\n\t"
  14379. "adds r9, r3, r9\n\t"
  14380. "adc r10, r4, r10\n\t"
  14381. /* A[1] * A[3] */
  14382. "ldr r6, [%[a], #4]\n\t"
  14383. "ldr r8, [%[a], #12]\n\t"
  14384. "umull r3, r4, r6, r8\n\t"
  14385. "adds r9, r3, r9\n\t"
  14386. "adcs r10, r4, r10\n\t"
  14387. "adc r11, r5, #0\n\t"
  14388. "str r9, [sp, #16]\n\t"
  14389. /* A[0] * A[5] */
  14390. "ldr r6, [%[a], #0]\n\t"
  14391. "ldr r8, [%[a], #20]\n\t"
  14392. "umull r3, r4, r6, r8\n\t"
  14393. "adds r10, r3, r10\n\t"
  14394. "adc r11, r4, r11\n\t"
  14395. /* A[1] * A[4] */
  14396. "ldr r6, [%[a], #4]\n\t"
  14397. "ldr r8, [%[a], #16]\n\t"
  14398. "umull r3, r4, r6, r8\n\t"
  14399. "adds r10, r3, r10\n\t"
  14400. "adcs r11, r4, r11\n\t"
  14401. "adc r14, r5, #0\n\t"
  14402. /* A[2] * A[3] */
  14403. "ldr r6, [%[a], #8]\n\t"
  14404. "ldr r8, [%[a], #12]\n\t"
  14405. "umull r3, r4, r6, r8\n\t"
  14406. "adds r10, r3, r10\n\t"
  14407. "adcs r11, r4, r11\n\t"
  14408. "adc r14, r5, r14\n\t"
  14409. "str r10, [sp, #20]\n\t"
  14410. /* A[0] * A[6] */
  14411. "ldr r6, [%[a], #0]\n\t"
  14412. "ldr r8, [%[a], #24]\n\t"
  14413. "umull r3, r4, r6, r8\n\t"
  14414. "adds r11, r3, r11\n\t"
  14415. "adcs r14, r4, r14\n\t"
  14416. "adc r9, r5, #0\n\t"
  14417. /* A[1] * A[5] */
  14418. "ldr r6, [%[a], #4]\n\t"
  14419. "ldr r8, [%[a], #20]\n\t"
  14420. "umull r3, r4, r6, r8\n\t"
  14421. "adds r11, r3, r11\n\t"
  14422. "adcs r14, r4, r14\n\t"
  14423. "adc r9, r5, r9\n\t"
  14424. /* A[2] * A[4] */
  14425. "ldr r6, [%[a], #8]\n\t"
  14426. "ldr r8, [%[a], #16]\n\t"
  14427. "umull r3, r4, r6, r8\n\t"
  14428. "adds r11, r3, r11\n\t"
  14429. "adcs r14, r4, r14\n\t"
  14430. "adc r9, r5, r9\n\t"
  14431. "str r11, [sp, #24]\n\t"
  14432. /* A[0] * A[7] */
  14433. "ldr r6, [%[a], #0]\n\t"
  14434. "ldr r8, [%[a], #28]\n\t"
  14435. "umull r3, r4, r6, r8\n\t"
  14436. "adds r14, r3, r14\n\t"
  14437. "adcs r9, r4, r9\n\t"
  14438. "adc r10, r5, #0\n\t"
  14439. /* A[1] * A[6] */
  14440. "ldr r6, [%[a], #4]\n\t"
  14441. "ldr r8, [%[a], #24]\n\t"
  14442. "umull r3, r4, r6, r8\n\t"
  14443. "adds r14, r3, r14\n\t"
  14444. "adcs r9, r4, r9\n\t"
  14445. "adc r10, r5, r10\n\t"
  14446. /* A[2] * A[5] */
  14447. "ldr r6, [%[a], #8]\n\t"
  14448. "ldr r8, [%[a], #20]\n\t"
  14449. "umull r3, r4, r6, r8\n\t"
  14450. "adds r14, r3, r14\n\t"
  14451. "adcs r9, r4, r9\n\t"
  14452. "adc r10, r5, r10\n\t"
  14453. /* A[3] * A[4] */
  14454. "ldr r6, [%[a], #12]\n\t"
  14455. "ldr r8, [%[a], #16]\n\t"
  14456. "umull r3, r4, r6, r8\n\t"
  14457. "adds r14, r3, r14\n\t"
  14458. "adcs r9, r4, r9\n\t"
  14459. "adc r10, r5, r10\n\t"
  14460. "str r14, [sp, #28]\n\t"
  14461. /* A[1] * A[7] */
  14462. "ldr r6, [%[a], #4]\n\t"
  14463. "ldr r8, [%[a], #28]\n\t"
  14464. "umull r3, r4, r6, r8\n\t"
  14465. "adds r9, r3, r9\n\t"
  14466. "adcs r10, r4, r10\n\t"
  14467. "adc r11, r5, #0\n\t"
  14468. /* A[2] * A[6] */
  14469. "ldr r6, [%[a], #8]\n\t"
  14470. "ldr r8, [%[a], #24]\n\t"
  14471. "umull r3, r4, r6, r8\n\t"
  14472. "adds r9, r3, r9\n\t"
  14473. "adcs r10, r4, r10\n\t"
  14474. "adc r11, r5, r11\n\t"
  14475. /* A[3] * A[5] */
  14476. "ldr r6, [%[a], #12]\n\t"
  14477. "ldr r8, [%[a], #20]\n\t"
  14478. "umull r3, r4, r6, r8\n\t"
  14479. "adds r9, r3, r9\n\t"
  14480. "adcs r10, r4, r10\n\t"
  14481. "adc r11, r5, r11\n\t"
  14482. "str r9, [sp, #32]\n\t"
  14483. /* A[2] * A[7] */
  14484. "ldr r6, [%[a], #8]\n\t"
  14485. "ldr r8, [%[a], #28]\n\t"
  14486. "umull r3, r4, r6, r8\n\t"
  14487. "adds r10, r3, r10\n\t"
  14488. "adcs r11, r4, r11\n\t"
  14489. "adc r14, r5, #0\n\t"
  14490. /* A[3] * A[6] */
  14491. "ldr r6, [%[a], #12]\n\t"
  14492. "ldr r8, [%[a], #24]\n\t"
  14493. "umull r3, r4, r6, r8\n\t"
  14494. "adds r10, r3, r10\n\t"
  14495. "adcs r11, r4, r11\n\t"
  14496. "adc r14, r5, r14\n\t"
  14497. /* A[4] * A[5] */
  14498. "ldr r6, [%[a], #16]\n\t"
  14499. "ldr r8, [%[a], #20]\n\t"
  14500. "umull r3, r4, r6, r8\n\t"
  14501. "adds r10, r3, r10\n\t"
  14502. "adcs r11, r4, r11\n\t"
  14503. "adc r14, r5, r14\n\t"
  14504. "str r10, [sp, #36]\n\t"
  14505. /* A[3] * A[7] */
  14506. "ldr r6, [%[a], #12]\n\t"
  14507. "ldr r8, [%[a], #28]\n\t"
  14508. "umull r3, r4, r6, r8\n\t"
  14509. "adds r11, r3, r11\n\t"
  14510. "adcs r14, r4, r14\n\t"
  14511. "adc r9, r5, #0\n\t"
  14512. /* A[4] * A[6] */
  14513. "ldr r6, [%[a], #16]\n\t"
  14514. "ldr r8, [%[a], #24]\n\t"
  14515. "umull r3, r4, r6, r8\n\t"
  14516. "adds r11, r3, r11\n\t"
  14517. "adcs r14, r4, r14\n\t"
  14518. "adc r9, r5, r9\n\t"
  14519. "str r11, [sp, #40]\n\t"
  14520. /* A[4] * A[7] */
  14521. "ldr r6, [%[a], #16]\n\t"
  14522. "ldr r8, [%[a], #28]\n\t"
  14523. "umull r3, r4, r6, r8\n\t"
  14524. "adds r14, r3, r14\n\t"
  14525. "adcs r9, r4, r9\n\t"
  14526. "adc r10, r5, #0\n\t"
  14527. /* A[5] * A[6] */
  14528. "ldr r6, [%[a], #20]\n\t"
  14529. "ldr r8, [%[a], #24]\n\t"
  14530. "umull r3, r4, r6, r8\n\t"
  14531. "adds r14, r3, r14\n\t"
  14532. "adcs r9, r4, r9\n\t"
  14533. "adc r10, r5, r10\n\t"
  14534. "str r14, [sp, #44]\n\t"
  14535. /* A[5] * A[7] */
  14536. "ldr r6, [%[a], #20]\n\t"
  14537. "ldr r8, [%[a], #28]\n\t"
  14538. "umull r3, r4, r6, r8\n\t"
  14539. "adds r9, r3, r9\n\t"
  14540. "adcs r10, r4, r10\n\t"
  14541. "adc r11, r5, #0\n\t"
  14542. "str r9, [sp, #48]\n\t"
  14543. /* A[6] * A[7] */
  14544. "ldr r6, [%[a], #24]\n\t"
  14545. "ldr r8, [%[a], #28]\n\t"
  14546. "umull r3, r4, r6, r8\n\t"
  14547. "adds r10, r3, r10\n\t"
  14548. "adc r11, r4, r11\n\t"
  14549. "str r10, [sp, #52]\n\t"
  14550. "str r11, [sp, #56]\n\t"
  14551. /* Double */
  14552. "ldr r4, [sp, #4]\n\t"
  14553. "ldr r6, [sp, #8]\n\t"
  14554. "ldr r8, [sp, #12]\n\t"
  14555. "ldr r9, [sp, #16]\n\t"
  14556. "ldr r10, [sp, #20]\n\t"
  14557. "ldr r11, [sp, #24]\n\t"
  14558. "ldr r14, [sp, #28]\n\t"
  14559. "ldr r12, [sp, #32]\n\t"
  14560. "ldr r3, [sp, #36]\n\t"
  14561. "adds r4, r4, r4\n\t"
  14562. "adcs r6, r6, r6\n\t"
  14563. "adcs r8, r8, r8\n\t"
  14564. "adcs r9, r9, r9\n\t"
  14565. "adcs r10, r10, r10\n\t"
  14566. "adcs r11, r11, r11\n\t"
  14567. "adcs r14, r14, r14\n\t"
  14568. "adcs r12, r12, r12\n\t"
  14569. "adcs r3, r3, r3\n\t"
  14570. "str r4, [sp, #4]\n\t"
  14571. "str r6, [sp, #8]\n\t"
  14572. "str r8, [sp, #12]\n\t"
  14573. "str r9, [sp, #16]\n\t"
  14574. "str r10, [sp, #20]\n\t"
  14575. "str r11, [sp, #24]\n\t"
  14576. "str r14, [sp, #28]\n\t"
  14577. "str r12, [sp, #32]\n\t"
  14578. "str r3, [sp, #36]\n\t"
  14579. "ldr r4, [sp, #40]\n\t"
  14580. "ldr r6, [sp, #44]\n\t"
  14581. "ldr r8, [sp, #48]\n\t"
  14582. "ldr r9, [sp, #52]\n\t"
  14583. "ldr r10, [sp, #56]\n\t"
  14584. "adcs r4, r4, r4\n\t"
  14585. "adcs r6, r6, r6\n\t"
  14586. "adcs r8, r8, r8\n\t"
  14587. "adcs r9, r9, r9\n\t"
  14588. "adcs r10, r10, r10\n\t"
  14589. "str r4, [sp, #40]\n\t"
  14590. "str r6, [sp, #44]\n\t"
  14591. "str r8, [sp, #48]\n\t"
  14592. "str r9, [sp, #52]\n\t"
  14593. "str r10, [sp, #56]\n\t"
  14594. "adc r11, r5, #0\n\t"
  14595. "str r11, [sp, #60]\n\t"
  14596. "ldr r4, [sp, #4]\n\t"
  14597. "ldr r5, [sp, #8]\n\t"
  14598. "ldr r12, [sp, #12]\n\t"
  14599. /* A[0] * A[0] */
  14600. "ldr r6, [%[a], #0]\n\t"
  14601. "umull r9, r10, r6, r6\n\t"
  14602. /* A[1] * A[1] */
  14603. "ldr r6, [%[a], #4]\n\t"
  14604. "umull r11, r14, r6, r6\n\t"
  14605. "adds r10, r10, r4\n\t"
  14606. "adcs r11, r11, r5\n\t"
  14607. "adcs r14, r14, r12\n\t"
  14608. "str r9, [sp, #0]\n\t"
  14609. "str r10, [sp, #4]\n\t"
  14610. "str r11, [sp, #8]\n\t"
  14611. "str r14, [sp, #12]\n\t"
  14612. "ldr r3, [sp, #16]\n\t"
  14613. "ldr r4, [sp, #20]\n\t"
  14614. "ldr r5, [sp, #24]\n\t"
  14615. "ldr r12, [sp, #28]\n\t"
  14616. /* A[2] * A[2] */
  14617. "ldr r6, [%[a], #8]\n\t"
  14618. "umull r9, r10, r6, r6\n\t"
  14619. /* A[3] * A[3] */
  14620. "ldr r6, [%[a], #12]\n\t"
  14621. "umull r11, r14, r6, r6\n\t"
  14622. "adcs r9, r9, r3\n\t"
  14623. "adcs r10, r10, r4\n\t"
  14624. "adcs r11, r11, r5\n\t"
  14625. "adcs r14, r14, r12\n\t"
  14626. "str r9, [sp, #16]\n\t"
  14627. "str r10, [sp, #20]\n\t"
  14628. "str r11, [sp, #24]\n\t"
  14629. "str r14, [sp, #28]\n\t"
  14630. "ldr r3, [sp, #32]\n\t"
  14631. "ldr r4, [sp, #36]\n\t"
  14632. "ldr r5, [sp, #40]\n\t"
  14633. "ldr r12, [sp, #44]\n\t"
  14634. /* A[4] * A[4] */
  14635. "ldr r6, [%[a], #16]\n\t"
  14636. "umull r9, r10, r6, r6\n\t"
  14637. /* A[5] * A[5] */
  14638. "ldr r6, [%[a], #20]\n\t"
  14639. "umull r11, r14, r6, r6\n\t"
  14640. "adcs r9, r9, r3\n\t"
  14641. "adcs r10, r10, r4\n\t"
  14642. "adcs r11, r11, r5\n\t"
  14643. "adcs r14, r14, r12\n\t"
  14644. "str r9, [sp, #32]\n\t"
  14645. "str r10, [sp, #36]\n\t"
  14646. "str r11, [sp, #40]\n\t"
  14647. "str r14, [sp, #44]\n\t"
  14648. "ldr r3, [sp, #48]\n\t"
  14649. "ldr r4, [sp, #52]\n\t"
  14650. "ldr r5, [sp, #56]\n\t"
  14651. "ldr r12, [sp, #60]\n\t"
  14652. /* A[6] * A[6] */
  14653. "ldr r6, [%[a], #24]\n\t"
  14654. "umull r9, r10, r6, r6\n\t"
  14655. /* A[7] * A[7] */
  14656. "ldr r6, [%[a], #28]\n\t"
  14657. "umull r11, r14, r6, r6\n\t"
  14658. "adcs r9, r9, r3\n\t"
  14659. "adcs r10, r10, r4\n\t"
  14660. "adcs r11, r11, r5\n\t"
  14661. "adc r14, r14, r12\n\t"
  14662. "str r9, [sp, #48]\n\t"
  14663. "str r10, [sp, #52]\n\t"
  14664. "str r11, [sp, #56]\n\t"
  14665. "str r14, [sp, #60]\n\t"
  14666. /* Start Reduction */
  14667. "ldr r4, [sp, #0]\n\t"
  14668. "ldr r5, [sp, #4]\n\t"
  14669. "ldr r6, [sp, #8]\n\t"
  14670. "ldr r8, [sp, #12]\n\t"
  14671. "ldr r9, [sp, #16]\n\t"
  14672. "ldr r10, [sp, #20]\n\t"
  14673. "ldr r11, [sp, #24]\n\t"
  14674. "ldr r14, [sp, #28]\n\t"
  14675. /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */
  14676. /* - a[0] << 224 */
  14677. /* + (a[0]-a[1] * 2) << (6 * 32) */
  14678. "adds r11, r11, r4\n\t"
  14679. "adc r14, r14, r5\n\t"
  14680. "adds r11, r11, r4\n\t"
  14681. "adc r14, r14, r5\n\t"
  14682. /* - a[0] << (7 * 32) */
  14683. "sub r14, r14, r4\n\t"
  14684. /* + a[0]-a[4] << (3 * 32) */
  14685. "mov %[a], r8\n\t"
  14686. "mov r12, r9\n\t"
  14687. "adds r8, r8, r4\n\t"
  14688. "adcs r9, r9, r5\n\t"
  14689. "adcs r10, r10, r6\n\t"
  14690. "adcs r11, r11, %[a]\n\t"
  14691. "adc r14, r14, r12\n\t"
  14692. "str r4, [sp, #0]\n\t"
  14693. "str r5, [sp, #4]\n\t"
  14694. "str r6, [sp, #8]\n\t"
  14695. "str r8, [sp, #12]\n\t"
  14696. "str r9, [sp, #16]\n\t"
  14697. "str r10, [sp, #20]\n\t"
  14698. /* a += mu * m */
  14699. /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */
  14700. "mov %[a], #0\n\t"
  14701. /* a[6] += t[0] + t[3] */
  14702. "ldr r3, [sp, #24]\n\t"
  14703. "adds r3, r3, r4\n\t"
  14704. "adc r12, %[a], #0\n\t"
  14705. "adds r3, r3, r8\n\t"
  14706. "adc r12, r12, #0\n\t"
  14707. "str r11, [sp, #24]\n\t"
  14708. /* a[7] += t[1] + t[4] */
  14709. "ldr r3, [sp, #28]\n\t"
  14710. "adds r3, r3, r12\n\t"
  14711. "adc r12, %[a], #0\n\t"
  14712. "adds r3, r3, r5\n\t"
  14713. "adc r12, r12, #0\n\t"
  14714. "adds r3, r3, r9\n\t"
  14715. "adc r12, r12, #0\n\t"
  14716. "str r14, [sp, #28]\n\t"
  14717. "str r3, [sp, #64]\n\t"
  14718. /* a[8] += t[0] + t[2] + t[5] */
  14719. "ldr r3, [sp, #32]\n\t"
  14720. "adds r3, r3, r12\n\t"
  14721. "adc r12, %[a], #0\n\t"
  14722. "adds r3, r3, r4\n\t"
  14723. "adc r12, r12, #0\n\t"
  14724. "adds r3, r3, r6\n\t"
  14725. "adc r12, r12, #0\n\t"
  14726. "adds r3, r3, r10\n\t"
  14727. "adc r12, r12, #0\n\t"
  14728. "str r3, [sp, #32]\n\t"
  14729. /* a[9] += t[1] + t[3] + t[6] */
  14730. /* a[10] += t[2] + t[4] + t[7] */
  14731. "ldr r3, [sp, #36]\n\t"
  14732. "ldr r4, [sp, #40]\n\t"
  14733. "adds r3, r3, r12\n\t"
  14734. "adcs r4, r4, #0\n\t"
  14735. "adc r12, %[a], #0\n\t"
  14736. "adds r3, r3, r5\n\t"
  14737. "adcs r4, r4, r6\n\t"
  14738. "adc r12, r12, #0\n\t"
  14739. "adds r3, r3, r8\n\t"
  14740. "adcs r4, r4, r9\n\t"
  14741. "adc r12, r12, #0\n\t"
  14742. "adds r3, r3, r11\n\t"
  14743. "adcs r4, r4, r14\n\t"
  14744. "adc r12, r12, #0\n\t"
  14745. "str r3, [sp, #36]\n\t"
  14746. "str r4, [sp, #40]\n\t"
  14747. /* a[11] += t[3] + t[5] */
  14748. /* a[12] += t[4] + t[6] */
  14749. /* a[13] += t[5] + t[7] */
  14750. /* a[14] += t[6] */
  14751. "ldr r3, [sp, #44]\n\t"
  14752. "ldr r4, [sp, #48]\n\t"
  14753. "ldr r5, [sp, #52]\n\t"
  14754. "ldr r6, [sp, #56]\n\t"
  14755. "adds r3, r3, r12\n\t"
  14756. "adcs r4, r4, #0\n\t"
  14757. "adcs r5, r5, #0\n\t"
  14758. "adcs r6, r6, #0\n\t"
  14759. "adc r12, %[a], #0\n\t"
  14760. "adds r3, r3, r8\n\t"
  14761. "adcs r4, r4, r9\n\t"
  14762. "adcs r5, r5, r10\n\t"
  14763. "adcs r6, r6, r11\n\t"
  14764. "adc r12, r12, #0\n\t"
  14765. "adds r3, r3, r10\n\t"
  14766. "adcs r4, r4, r11\n\t"
  14767. "adcs r5, r5, r14\n\t"
  14768. "adcs r6, r6, #0\n\t"
  14769. "adc r12, r12, #0\n\t"
  14770. "str r3, [sp, #44]\n\t"
  14771. "str r4, [sp, #48]\n\t"
  14772. "str r5, [sp, #52]\n\t"
  14773. "str r6, [sp, #56]\n\t"
  14774. /* a[15] += t[7] */
  14775. "ldr r3, [sp, #60]\n\t"
  14776. "adds r3, r3, r12\n\t"
  14777. "adc r12, %[a], #0\n\t"
  14778. "adds r3, r3, r14\n\t"
  14779. "adc r12, r12, #0\n\t"
  14780. "str r3, [sp, #60]\n\t"
  14781. "ldr r3, [sp, #64]\n\t"
  14782. "ldr r4, [sp, #32]\n\t"
  14783. "ldr r5, [sp, #36]\n\t"
  14784. "ldr r6, [sp, #40]\n\t"
  14785. "ldr r9, [sp, #0]\n\t"
  14786. "ldr r10, [sp, #4]\n\t"
  14787. "ldr r11, [sp, #8]\n\t"
  14788. "ldr r14, [sp, #12]\n\t"
  14789. "subs r3, r3, r9\n\t"
  14790. "sbcs r4, r4, r10\n\t"
  14791. "sbcs r5, r5, r11\n\t"
  14792. "sbcs r6, r6, r14\n\t"
  14793. "str r4, [sp, #32]\n\t"
  14794. "str r5, [sp, #36]\n\t"
  14795. "str r6, [sp, #40]\n\t"
  14796. "ldr r3, [sp, #44]\n\t"
  14797. "ldr r4, [sp, #48]\n\t"
  14798. "ldr r5, [sp, #52]\n\t"
  14799. "ldr r6, [sp, #56]\n\t"
  14800. "ldr r8, [sp, #60]\n\t"
  14801. "ldr r9, [sp, #16]\n\t"
  14802. "ldr r10, [sp, #20]\n\t"
  14803. "ldr r11, [sp, #24]\n\t"
  14804. "ldr r14, [sp, #28]\n\t"
  14805. "sbcs r3, r3, r9\n\t"
  14806. "sbcs r4, r4, r10\n\t"
  14807. "sbcs r5, r5, r11\n\t"
  14808. "sbcs r6, r6, r14\n\t"
  14809. "sbc r8, r8, #0\n\t"
  14810. "str r3, [sp, #44]\n\t"
  14811. "str r4, [sp, #48]\n\t"
  14812. "str r5, [sp, #52]\n\t"
  14813. "str r6, [sp, #56]\n\t"
  14814. "str r8, [sp, #60]\n\t"
  14815. /* mask m and sub from result if overflow */
  14816. "sub r12, %[a], r12\n\t"
  14817. "and %[a], r12, #1\n\t"
  14818. "ldr r3, [sp, #32]\n\t"
  14819. "ldr r4, [sp, #36]\n\t"
  14820. "ldr r5, [sp, #40]\n\t"
  14821. "ldr r6, [sp, #44]\n\t"
  14822. "ldr r8, [sp, #48]\n\t"
  14823. "ldr r9, [sp, #52]\n\t"
  14824. "ldr r10, [sp, #56]\n\t"
  14825. "ldr r11, [sp, #60]\n\t"
  14826. "subs r3, r3, r12\n\t"
  14827. "sbcs r4, r4, r12\n\t"
  14828. "sbcs r5, r5, r12\n\t"
  14829. "sbcs r6, r6, #0\n\t"
  14830. "sbcs r8, r8, #0\n\t"
  14831. "sbcs r9, r9, #0\n\t"
  14832. "sbcs r10, r10, %[a]\n\t"
  14833. "sbc r11, r11, r12\n\t"
  14834. "str r3, [%[r], #0]\n\t"
  14835. "str r4, [%[r], #4]\n\t"
  14836. "str r5, [%[r], #8]\n\t"
  14837. "str r6, [%[r], #12]\n\t"
  14838. "str r8, [%[r], #16]\n\t"
  14839. "str r9, [%[r], #20]\n\t"
  14840. "str r10, [%[r], #24]\n\t"
  14841. "str r11, [%[r], #28]\n\t"
  14842. "add sp, sp, #68\n\t"
  14843. : [a] "+r" (a)
  14844. : [r] "r" (r)
  14845. : "memory", "r9", "r10", "r11", "r14", "r3", "r4", "r5", "r6", "r8", "r12"
  14846. );
  14847. }
  14848. #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
  14849. /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
  14850. *
  14851. * r Result of squaring.
  14852. * a Number to square in Montogmery form.
  14853. * n Number of times to square.
  14854. * m Modulus (prime).
  14855. * mp Montogmery mulitplier.
  14856. */
  14857. static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
  14858. const sp_digit* m, sp_digit mp)
  14859. {
  14860. sp_256_mont_sqr_8(r, a, m, mp);
  14861. for (; n > 1; n--) {
  14862. sp_256_mont_sqr_8(r, r, m, mp);
  14863. }
  14864. }
  14865. #endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
  #ifdef WOLFSSL_SP_SMALL
  /* The P256 modulus minus 2, stored least significant 32-bit word first.
   * sp_256_mont_inv_8() scans its bits as the exponent when inverting. */
  static const uint32_t p256_mod_minus_2[8] = {
      0xfffffffdU,
      0xffffffffU,
      0xffffffffU,
      0x00000000U,
      0x00000000U,
      0x00000000U,
      0x00000001U,
      0xffffffffU
  };
  #endif /* WOLFSSL_SP_SMALL */
  14873. /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
  14874. * P256 curve. (r = 1 / a mod m)
  14875. *
  14876. * r Inverse result.
  14877. * a Number to invert.
  14878. * td Temporary data.
  14879. */
  14880. static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
  14881. {
  14882. #ifdef WOLFSSL_SP_SMALL
  14883. sp_digit* t = td;
  14884. int i;
  14885. XMEMCPY(t, a, sizeof(sp_digit) * 8);
  14886. for (i=254; i>=0; i--) {
  14887. sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
  14888. if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
  14889. sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
  14890. }
  14891. XMEMCPY(r, t, sizeof(sp_digit) * 8);
  14892. #else
  14893. sp_digit* t1 = td;
  14894. sp_digit* t2 = td + 2 * 8;
  14895. sp_digit* t3 = td + 4 * 8;
  14896. /* 0x2 */
  14897. sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
  14898. /* 0x3 */
  14899. sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
  14900. /* 0xc */
  14901. sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
  14902. /* 0xd */
  14903. sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
  14904. /* 0xf */
  14905. sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
  14906. /* 0xf0 */
  14907. sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
  14908. /* 0xfd */
  14909. sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
  14910. /* 0xff */
  14911. sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
  14912. /* 0xff00 */
  14913. sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
  14914. /* 0xfffd */
  14915. sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
  14916. /* 0xffff */
  14917. sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
  14918. /* 0xffff0000 */
  14919. sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
  14920. /* 0xfffffffd */
  14921. sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
  14922. /* 0xffffffff */
  14923. sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
  14924. /* 0xffffffff00000000 */
  14925. sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
  14926. /* 0xffffffffffffffff */
  14927. sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
  14928. /* 0xffffffff00000001 */
  14929. sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
  14930. /* 0xffffffff000000010000000000000000000000000000000000000000 */
  14931. sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
  14932. /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
  14933. sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
  14934. /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
  14935. sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
  14936. /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
  14937. sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
  14938. #endif /* WOLFSSL_SP_SMALL */
  14939. }
  14940. /* Compare a with b in constant time.
  14941. *
  14942. * a A single precision integer.
  14943. * b A single precision integer.
  14944. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  14945. * respectively.
  14946. */
  14947. SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
  14948. {
  14949. sp_digit r = 0;
  14950. __asm__ __volatile__ (
  14951. "mov r3, #0\n\t"
  14952. "mvn r3, r3\n\t"
  14953. "mov r6, #28\n\t"
  14954. "\n1:\n\t"
  14955. "ldr r8, [%[a], r6]\n\t"
  14956. "ldr r5, [%[b], r6]\n\t"
  14957. "and r8, r8, r3\n\t"
  14958. "and r5, r5, r3\n\t"
  14959. "mov r4, r8\n\t"
  14960. "subs r8, r8, r5\n\t"
  14961. "sbc r8, r8, r8\n\t"
  14962. "add %[r], %[r], r8\n\t"
  14963. "mvn r8, r8\n\t"
  14964. "and r3, r3, r8\n\t"
  14965. "subs r5, r5, r4\n\t"
  14966. "sbc r8, r8, r8\n\t"
  14967. "sub %[r], %[r], r8\n\t"
  14968. "mvn r8, r8\n\t"
  14969. "and r3, r3, r8\n\t"
  14970. "sub r6, r6, #4\n\t"
  14971. "cmp r6, #0\n\t"
  14972. #ifdef __GNUC__
  14973. "bge 1b\n\t"
  14974. #else
  14975. "bge.n 1b\n\t"
  14976. #endif /* __GNUC__ */
  14977. : [r] "+r" (r)
  14978. : [a] "r" (a), [b] "r" (b)
  14979. : "r3", "r4", "r5", "r6", "r8"
  14980. );
  14981. return r;
  14982. }
  /* Normalize the values in each word to 32 bits.
   *
   * This macro expands to nothing, so its argument is never evaluated.
   *
   * x  Array of sp_digit to normalize.
   */
  #define sp_256_norm_8(x)
  14988. /* Conditionally subtract b from a using the mask m.
  14989. * m is -1 to subtract and 0 when not copying.
  14990. *
  14991. * r A single precision number representing condition subtract result.
  14992. * a A single precision number to subtract from.
  14993. * b A single precision number to subtract.
  14994. * m Mask value to apply.
  14995. */
  14996. SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
  14997. const sp_digit* b, sp_digit m)
  14998. {
  14999. sp_digit c = 0;
  15000. __asm__ __volatile__ (
  15001. "mov r5, #32\n\t"
  15002. "mov r9, r5\n\t"
  15003. "mov r8, #0\n\t"
  15004. "\n1:\n\t"
  15005. "ldr r6, [%[b], r8]\n\t"
  15006. "and r6, r6, %[m]\n\t"
  15007. "mov r5, #0\n\t"
  15008. "subs r5, r5, %[c]\n\t"
  15009. "ldr r5, [%[a], r8]\n\t"
  15010. "sbcs r5, r5, r6\n\t"
  15011. "sbcs %[c], %[c], %[c]\n\t"
  15012. "str r5, [%[r], r8]\n\t"
  15013. "add r8, r8, #4\n\t"
  15014. "cmp r8, r9\n\t"
  15015. #ifdef __GNUC__
  15016. "blt 1b\n\t"
  15017. #else
  15018. "blt.n 1b\n\t"
  15019. #endif /* __GNUC__ */
  15020. : [c] "+r" (c)
  15021. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  15022. : "memory", "r5", "r6", "r8", "r9"
  15023. );
  15024. return c;
  15025. }
  15026. /* Reduce the number back to 256 bits using Montgomery reduction.
  15027. *
  15028. * a A single precision number to reduce in place.
  15029. * m The single precision number representing the modulus.
  15030. * mp The digit representing the negative inverse of m mod 2^n.
  15031. */
  15032. SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
  15033. sp_digit mp)
  15034. {
  15035. (void)mp;
  15036. (void)m;
  15037. __asm__ __volatile__ (
  15038. "mov r2, #0\n\t"
  15039. "mov r1, #0\n\t"
  15040. /* i = 0 */
  15041. "mov r9, r2\n\t"
  15042. "\n1:\n\t"
  15043. "mov r4, #0\n\t"
  15044. /* mu = a[i] * 1 (mp) = a[i] */
  15045. "ldr r3, [%[a]]\n\t"
  15046. /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */
  15047. /* a[i+1] += -1 * mu */
  15048. "ldr r6, [%[a], #4]\n\t"
  15049. "mov r5, #0\n\t"
  15050. "adds r4, r4, r6\n\t"
  15051. "adc r5, r5, r2\n\t"
  15052. "str r4, [%[a], #4]\n\t"
  15053. /* a[i+2] += -1 * mu */
  15054. "ldr r6, [%[a], #8]\n\t"
  15055. "mov r4, #0\n\t"
  15056. "adds r5, r5, r6\n\t"
  15057. "adc r4, r4, r2\n\t"
  15058. "str r5, [%[a], #8]\n\t"
  15059. /* a[i+3] += 0 * mu */
  15060. "ldr r6, [%[a], #12]\n\t"
  15061. "mov r5, #0\n\t"
  15062. "adds r4, r4, r3\n\t"
  15063. "adc r5, r5, r2\n\t"
  15064. "adds r4, r4, r6\n\t"
  15065. "adc r5, r5, r2\n\t"
  15066. "str r4, [%[a], #12]\n\t"
  15067. /* a[i+4] += 0 * mu */
  15068. "ldr r6, [%[a], #16]\n\t"
  15069. "mov r4, #0\n\t"
  15070. "adds r5, r5, r6\n\t"
  15071. "adc r4, r4, r2\n\t"
  15072. "str r5, [%[a], #16]\n\t"
  15073. /* a[i+5] += 0 * mu */
  15074. "ldr r6, [%[a], #20]\n\t"
  15075. "mov r5, #0\n\t"
  15076. "adds r4, r4, r6\n\t"
  15077. "adc r5, r5, r2\n\t"
  15078. "str r4, [%[a], #20]\n\t"
  15079. /* a[i+6] += 1 * mu */
  15080. "ldr r6, [%[a], #24]\n\t"
  15081. "mov r4, #0\n\t"
  15082. "adds r5, r5, r3\n\t"
  15083. "adc r4, r4, r2\n\t"
  15084. "adds r5, r5, r6\n\t"
  15085. "adc r4, r4, r2\n\t"
  15086. "str r5, [%[a], #24]\n\t"
  15087. /* a[i+7] += -1 * mu */
  15088. "ldr r6, [%[a], #28]\n\t"
  15089. "ldr r8, [%[a], #32]\n\t"
  15090. "adds r5, r1, r3\n\t"
  15091. "mov r1, #0\n\t"
  15092. "adc r1, r1, r2\n\t"
  15093. "subs r4, r4, r3\n\t"
  15094. "sbcs r5, r5, r2\n\t"
  15095. "sbc r1, r1, r2\n\t"
  15096. "adds r4, r4, r6\n\t"
  15097. "adcs r5, r5, r8\n\t"
  15098. "adc r1, r1, r2\n\t"
  15099. "str r4, [%[a], #28]\n\t"
  15100. "str r5, [%[a], #32]\n\t"
  15101. /* i += 1 */
  15102. "add r9, r9, #1\n\t"
  15103. "add %[a], %[a], #4\n\t"
  15104. "mov r6, #8\n\t"
  15105. "cmp r9, r6\n\t"
  15106. #ifdef __GNUC__
  15107. "blt 1b\n\t"
  15108. #else
  15109. "blt.n 1b\n\t"
  15110. #endif /* __GNUC__ */
  15111. "sub %[a], %[a], #32\n\t"
  15112. "mov r3, r1\n\t"
  15113. "sub r1, r1, #1\n\t"
  15114. "mvn r1, r1\n\t"
  15115. "ldr r4, [%[a],#32]\n\t"
  15116. "ldr r5, [%[a],#36]\n\t"
  15117. "ldr r6, [%[a],#40]\n\t"
  15118. "ldr r8, [%[a],#44]\n\t"
  15119. "ldr r9, [%[a],#48]\n\t"
  15120. "ldr r10, [%[a],#52]\n\t"
  15121. "ldr r11, [%[a],#56]\n\t"
  15122. "ldr r14, [%[a],#60]\n\t"
  15123. "subs r4, r4, r1\n\t"
  15124. "sbcs r5, r5, r1\n\t"
  15125. "sbcs r6, r6, r1\n\t"
  15126. "sbcs r8, r8, r2\n\t"
  15127. "sbcs r9, r9, r2\n\t"
  15128. "sbcs r10, r10, r2\n\t"
  15129. "sbcs r11, r11, r3\n\t"
  15130. "sbc r14, r14, r1\n\t"
  15131. "str r4, [%[a],#0]\n\t"
  15132. "str r5, [%[a],#4]\n\t"
  15133. "str r6, [%[a],#8]\n\t"
  15134. "str r8, [%[a],#12]\n\t"
  15135. "str r9, [%[a],#16]\n\t"
  15136. "str r10, [%[a],#20]\n\t"
  15137. "str r11, [%[a],#24]\n\t"
  15138. "str r14, [%[a],#28]\n\t"
  15139. : [a] "+r" (a)
  15140. :
  15141. : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14"
  15142. );
  15143. (void)m;
  15144. (void)mp;
  15145. }
  15146. /* Reduce the number back to 256 bits using Montgomery reduction.
  15147. *
  15148. * a A single precision number to reduce in place.
  15149. * m The single precision number representing the modulus.
  15150. * mp The digit representing the negative inverse of m mod 2^n.
  15151. */
  15152. SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
  15153. sp_digit mp)
  15154. {
  15155. sp_digit ca = 0;
  15156. __asm__ __volatile__ (
  15157. "mov r9, %[mp]\n\t"
  15158. "mov r12, %[m]\n\t"
  15159. "mov r10, %[a]\n\t"
  15160. "mov r4, #0\n\t"
  15161. "add r11, r10, #32\n\t"
  15162. "\n1:\n\t"
  15163. /* mu = a[i] * mp */
  15164. "mov %[mp], r9\n\t"
  15165. "ldr %[a], [r10]\n\t"
  15166. "mul %[mp], %[mp], %[a]\n\t"
  15167. "mov %[m], r12\n\t"
  15168. "add r14, r10, #24\n\t"
  15169. "\n2:\n\t"
  15170. /* a[i+j] += m[j] * mu */
  15171. "ldr %[a], [r10]\n\t"
  15172. "mov r5, #0\n\t"
  15173. /* Multiply m[j] and mu - Start */
  15174. "ldr r8, [%[m]], #4\n\t"
  15175. "umull r6, r8, %[mp], r8\n\t"
  15176. "adds %[a], %[a], r6\n\t"
  15177. "adc r5, r5, r8\n\t"
  15178. /* Multiply m[j] and mu - Done */
  15179. "adds r4, r4, %[a]\n\t"
  15180. "adc r5, r5, #0\n\t"
  15181. "str r4, [r10], #4\n\t"
  15182. /* a[i+j+1] += m[j+1] * mu */
  15183. "ldr %[a], [r10]\n\t"
  15184. "mov r4, #0\n\t"
  15185. /* Multiply m[j] and mu - Start */
  15186. "ldr r8, [%[m]], #4\n\t"
  15187. "umull r6, r8, %[mp], r8\n\t"
  15188. "adds %[a], %[a], r6\n\t"
  15189. "adc r4, r4, r8\n\t"
  15190. /* Multiply m[j] and mu - Done */
  15191. "adds r5, r5, %[a]\n\t"
  15192. "adc r4, r4, #0\n\t"
  15193. "str r5, [r10], #4\n\t"
  15194. "cmp r10, r14\n\t"
  15195. #ifdef __GNUC__
  15196. "blt 2b\n\t"
  15197. #else
  15198. "blt.n 2b\n\t"
  15199. #endif /* __GNUC__ */
  15200. /* a[i+6] += m[6] * mu */
  15201. "ldr %[a], [r10]\n\t"
  15202. "mov r5, #0\n\t"
  15203. /* Multiply m[j] and mu - Start */
  15204. "ldr r8, [%[m]], #4\n\t"
  15205. "umull r6, r8, %[mp], r8\n\t"
  15206. "adds %[a], %[a], r6\n\t"
  15207. "adc r5, r5, r8\n\t"
  15208. /* Multiply m[j] and mu - Done */
  15209. "adds r4, r4, %[a]\n\t"
  15210. "adc r5, r5, #0\n\t"
  15211. "str r4, [r10], #4\n\t"
  15212. /* a[i+7] += m[7] * mu */
  15213. "mov r4, %[ca]\n\t"
  15214. "mov %[ca], #0\n\t"
  15215. /* Multiply m[7] and mu - Start */
  15216. "ldr r8, [%[m]]\n\t"
  15217. "umull r6, r8, %[mp], r8\n\t"
  15218. "adds r5, r5, r6\n\t"
  15219. "adcs r4, r4, r8\n\t"
  15220. "adc %[ca], %[ca], #0\n\t"
  15221. /* Multiply m[7] and mu - Done */
  15222. "ldr r6, [r10]\n\t"
  15223. "ldr r8, [r10, #4]\n\t"
  15224. "adds r6, r6, r5\n\t"
  15225. "adcs r8, r8, r4\n\t"
  15226. "adc %[ca], %[ca], #0\n\t"
  15227. "str r6, [r10]\n\t"
  15228. "str r8, [r10, #4]\n\t"
  15229. /* Next word in a */
  15230. "sub r10, r10, #24\n\t"
  15231. "cmp r10, r11\n\t"
  15232. #ifdef __GNUC__
  15233. "blt 1b\n\t"
  15234. #else
  15235. "blt.n 1b\n\t"
  15236. #endif /* __GNUC__ */
  15237. "mov %[a], r10\n\t"
  15238. "mov %[m], r12\n\t"
  15239. : [ca] "+r" (ca), [a] "+r" (a)
  15240. : [m] "r" (m), [mp] "r" (mp)
  15241. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  15242. );
  15243. sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
  15244. }
  15245. /* Map the Montgomery form projective coordinate point to an affine point.
  15246. *
  15247. * r Resulting affine coordinate point.
  15248. * p Montgomery form projective coordinate point.
  15249. * t Temporary ordinate data.
  15250. */
  15251. static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
  15252. {
  15253. sp_digit* t1 = t;
  15254. sp_digit* t2 = t + 2*8;
  15255. int32_t n;
  15256. sp_256_mont_inv_8(t1, p->z, t + 2*8);
  15257. sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
  15258. sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
  15259. /* x /= z^2 */
  15260. sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
  15261. XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
  15262. sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
  15263. /* Reduce x to less than modulus */
  15264. n = sp_256_cmp_8(r->x, p256_mod);
  15265. sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
  15266. (sp_digit)1 : (sp_digit)0));
  15267. sp_256_norm_8(r->x);
  15268. /* y /= z^3 */
  15269. sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
  15270. XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
  15271. sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
  15272. /* Reduce y to less than modulus */
  15273. n = sp_256_cmp_8(r->y, p256_mod);
  15274. sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
  15275. (sp_digit)1 : (sp_digit)0));
  15276. sp_256_norm_8(r->y);
  15277. XMEMSET(r->z, 0, sizeof(r->z));
  15278. r->z[0] = 1;
  15279. }
  15280. #ifdef WOLFSSL_SP_SMALL
  15281. /* Add b to a into r. (r = a + b)
  15282. *
  15283. * r A single precision integer.
  15284. * a A single precision integer.
  15285. * b A single precision integer.
  15286. */
  15287. SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
  15288. const sp_digit* b)
  15289. {
  15290. sp_digit c = 0;
  15291. __asm__ __volatile__ (
  15292. "mov r6, %[a]\n\t"
  15293. "mov r8, #0\n\t"
  15294. "add r6, r6, #32\n\t"
  15295. "sub r8, r8, #1\n\t"
  15296. "\n1:\n\t"
  15297. "adds %[c], %[c], r8\n\t"
  15298. "ldr r4, [%[a]]\n\t"
  15299. "ldr r5, [%[b]]\n\t"
  15300. "adcs r4, r4, r5\n\t"
  15301. "str r4, [%[r]]\n\t"
  15302. "mov %[c], #0\n\t"
  15303. "adc %[c], %[c], %[c]\n\t"
  15304. "add %[a], %[a], #4\n\t"
  15305. "add %[b], %[b], #4\n\t"
  15306. "add %[r], %[r], #4\n\t"
  15307. "cmp %[a], r6\n\t"
  15308. #ifdef __GNUC__
  15309. "bne 1b\n\t"
  15310. #else
  15311. "bne.n 1b\n\t"
  15312. #endif /* __GNUC__ */
  15313. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  15314. :
  15315. : "memory", "r4", "r5", "r6", "r8"
  15316. );
  15317. return c;
  15318. }
  15319. #else
  15320. /* Add b to a into r. (r = a + b)
  15321. *
  15322. * r A single precision integer.
  15323. * a A single precision integer.
  15324. * b A single precision integer.
  15325. */
  15326. SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
  15327. const sp_digit* b)
  15328. {
  15329. sp_digit c = 0;
  15330. __asm__ __volatile__ (
  15331. "ldm %[a]!, {r4, r5}\n\t"
  15332. "ldm %[b]!, {r6, r8}\n\t"
  15333. "adds r4, r4, r6\n\t"
  15334. "adcs r5, r5, r8\n\t"
  15335. "stm %[r]!, {r4, r5}\n\t"
  15336. "ldm %[a]!, {r4, r5}\n\t"
  15337. "ldm %[b]!, {r6, r8}\n\t"
  15338. "adcs r4, r4, r6\n\t"
  15339. "adcs r5, r5, r8\n\t"
  15340. "stm %[r]!, {r4, r5}\n\t"
  15341. "ldm %[a]!, {r4, r5}\n\t"
  15342. "ldm %[b]!, {r6, r8}\n\t"
  15343. "adcs r4, r4, r6\n\t"
  15344. "adcs r5, r5, r8\n\t"
  15345. "stm %[r]!, {r4, r5}\n\t"
  15346. "ldm %[a]!, {r4, r5}\n\t"
  15347. "ldm %[b]!, {r6, r8}\n\t"
  15348. "adcs r4, r4, r6\n\t"
  15349. "adcs r5, r5, r8\n\t"
  15350. "stm %[r]!, {r4, r5}\n\t"
  15351. "mov %[c], #0\n\t"
  15352. "adc %[c], %[c], %[c]\n\t"
  15353. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  15354. :
  15355. : "memory", "r4", "r5", "r6", "r8"
  15356. );
  15357. return c;
  15358. }
  15359. #endif /* WOLFSSL_SP_SMALL */
  15360. /* Add two Montgomery form numbers (r = a + b % m).
  15361. *
  15362. * r Result of addition.
  15363. * a First number to add in Montogmery form.
  15364. * b Second number to add in Montogmery form.
  15365. * m Modulus (prime).
  15366. */
  15367. SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
  15368. const sp_digit* m)
  15369. {
  15370. (void)m;
  15371. __asm__ __volatile__ (
  15372. "mov r12, #0\n\t"
  15373. "ldr r4, [%[a],#0]\n\t"
  15374. "ldr r5, [%[a],#4]\n\t"
  15375. "ldr r6, [%[a],#8]\n\t"
  15376. "ldr r8, [%[a],#12]\n\t"
  15377. "ldr r9, [%[b],#0]\n\t"
  15378. "ldr r10, [%[b],#4]\n\t"
  15379. "ldr r11, [%[b],#8]\n\t"
  15380. "ldr r14, [%[b],#12]\n\t"
  15381. "adds r4, r4, r9\n\t"
  15382. "adcs r5, r5, r10\n\t"
  15383. "adcs r6, r6, r11\n\t"
  15384. "adcs r8, r8, r14\n\t"
  15385. "str r4, [%[r],#0]\n\t"
  15386. "str r5, [%[r],#4]\n\t"
  15387. "str r6, [%[r],#8]\n\t"
  15388. "str r8, [%[r],#12]\n\t"
  15389. "ldr r4, [%[a],#16]\n\t"
  15390. "ldr r5, [%[a],#20]\n\t"
  15391. "ldr r6, [%[a],#24]\n\t"
  15392. "ldr r8, [%[a],#28]\n\t"
  15393. "ldr r9, [%[b],#16]\n\t"
  15394. "ldr r10, [%[b],#20]\n\t"
  15395. "ldr r11, [%[b],#24]\n\t"
  15396. "ldr r14, [%[b],#28]\n\t"
  15397. "adcs r4, r4, r9\n\t"
  15398. "adcs r5, r5, r10\n\t"
  15399. "adcs r6, r6, r11\n\t"
  15400. "adcs r8, r8, r14\n\t"
  15401. "adc r3, r12, #0\n\t"
  15402. "sub r3, r12, r3\n\t"
  15403. "and r12, r3, #1\n\t"
  15404. "ldr r9, [%[r],#0]\n\t"
  15405. "ldr r10, [%[r],#4]\n\t"
  15406. "ldr r11, [%[r],#8]\n\t"
  15407. "ldr r14, [%[r],#12]\n\t"
  15408. "subs r9, r9, r3\n\t"
  15409. "sbcs r10, r10, r3\n\t"
  15410. "sbcs r11, r11, r3\n\t"
  15411. "sbcs r14, r14, #0\n\t"
  15412. "sbcs r4, r4, #0\n\t"
  15413. "sbcs r5, r5, #0\n\t"
  15414. "sbcs r6, r6, r12\n\t"
  15415. "sbc r8, r8, r3\n\t"
  15416. "str r9, [%[r],#0]\n\t"
  15417. "str r10, [%[r],#4]\n\t"
  15418. "str r11, [%[r],#8]\n\t"
  15419. "str r14, [%[r],#12]\n\t"
  15420. "str r4, [%[r],#16]\n\t"
  15421. "str r5, [%[r],#20]\n\t"
  15422. "str r6, [%[r],#24]\n\t"
  15423. "str r8, [%[r],#28]\n\t"
  15424. :
  15425. : [r] "r" (r), [a] "r" (a), [b] "r" (b)
  15426. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
  15427. );
  15428. }
  15429. /* Double a Montgomery form number (r = a + a % m).
  15430. *
  15431. * r Result of doubling.
  15432. * a Number to double in Montogmery form.
  15433. * m Modulus (prime).
  15434. */
  15435. SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  15436. {
  15437. (void)m;
  15438. __asm__ __volatile__ (
  15439. "mov r12, #0\n\t"
  15440. "ldr r4, [%[a],#0]\n\t"
  15441. "ldr r5, [%[a],#4]\n\t"
  15442. "ldr r6, [%[a],#8]\n\t"
  15443. "ldr r8, [%[a],#12]\n\t"
  15444. "ldr r9, [%[a],#16]\n\t"
  15445. "ldr r10, [%[a],#20]\n\t"
  15446. "ldr r11, [%[a],#24]\n\t"
  15447. "ldr r14, [%[a],#28]\n\t"
  15448. "adds r4, r4, r4\n\t"
  15449. "adcs r5, r5, r5\n\t"
  15450. "adcs r6, r6, r6\n\t"
  15451. "adcs r8, r8, r8\n\t"
  15452. "adcs r9, r9, r9\n\t"
  15453. "adcs r10, r10, r10\n\t"
  15454. "adcs r11, r11, r11\n\t"
  15455. "adcs r14, r14, r14\n\t"
  15456. "adc r3, r12, #0\n\t"
  15457. "sub r3, r12, r3\n\t"
  15458. "and r12, r3, #1\n\t"
  15459. "subs r4, r4, r3\n\t"
  15460. "sbcs r5, r5, r3\n\t"
  15461. "sbcs r6, r6, r3\n\t"
  15462. "sbcs r8, r8, #0\n\t"
  15463. "sbcs r9, r9, #0\n\t"
  15464. "sbcs r10, r10, #0\n\t"
  15465. "sbcs r11, r11, r12\n\t"
  15466. "sbc r14, r14, r3\n\t"
  15467. "str r4, [%[r],#0]\n\t"
  15468. "str r5, [%[r],#4]\n\t"
  15469. "str r6, [%[r],#8]\n\t"
  15470. "str r8, [%[r],#12]\n\t"
  15471. "str r9, [%[r],#16]\n\t"
  15472. "str r10, [%[r],#20]\n\t"
  15473. "str r11, [%[r],#24]\n\t"
  15474. "str r14, [%[r],#28]\n\t"
  15475. :
  15476. : [r] "r" (r), [a] "r" (a)
  15477. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
  15478. );
  15479. }
  15480. /* Triple a Montgomery form number (r = a + a + a % m).
  15481. *
  15482. * r Result of Tripling.
  15483. * a Number to triple in Montogmery form.
  15484. * m Modulus (prime).
  15485. */
  15486. SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  15487. {
  15488. (void)m;
  15489. __asm__ __volatile__ (
  15490. "ldr r2, [%[a],#0]\n\t"
  15491. "ldr r3, [%[a],#4]\n\t"
  15492. "ldr r4, [%[a],#8]\n\t"
  15493. "ldr r5, [%[a],#12]\n\t"
  15494. "ldr r6, [%[a],#16]\n\t"
  15495. "ldr r8, [%[a],#20]\n\t"
  15496. "ldr r9, [%[a],#24]\n\t"
  15497. "ldr r10, [%[a],#28]\n\t"
  15498. "adds r2, r2, r2\n\t"
  15499. "adcs r3, r3, r3\n\t"
  15500. "adcs r4, r4, r4\n\t"
  15501. "adcs r5, r5, r5\n\t"
  15502. "adcs r6, r6, r6\n\t"
  15503. "adcs r8, r8, r8\n\t"
  15504. "adcs r9, r9, r9\n\t"
  15505. "adcs r10, r10, r10\n\t"
  15506. "mov r11, #0\n\t"
  15507. "mov r14, #0\n\t"
  15508. "adc r11, r11, r11\n\t"
  15509. "mov r12, r11\n\t"
  15510. "sub r11, r11, #1\n\t"
  15511. "mvn r11, r11\n\t"
  15512. "subs r2, r2, r11\n\t"
  15513. "sbcs r3, r3, r11\n\t"
  15514. "sbcs r4, r4, r11\n\t"
  15515. "sbcs r5, r5, r14\n\t"
  15516. "sbcs r6, r6, r14\n\t"
  15517. "sbcs r8, r8, r14\n\t"
  15518. "sbcs r9, r9, r12\n\t"
  15519. "sbc r10, r10, r11\n\t"
  15520. "ldr r12, [%[a],#0]\n\t"
  15521. "ldr r14, [%[a],#4]\n\t"
  15522. "adds r2, r2, r12\n\t"
  15523. "adcs r3, r3, r14\n\t"
  15524. "ldr r12, [%[a],#8]\n\t"
  15525. "ldr r14, [%[a],#12]\n\t"
  15526. "adcs r4, r4, r12\n\t"
  15527. "adcs r5, r5, r14\n\t"
  15528. "ldr r12, [%[a],#16]\n\t"
  15529. "ldr r14, [%[a],#20]\n\t"
  15530. "adcs r6, r6, r12\n\t"
  15531. "adcs r8, r8, r14\n\t"
  15532. "ldr r12, [%[a],#24]\n\t"
  15533. "ldr r14, [%[a],#28]\n\t"
  15534. "adcs r9, r9, r12\n\t"
  15535. "adcs r10, r10, r14\n\t"
  15536. "mov r11, #0\n\t"
  15537. "mov r14, #0\n\t"
  15538. "adc r11, r11, r11\n\t"
  15539. "mov r12, r11\n\t"
  15540. "sub r11, r11, #1\n\t"
  15541. "mvn r11, r11\n\t"
  15542. "subs r2, r2, r11\n\t"
  15543. "str r2, [%[r],#0]\n\t"
  15544. "sbcs r3, r3, r11\n\t"
  15545. "str r3, [%[r],#4]\n\t"
  15546. "sbcs r4, r4, r11\n\t"
  15547. "str r4, [%[r],#8]\n\t"
  15548. "sbcs r5, r5, r14\n\t"
  15549. "str r5, [%[r],#12]\n\t"
  15550. "sbcs r6, r6, r14\n\t"
  15551. "str r6, [%[r],#16]\n\t"
  15552. "sbcs r8, r8, r14\n\t"
  15553. "str r8, [%[r],#20]\n\t"
  15554. "sbcs r9, r9, r12\n\t"
  15555. "str r9, [%[r],#24]\n\t"
  15556. "sbc r10, r10, r11\n\t"
  15557. "str r10, [%[r],#28]\n\t"
  15558. :
  15559. : [r] "r" (r), [a] "r" (a)
  15560. : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10"
  15561. );
  15562. }
  15563. /* Subtract two Montgomery form numbers (r = a - b % m).
  15564. *
  15565. * r Result of subtration.
  15566. * a Number to subtract from in Montogmery form.
  15567. * b Number to subtract with in Montogmery form.
  15568. * m Modulus (prime).
  15569. */
  15570. SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
  15571. const sp_digit* m)
  15572. {
  15573. (void)m;
  15574. __asm__ __volatile__ (
  15575. "mov r12, #0\n\t"
  15576. "ldr r4, [%[a],#0]\n\t"
  15577. "ldr r5, [%[a],#4]\n\t"
  15578. "ldr r6, [%[a],#8]\n\t"
  15579. "ldr r8, [%[a],#12]\n\t"
  15580. "ldr r9, [%[b],#0]\n\t"
  15581. "ldr r10, [%[b],#4]\n\t"
  15582. "ldr r11, [%[b],#8]\n\t"
  15583. "ldr r14, [%[b],#12]\n\t"
  15584. "subs r4, r4, r9\n\t"
  15585. "sbcs r5, r5, r10\n\t"
  15586. "sbcs r6, r6, r11\n\t"
  15587. "sbcs r8, r8, r14\n\t"
  15588. "str r4, [%[r],#0]\n\t"
  15589. "str r5, [%[r],#4]\n\t"
  15590. "str r6, [%[r],#8]\n\t"
  15591. "str r8, [%[r],#12]\n\t"
  15592. "ldr r4, [%[a],#16]\n\t"
  15593. "ldr r5, [%[a],#20]\n\t"
  15594. "ldr r6, [%[a],#24]\n\t"
  15595. "ldr r8, [%[a],#28]\n\t"
  15596. "ldr r9, [%[b],#16]\n\t"
  15597. "ldr r10, [%[b],#20]\n\t"
  15598. "ldr r11, [%[b],#24]\n\t"
  15599. "ldr r14, [%[b],#28]\n\t"
  15600. "sbcs r4, r4, r9\n\t"
  15601. "sbcs r5, r5, r10\n\t"
  15602. "sbcs r6, r6, r11\n\t"
  15603. "sbcs r8, r8, r14\n\t"
  15604. "sbc r3, r12, #0\n\t"
  15605. "and r12, r3, #1\n\t"
  15606. "ldr r9, [%[r],#0]\n\t"
  15607. "ldr r10, [%[r],#4]\n\t"
  15608. "ldr r11, [%[r],#8]\n\t"
  15609. "ldr r14, [%[r],#12]\n\t"
  15610. "adds r9, r9, r3\n\t"
  15611. "adcs r10, r10, r3\n\t"
  15612. "adcs r11, r11, r3\n\t"
  15613. "adcs r14, r14, #0\n\t"
  15614. "adcs r4, r4, #0\n\t"
  15615. "adcs r5, r5, #0\n\t"
  15616. "adcs r6, r6, r12\n\t"
  15617. "adc r8, r8, r3\n\t"
  15618. "str r9, [%[r],#0]\n\t"
  15619. "str r10, [%[r],#4]\n\t"
  15620. "str r11, [%[r],#8]\n\t"
  15621. "str r14, [%[r],#12]\n\t"
  15622. "str r4, [%[r],#16]\n\t"
  15623. "str r5, [%[r],#20]\n\t"
  15624. "str r6, [%[r],#24]\n\t"
  15625. "str r8, [%[r],#28]\n\t"
  15626. :
  15627. : [r] "r" (r), [a] "r" (a), [b] "r" (b)
  15628. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
  15629. );
  15630. }
  15631. /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  15632. *
  15633. * r Result of division by 2.
  15634. * a Number to divide.
  15635. * m Modulus (prime).
  15636. */
  15637. SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  15638. {
  15639. __asm__ __volatile__ (
  15640. "ldr r8, [%[a], #0]\n\t"
  15641. "lsl r8, r8, #31\n\t"
  15642. "lsr r8, r8, #31\n\t"
  15643. "mov r5, #0\n\t"
  15644. "sub r5, r5, r8\n\t"
  15645. "mov r8, #0\n\t"
  15646. "lsl r6, r5, #31\n\t"
  15647. "lsr r6, r6, #31\n\t"
  15648. "ldr r3, [%[a], #0]\n\t"
  15649. "ldr r4, [%[a], #4]\n\t"
  15650. "adds r3, r3, r5\n\t"
  15651. "adcs r4, r4, r5\n\t"
  15652. "str r3, [%[r], #0]\n\t"
  15653. "str r4, [%[r], #4]\n\t"
  15654. "ldr r3, [%[a], #8]\n\t"
  15655. "ldr r4, [%[a], #12]\n\t"
  15656. "adcs r3, r3, r5\n\t"
  15657. "adcs r4, r4, r8\n\t"
  15658. "str r3, [%[r], #8]\n\t"
  15659. "str r4, [%[r], #12]\n\t"
  15660. "ldr r3, [%[a], #16]\n\t"
  15661. "ldr r4, [%[a], #20]\n\t"
  15662. "adcs r3, r3, r8\n\t"
  15663. "adcs r4, r4, r8\n\t"
  15664. "str r3, [%[r], #16]\n\t"
  15665. "str r4, [%[r], #20]\n\t"
  15666. "ldr r3, [%[a], #24]\n\t"
  15667. "ldr r4, [%[a], #28]\n\t"
  15668. "adcs r3, r3, r6\n\t"
  15669. "adcs r4, r4, r5\n\t"
  15670. "adc r8, r8, r8\n\t"
  15671. "lsl r8, r8, #31\n\t"
  15672. "lsr r5, r3, #1\n\t"
  15673. "lsl r3, r3, #31\n\t"
  15674. "lsr r6, r4, #1\n\t"
  15675. "lsl r4, r4, #31\n\t"
  15676. "orr r5, r5, r4\n\t"
  15677. "orr r6, r6, r8\n\t"
  15678. "mov r8, r3\n\t"
  15679. "str r5, [%[r], #24]\n\t"
  15680. "str r6, [%[r], #28]\n\t"
  15681. "ldr r3, [%[a], #16]\n\t"
  15682. "ldr r4, [%[a], #20]\n\t"
  15683. "lsr r5, r3, #1\n\t"
  15684. "lsl r3, r3, #31\n\t"
  15685. "lsr r6, r4, #1\n\t"
  15686. "lsl r4, r4, #31\n\t"
  15687. "orr r5, r5, r4\n\t"
  15688. "orr r6, r6, r8\n\t"
  15689. "mov r8, r3\n\t"
  15690. "str r5, [%[r], #16]\n\t"
  15691. "str r6, [%[r], #20]\n\t"
  15692. "ldr r3, [%[a], #8]\n\t"
  15693. "ldr r4, [%[a], #12]\n\t"
  15694. "lsr r5, r3, #1\n\t"
  15695. "lsl r3, r3, #31\n\t"
  15696. "lsr r6, r4, #1\n\t"
  15697. "lsl r4, r4, #31\n\t"
  15698. "orr r5, r5, r4\n\t"
  15699. "orr r6, r6, r8\n\t"
  15700. "mov r8, r3\n\t"
  15701. "str r5, [%[r], #8]\n\t"
  15702. "str r6, [%[r], #12]\n\t"
  15703. "ldr r3, [%[r], #0]\n\t"
  15704. "ldr r4, [%[r], #4]\n\t"
  15705. "lsr r5, r3, #1\n\t"
  15706. "lsr r6, r4, #1\n\t"
  15707. "lsl r4, r4, #31\n\t"
  15708. "orr r5, r5, r4\n\t"
  15709. "orr r6, r6, r8\n\t"
  15710. "str r5, [%[r], #0]\n\t"
  15711. "str r6, [%[r], #4]\n\t"
  15712. :
  15713. : [r] "r" (r), [a] "r" (a), [m] "r" (m)
  15714. : "memory", "r3", "r4", "r5", "r6", "r8"
  15715. );
  15716. }
  /* Double the Montgomery form projective point p.
   *
   * r  Result of doubling point.
   * p  Point to double.
   * t  Temporary ordinate data.
   */
  #ifdef WOLFSSL_SP_NONBLOCK
  /* Progress record of the non-blocking point double: the step to run next
   * and the working pointers set up by step 0. */
  typedef struct sp_256_proj_point_dbl_8_ctx {
      int state;
      sp_digit* t1;
      sp_digit* t2;
      sp_digit* x;
      sp_digit* y;
      sp_digit* z;
  } sp_256_proj_point_dbl_8_ctx;

  /* Non-blocking point double: runs one step of the doubling per call.
   *
   * sp_ctx  Context whose data area holds the progress record.
   * r       Result of doubling point.
   * p       Point to double.
   * t       Temporary ordinate data.
   * returns FP_WOULDBLOCK while steps remain and MP_OKAY once the last step
   * has run.
   */
  static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t)
  {
      sp_256_proj_point_dbl_8_ctx* st = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data;
      int ret = FP_WOULDBLOCK;

      /* Compile-time check: the progress record must be smaller than the
       * generic context, otherwise the array size is negative. */
      typedef char ctx_size_test[sizeof(sp_256_proj_point_dbl_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
      (void)sizeof(ctx_size_test);

      switch (st->state) {
      case 0:
          /* Set up the working pointers. */
          st->t1 = t;
          st->t2 = t + 2*8;
          st->x = r->x;
          st->y = r->y;
          st->z = r->z;
          /* Put infinity into result. */
          if (r != p) {
              r->infinity = p->infinity;
          }
          st->state = 1;
          break;
      case 1:
          /* T1 = Z * Z */
          sp_256_mont_sqr_8(st->t1, p->z, p256_mod, p256_mp_mod);
          st->state = 2;
          break;
      case 2:
          /* Z = Y * Z */
          sp_256_mont_mul_8(st->z, p->y, p->z, p256_mod, p256_mp_mod);
          st->state = 3;
          break;
      case 3:
          /* Z = 2Z */
          sp_256_mont_dbl_8(st->z, st->z, p256_mod);
          st->state = 4;
          break;
      case 4:
          /* T2 = X - T1 */
          sp_256_mont_sub_8(st->t2, p->x, st->t1, p256_mod);
          st->state = 5;
          break;
      case 5:
          /* T1 = X + T1 */
          sp_256_mont_add_8(st->t1, p->x, st->t1, p256_mod);
          st->state = 6;
          break;
      case 6:
          /* T2 = T1 * T2 */
          sp_256_mont_mul_8(st->t2, st->t1, st->t2, p256_mod, p256_mp_mod);
          st->state = 7;
          break;
      case 7:
          /* T1 = 3T2 */
          sp_256_mont_tpl_8(st->t1, st->t2, p256_mod);
          st->state = 8;
          break;
      case 8:
          /* Y = 2Y */
          sp_256_mont_dbl_8(st->y, p->y, p256_mod);
          st->state = 9;
          break;
      case 9:
          /* Y = Y * Y */
          sp_256_mont_sqr_8(st->y, st->y, p256_mod, p256_mp_mod);
          st->state = 10;
          break;
      case 10:
          /* T2 = Y * Y */
          sp_256_mont_sqr_8(st->t2, st->y, p256_mod, p256_mp_mod);
          st->state = 11;
          break;
      case 11:
          /* T2 = T2/2 */
          sp_256_div2_8(st->t2, st->t2, p256_mod);
          st->state = 12;
          break;
      case 12:
          /* Y = Y * X */
          sp_256_mont_mul_8(st->y, st->y, p->x, p256_mod, p256_mp_mod);
          st->state = 13;
          break;
      case 13:
          /* X = T1 * T1 */
          sp_256_mont_sqr_8(st->x, st->t1, p256_mod, p256_mp_mod);
          st->state = 14;
          break;
      case 14:
          /* X = X - Y */
          sp_256_mont_sub_8(st->x, st->x, st->y, p256_mod);
          st->state = 15;
          break;
      case 15:
          /* X = X - Y */
          sp_256_mont_sub_8(st->x, st->x, st->y, p256_mod);
          st->state = 16;
          break;
      case 16:
          /* Y = Y - X */
          sp_256_mont_sub_8(st->y, st->y, st->x, p256_mod);
          st->state = 17;
          break;
      case 17:
          /* Y = Y * T1 */
          sp_256_mont_mul_8(st->y, st->y, st->t1, p256_mod, p256_mp_mod);
          st->state = 18;
          break;
      case 18:
          /* Y = Y - T2 */
          sp_256_mont_sub_8(st->y, st->y, st->t2, p256_mod);
          st->state = 19;
          /* fall-through */
      case 19:
          /* Finished. */
          ret = MP_OKAY;
          break;
      }

      /* Only report success from the final state. */
      if ((ret == MP_OKAY) && (st->state != 19)) {
          ret = FP_WOULDBLOCK;
      }

      return ret;
  }
  #endif /* WOLFSSL_SP_NONBLOCK */
  15851. static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
  15852. {
  15853. sp_digit* t1 = t;
  15854. sp_digit* t2 = t + 2*8;
  15855. sp_digit* x;
  15856. sp_digit* y;
  15857. sp_digit* z;
  15858. x = r->x;
  15859. y = r->y;
  15860. z = r->z;
  15861. /* Put infinity into result. */
  15862. if (r != p) {
  15863. r->infinity = p->infinity;
  15864. }
  15865. /* T1 = Z * Z */
  15866. sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
  15867. /* Z = Y * Z */
  15868. sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
  15869. /* Z = 2Z */
  15870. sp_256_mont_dbl_8(z, z, p256_mod);
  15871. /* T2 = X - T1 */
  15872. sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
  15873. /* T1 = X + T1 */
  15874. sp_256_mont_add_8(t1, p->x, t1, p256_mod);
  15875. /* T2 = T1 * T2 */
  15876. sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
  15877. /* T1 = 3T2 */
  15878. sp_256_mont_tpl_8(t1, t2, p256_mod);
  15879. /* Y = 2Y */
  15880. sp_256_mont_dbl_8(y, p->y, p256_mod);
  15881. /* Y = Y * Y */
  15882. sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
  15883. /* T2 = Y * Y */
  15884. sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
  15885. /* T2 = T2/2 */
  15886. sp_256_div2_8(t2, t2, p256_mod);
  15887. /* Y = Y * X */
  15888. sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
  15889. /* X = T1 * T1 */
  15890. sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
  15891. /* X = X - Y */
  15892. sp_256_mont_sub_8(x, x, y, p256_mod);
  15893. /* X = X - Y */
  15894. sp_256_mont_sub_8(x, x, y, p256_mod);
  15895. /* Y = Y - X */
  15896. sp_256_mont_sub_8(y, y, x, p256_mod);
  15897. /* Y = Y * T1 */
  15898. sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
  15899. /* Y = Y - T2 */
  15900. sp_256_mont_sub_8(y, y, t2, p256_mod);
  15901. }
  15902. #ifdef WOLFSSL_SP_SMALL
  15903. /* Sub b from a into r. (r = a - b)
  15904. *
  15905. * r A single precision integer.
  15906. * a A single precision integer.
  15907. * b A single precision integer.
  15908. */
  15909. SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
  15910. const sp_digit* b)
  15911. {
  15912. sp_digit c = 0;
  15913. __asm__ __volatile__ (
  15914. "mov r6, %[a]\n\t"
  15915. "add r6, r6, #32\n\t"
  15916. "\n1:\n\t"
  15917. "mov r5, #0\n\t"
  15918. "subs r5, r5, %[c]\n\t"
  15919. "ldr r4, [%[a]]\n\t"
  15920. "ldr r5, [%[b]]\n\t"
  15921. "sbcs r4, r4, r5\n\t"
  15922. "str r4, [%[r]]\n\t"
  15923. "sbc %[c], %[c], %[c]\n\t"
  15924. "add %[a], %[a], #4\n\t"
  15925. "add %[b], %[b], #4\n\t"
  15926. "add %[r], %[r], #4\n\t"
  15927. "cmp %[a], r6\n\t"
  15928. #ifdef __GNUC__
  15929. "bne 1b\n\t"
  15930. #else
  15931. "bne.n 1b\n\t"
  15932. #endif /* __GNUC__ */
  15933. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  15934. :
  15935. : "memory", "r4", "r5", "r6"
  15936. );
  15937. return c;
  15938. }
  15939. #else
  15940. /* Sub b from a into r. (r = a - b)
  15941. *
  15942. * r A single precision integer.
  15943. * a A single precision integer.
  15944. * b A single precision integer.
  15945. */
  15946. SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
  15947. const sp_digit* b)
  15948. {
  15949. sp_digit c = 0;
  15950. __asm__ __volatile__ (
  15951. "ldr r4, [%[a], #0]\n\t"
  15952. "ldr r5, [%[a], #4]\n\t"
  15953. "ldr r6, [%[b], #0]\n\t"
  15954. "ldr r8, [%[b], #4]\n\t"
  15955. "subs r4, r4, r6\n\t"
  15956. "sbcs r5, r5, r8\n\t"
  15957. "str r4, [%[r], #0]\n\t"
  15958. "str r5, [%[r], #4]\n\t"
  15959. "ldr r4, [%[a], #8]\n\t"
  15960. "ldr r5, [%[a], #12]\n\t"
  15961. "ldr r6, [%[b], #8]\n\t"
  15962. "ldr r8, [%[b], #12]\n\t"
  15963. "sbcs r4, r4, r6\n\t"
  15964. "sbcs r5, r5, r8\n\t"
  15965. "str r4, [%[r], #8]\n\t"
  15966. "str r5, [%[r], #12]\n\t"
  15967. "ldr r4, [%[a], #16]\n\t"
  15968. "ldr r5, [%[a], #20]\n\t"
  15969. "ldr r6, [%[b], #16]\n\t"
  15970. "ldr r8, [%[b], #20]\n\t"
  15971. "sbcs r4, r4, r6\n\t"
  15972. "sbcs r5, r5, r8\n\t"
  15973. "str r4, [%[r], #16]\n\t"
  15974. "str r5, [%[r], #20]\n\t"
  15975. "ldr r4, [%[a], #24]\n\t"
  15976. "ldr r5, [%[a], #28]\n\t"
  15977. "ldr r6, [%[b], #24]\n\t"
  15978. "ldr r8, [%[b], #28]\n\t"
  15979. "sbcs r4, r4, r6\n\t"
  15980. "sbcs r5, r5, r8\n\t"
  15981. "str r4, [%[r], #24]\n\t"
  15982. "str r5, [%[r], #28]\n\t"
  15983. "sbc %[c], %[c], %[c]\n\t"
  15984. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  15985. :
  15986. : "memory", "r4", "r5", "r6", "r8"
  15987. );
  15988. return c;
  15989. }
  15990. #endif /* WOLFSSL_SP_SMALL */
  15991. /* Compare two numbers to determine if they are equal.
  15992. * Constant time implementation.
  15993. *
  15994. * a First number to compare.
  15995. * b Second number to compare.
  15996. * returns 1 when equal and 0 otherwise.
  15997. */
  15998. static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
  15999. {
  16000. return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
  16001. (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
  16002. }
  16003. /* Add two Montgomery form projective points.
  16004. *
  16005. * r Result of addition.
  16006. * p First point to add.
  16007. * q Second point to add.
  16008. * t Temporary ordinate data.
  16009. */
  16010. #ifdef WOLFSSL_SP_NONBLOCK
  16011. typedef struct sp_256_proj_point_add_8_ctx {
  16012. int state;
  16013. sp_256_proj_point_dbl_8_ctx dbl_ctx;
  16014. const sp_point_256* ap[2];
  16015. sp_point_256* rp[2];
  16016. sp_digit* t1;
  16017. sp_digit* t2;
  16018. sp_digit* t3;
  16019. sp_digit* t4;
  16020. sp_digit* t5;
  16021. sp_digit* x;
  16022. sp_digit* y;
  16023. sp_digit* z;
  16024. } sp_256_proj_point_add_8_ctx;
/* Non-blocking addition of two Montgomery form projective points.
 *
 * Every call runs one step of a state machine whose position is stored in
 * sp_ctx->data (state 26 also falls through into the final state 27).
 *
 * sp_ctx  Context holding the state between calls.
 * r       Result of addition.
 * p       First point to add.
 * q       Second point to add.
 * t       Temporary ordinate data.
 * returns MP_OKAY once state 27 (done) has been reached, FP_WOULDBLOCK while
 *         steps remain, otherwise the value returned by
 *         sp_256_proj_point_dbl_8_nb.
 */
static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r,
    const sp_point_256* p, const sp_point_256* q, sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_256_proj_point_add_8_ctx* ctx = (sp_256_proj_point_add_8_ctx*)sp_ctx->data;

    /* Ensure only the first point is the same as the result.
     * p and q are parameters, so the swap is redone on every call. */
    if (q == r) {
        const sp_point_256* a = p;
        p = q;
        q = a;
    }

    /* Compile-time check: the array size is negative, and compilation fails,
     * unless this context is smaller than sp_ecc_ctx_t. */
    typedef char ctx_size_test[sizeof(sp_256_proj_point_add_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT */
        /* Carve the five scratch values out of t. */
        ctx->t1 = t;
        ctx->t2 = t + 2*8;
        ctx->t3 = t + 4*8;
        ctx->t4 = t + 6*8;
        ctx->t5 = t + 8*8;

        ctx->state = 1;
        break;
    case 1:
        /* Check double */
        /* t1 = p256_mod - q->y, so that p->y can be compared with -q->y. */
        (void)sp_256_sub_8(ctx->t1, p256_mod, q->y);
        sp_256_norm_8(ctx->t1);
        if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
            (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0)
        {
            /* Same x and z and y equal to +/- q->y: use the doubling code. */
            XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
            ctx->state = 2;
        }
        else {
            ctx->state = 3;
        }
        break;
    case 2:
        /* Remains in this state until the nested double reports MP_OKAY. */
        err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t);
        if (err == MP_OKAY)
            ctx->state = 27; /* done */
        break;
    case 3:
    {
        int i;
        ctx->rp[0] = r;

        /*lint allow cast to different type of pointer*/
        ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
        XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256));
        /* When either input is at infinity the arithmetic is directed at the
         * dummy point in t, leaving the copy made below as the result. */
        ctx->x = ctx->rp[p->infinity | q->infinity]->x;
        ctx->y = ctx->rp[p->infinity | q->infinity]->y;
        ctx->z = ctx->rp[p->infinity | q->infinity]->z;

        ctx->ap[0] = p;
        ctx->ap[1] = q;
        /* r = p, or r = q when p is the point at infinity. */
        for (i=0; i<8; i++) {
            r->x[i] = ctx->ap[p->infinity]->x[i];
        }
        for (i=0; i<8; i++) {
            r->y[i] = ctx->ap[p->infinity]->y[i];
        }
        for (i=0; i<8; i++) {
            r->z[i] = ctx->ap[p->infinity]->z[i];
        }
        r->infinity = ctx->ap[p->infinity]->infinity;

        ctx->state = 4;
        break;
    }
    case 4:
        /* U1 = X1*Z2^2 */
        sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod);
        ctx->state = 5;
        break;
    case 5:
        /* t3 = Z2^3, used for S1 in state 10. */
        sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod);
        ctx->state = 6;
        break;
    case 6:
        sp_256_mont_mul_8(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod);
        ctx->state = 7;
        break;
    case 7:
        /* U2 = X2*Z1^2 */
        sp_256_mont_sqr_8(ctx->t2, ctx->z, p256_mod, p256_mp_mod);
        ctx->state = 8;
        break;
    case 8:
        /* t4 = Z1^3, used for S2 in state 11. */
        sp_256_mont_mul_8(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod);
        ctx->state = 9;
        break;
    case 9:
        sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod);
        ctx->state = 10;
        break;
    case 10:
        /* S1 = Y1*Z2^3 */
        sp_256_mont_mul_8(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod);
        ctx->state = 11;
        break;
    case 11:
        /* S2 = Y2*Z1^3 */
        sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod);
        ctx->state = 12;
        break;
    case 12:
        /* H = U2 - U1 */
        sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod);
        ctx->state = 13;
        break;
    case 13:
        /* R = S2 - S1 */
        sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod);
        ctx->state = 14;
        break;
    case 14:
        /* Z3 = H*Z1*Z2 */
        sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod);
        ctx->state = 15;
        break;
    case 15:
        sp_256_mont_mul_8(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod);
        ctx->state = 16;
        break;
    case 16:
        /* X3 = R^2 - H^3 - 2*U1*H^2 */
        sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod);
        ctx->state = 17;
        break;
    case 17:
        /* t5 = H^2 */
        sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod);
        ctx->state = 18;
        break;
    case 18:
        /* y = U1*H^2 */
        sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod);
        ctx->state = 19;
        break;
    case 19:
        /* t5 = H^3 */
        sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod);
        ctx->state = 20;
        break;
    case 20:
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod);
        ctx->state = 21;
        break;
    case 21:
        /* t1 = 2*U1*H^2 */
        sp_256_mont_dbl_8(ctx->t1, ctx->y, p256_mod);
        ctx->state = 22;
        break;
    case 22:
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t1, p256_mod);
        ctx->state = 23;
        break;
    case 23:
        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->x, p256_mod);
        ctx->state = 24;
        break;
    case 24:
        sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod);
        ctx->state = 25;
        break;
    case 25:
        /* t5 = S1*H^3 */
        sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod);
        ctx->state = 26;
        break;
    case 26:
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod);
        ctx->state = 27;
        /* fall-through */
    case 27:
        err = MP_OKAY;
        break;
    }

    /* A nested double that has finished is not the end of this operation
     * unless the state machine has also reached the done state. */
    if (err == MP_OKAY && ctx->state != 27) {
        err = FP_WOULDBLOCK;
    }
    return err;
}
  16201. #endif /* WOLFSSL_SP_NONBLOCK */
  16202. static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
  16203. sp_digit* t)
  16204. {
  16205. const sp_point_256* ap[2];
  16206. sp_point_256* rp[2];
  16207. sp_digit* t1 = t;
  16208. sp_digit* t2 = t + 2*8;
  16209. sp_digit* t3 = t + 4*8;
  16210. sp_digit* t4 = t + 6*8;
  16211. sp_digit* t5 = t + 8*8;
  16212. sp_digit* x;
  16213. sp_digit* y;
  16214. sp_digit* z;
  16215. int i;
  16216. /* Ensure only the first point is the same as the result. */
  16217. if (q == r) {
  16218. const sp_point_256* a = p;
  16219. p = q;
  16220. q = a;
  16221. }
  16222. /* Check double */
  16223. (void)sp_256_sub_8(t1, p256_mod, q->y);
  16224. sp_256_norm_8(t1);
  16225. if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
  16226. (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
  16227. sp_256_proj_point_dbl_8(r, p, t);
  16228. }
  16229. else {
  16230. rp[0] = r;
  16231. /*lint allow cast to different type of pointer*/
  16232. rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
  16233. XMEMSET(rp[1], 0, sizeof(sp_point_256));
  16234. x = rp[p->infinity | q->infinity]->x;
  16235. y = rp[p->infinity | q->infinity]->y;
  16236. z = rp[p->infinity | q->infinity]->z;
  16237. ap[0] = p;
  16238. ap[1] = q;
  16239. for (i=0; i<8; i++) {
  16240. r->x[i] = ap[p->infinity]->x[i];
  16241. }
  16242. for (i=0; i<8; i++) {
  16243. r->y[i] = ap[p->infinity]->y[i];
  16244. }
  16245. for (i=0; i<8; i++) {
  16246. r->z[i] = ap[p->infinity]->z[i];
  16247. }
  16248. r->infinity = ap[p->infinity]->infinity;
  16249. /* U1 = X1*Z2^2 */
  16250. sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
  16251. sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
  16252. sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
  16253. /* U2 = X2*Z1^2 */
  16254. sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
  16255. sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
  16256. sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
  16257. /* S1 = Y1*Z2^3 */
  16258. sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
  16259. /* S2 = Y2*Z1^3 */
  16260. sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
  16261. /* H = U2 - U1 */
  16262. sp_256_mont_sub_8(t2, t2, t1, p256_mod);
  16263. /* R = S2 - S1 */
  16264. sp_256_mont_sub_8(t4, t4, t3, p256_mod);
  16265. /* Z3 = H*Z1*Z2 */
  16266. sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
  16267. sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
  16268. /* X3 = R^2 - H^3 - 2*U1*H^2 */
  16269. sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
  16270. sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
  16271. sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
  16272. sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
  16273. sp_256_mont_sub_8(x, x, t5, p256_mod);
  16274. sp_256_mont_dbl_8(t1, y, p256_mod);
  16275. sp_256_mont_sub_8(x, x, t1, p256_mod);
  16276. /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
  16277. sp_256_mont_sub_8(y, y, x, p256_mod);
  16278. sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
  16279. sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
  16280. sp_256_mont_sub_8(y, y, t5, p256_mod);
  16281. }
  16282. }
  16283. #ifndef WC_NO_CACHE_RESISTANT
  16284. /* Touch each possible point that could be being copied.
  16285. *
  16286. * r Point to copy into.
  16287. * table Table - start of the entires to access
  16288. * idx Index of entry to retrieve.
  16289. */
  16290. static void sp_256_get_point_16_8(sp_point_256* r, const sp_point_256* table,
  16291. int idx)
  16292. {
  16293. int i;
  16294. sp_digit mask;
  16295. r->x[0] = 0;
  16296. r->x[1] = 0;
  16297. r->x[2] = 0;
  16298. r->x[3] = 0;
  16299. r->x[4] = 0;
  16300. r->x[5] = 0;
  16301. r->x[6] = 0;
  16302. r->x[7] = 0;
  16303. r->y[0] = 0;
  16304. r->y[1] = 0;
  16305. r->y[2] = 0;
  16306. r->y[3] = 0;
  16307. r->y[4] = 0;
  16308. r->y[5] = 0;
  16309. r->y[6] = 0;
  16310. r->y[7] = 0;
  16311. r->z[0] = 0;
  16312. r->z[1] = 0;
  16313. r->z[2] = 0;
  16314. r->z[3] = 0;
  16315. r->z[4] = 0;
  16316. r->z[5] = 0;
  16317. r->z[6] = 0;
  16318. r->z[7] = 0;
  16319. for (i = 1; i < 16; i++) {
  16320. mask = 0 - (i == idx);
  16321. r->x[0] |= mask & table[i].x[0];
  16322. r->x[1] |= mask & table[i].x[1];
  16323. r->x[2] |= mask & table[i].x[2];
  16324. r->x[3] |= mask & table[i].x[3];
  16325. r->x[4] |= mask & table[i].x[4];
  16326. r->x[5] |= mask & table[i].x[5];
  16327. r->x[6] |= mask & table[i].x[6];
  16328. r->x[7] |= mask & table[i].x[7];
  16329. r->y[0] |= mask & table[i].y[0];
  16330. r->y[1] |= mask & table[i].y[1];
  16331. r->y[2] |= mask & table[i].y[2];
  16332. r->y[3] |= mask & table[i].y[3];
  16333. r->y[4] |= mask & table[i].y[4];
  16334. r->y[5] |= mask & table[i].y[5];
  16335. r->y[6] |= mask & table[i].y[6];
  16336. r->y[7] |= mask & table[i].y[7];
  16337. r->z[0] |= mask & table[i].z[0];
  16338. r->z[1] |= mask & table[i].z[1];
  16339. r->z[2] |= mask & table[i].z[2];
  16340. r->z[3] |= mask & table[i].z[3];
  16341. r->z[4] |= mask & table[i].z[4];
  16342. r->z[5] |= mask & table[i].z[5];
  16343. r->z[6] |= mask & table[i].z[6];
  16344. r->z[7] |= mask & table[i].z[7];
  16345. }
  16346. }
  16347. #endif /* !WC_NO_CACHE_RESISTANT */
  16348. /* Multiply the point by the scalar and return the result.
  16349. * If map is true then convert result to affine coordinates.
  16350. *
  16351. * Simple, smaller code size and memory size, of windowing.
  16352. * Calculate uindow of 4 bits.
  16353. * Only add points from table.
  16354. *
  16355. * r Resulting point.
  16356. * g Point to multiply.
  16357. * k Scalar to multiply by.
  16358. * map Indicates whether to convert result to affine.
  16359. * ct Constant time required.
  16360. * heap Heap to use for allocation.
  16361. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  16362. */
  16363. static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
  16364. int map, int ct, void* heap)
  16365. {
  16366. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  16367. sp_point_256 td[16];
  16368. sp_point_256 rtd;
  16369. sp_digit tmpd[2 * 8 * 5];
  16370. #ifndef WC_NO_CACHE_RESISTANT
  16371. sp_point_256 pd;
  16372. #endif
  16373. #endif
  16374. sp_point_256* t;
  16375. sp_point_256* rt;
  16376. #ifndef WC_NO_CACHE_RESISTANT
  16377. sp_point_256* p;
  16378. #endif
  16379. sp_digit* tmp;
  16380. sp_digit n;
  16381. int i;
  16382. int c, y;
  16383. int err;
  16384. /* Constant time used for cache attack resistance implementation. */
  16385. (void)ct;
  16386. (void)heap;
  16387. err = sp_256_point_new_8(heap, rtd, rt);
  16388. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  16389. #ifndef WC_NO_CACHE_RESISTANT
  16390. t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 17, heap, DYNAMIC_TYPE_ECC);
  16391. #else
  16392. t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
  16393. #endif
  16394. if (t == NULL)
  16395. err = MEMORY_E;
  16396. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
  16397. DYNAMIC_TYPE_ECC);
  16398. if (tmp == NULL)
  16399. err = MEMORY_E;
  16400. #else
  16401. t = td;
  16402. tmp = tmpd;
  16403. #endif
  16404. if (err == MP_OKAY) {
  16405. #ifndef WC_NO_CACHE_RESISTANT
  16406. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  16407. p = t + 16;
  16408. #else
  16409. p = &pd;
  16410. #endif
  16411. #endif
  16412. /* t[0] = {0, 0, 1} * norm */
  16413. XMEMSET(&t[0], 0, sizeof(t[0]));
  16414. t[0].infinity = 1;
  16415. /* t[1] = {g->x, g->y, g->z} * norm */
  16416. (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
  16417. (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
  16418. (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
  16419. t[1].infinity = 0;
  16420. sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
  16421. t[ 2].infinity = 0;
  16422. sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
  16423. t[ 3].infinity = 0;
  16424. sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
  16425. t[ 4].infinity = 0;
  16426. sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
  16427. t[ 5].infinity = 0;
  16428. sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
  16429. t[ 6].infinity = 0;
  16430. sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
  16431. t[ 7].infinity = 0;
  16432. sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
  16433. t[ 8].infinity = 0;
  16434. sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
  16435. t[ 9].infinity = 0;
  16436. sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
  16437. t[10].infinity = 0;
  16438. sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
  16439. t[11].infinity = 0;
  16440. sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
  16441. t[12].infinity = 0;
  16442. sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
  16443. t[13].infinity = 0;
  16444. sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
  16445. t[14].infinity = 0;
  16446. sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
  16447. t[15].infinity = 0;
  16448. i = 6;
  16449. n = k[i+1] << 0;
  16450. c = 28;
  16451. y = n >> 28;
  16452. #ifndef WC_NO_CACHE_RESISTANT
  16453. if (ct) {
  16454. sp_256_get_point_16_8(rt, t, y);
  16455. rt->infinity = !y;
  16456. }
  16457. else
  16458. #endif
  16459. {
  16460. XMEMCPY(rt, &t[y], sizeof(sp_point_256));
  16461. }
  16462. n <<= 4;
  16463. for (; i>=0 || c>=4; ) {
  16464. if (c < 4) {
  16465. n |= k[i--];
  16466. c += 32;
  16467. }
  16468. y = (n >> 28) & 0xf;
  16469. n <<= 4;
  16470. c -= 4;
  16471. sp_256_proj_point_dbl_8(rt, rt, tmp);
  16472. sp_256_proj_point_dbl_8(rt, rt, tmp);
  16473. sp_256_proj_point_dbl_8(rt, rt, tmp);
  16474. sp_256_proj_point_dbl_8(rt, rt, tmp);
  16475. #ifndef WC_NO_CACHE_RESISTANT
  16476. if (ct) {
  16477. sp_256_get_point_16_8(p, t, y);
  16478. p->infinity = !y;
  16479. sp_256_proj_point_add_8(rt, rt, p, tmp);
  16480. }
  16481. else
  16482. #endif
  16483. {
  16484. sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
  16485. }
  16486. }
  16487. if (map != 0) {
  16488. sp_256_map_8(r, rt, tmp);
  16489. }
  16490. else {
  16491. XMEMCPY(r, rt, sizeof(sp_point_256));
  16492. }
  16493. }
  16494. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  16495. if (tmp != NULL) {
  16496. XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
  16497. XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
  16498. }
  16499. if (t != NULL) {
  16500. XMEMSET(t, 0, sizeof(sp_point_256) * 16);
  16501. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  16502. }
  16503. #else
  16504. ForceZero(tmpd, sizeof(tmpd));
  16505. ForceZero(td, sizeof(td));
  16506. #endif
  16507. sp_256_point_free_8(rt, 1, heap);
  16508. return err;
  16509. }
/* A table entry for pre-computed points.
 * Only the x and y ordinates are stored, 8 digits each; there is no z
 * ordinate and no infinity flag.
 */
typedef struct sp_table_entry_256 {
    /* X ordinate. */
    sp_digit x[8];
    /* Y ordinate. */
    sp_digit y[8];
} sp_table_entry_256;
  16515. #ifdef FP_ECC
/* Double the Montgomery form projective point p a number of times.
 * The doubling is done in place: p is both the input and the result.
 *
 * p  Point to double; overwritten with the result.
 * n  Number of times to double
 * t  Temporary ordinate data.
 */
static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
{
    sp_digit* w = t;
    sp_digit* a = t + 2*8;
    sp_digit* b = t + 4*8;
    sp_digit* t1 = t + 6*8;
    sp_digit* t2 = t + 8*8;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = p->x;
    y = p->y;
    z = p->z;

    /* Y = 2*Y */
    sp_256_mont_dbl_8(y, y, p256_mod);
    /* W = Z^4 */
    sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
    sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);

    /* Without WOLFSSL_SP_SMALL the loop body runs n-1 times and the last
     * doubling is written out after the loop without the update of W.
     * With WOLFSSL_SP_SMALL the loop body runs n times and the update of W is
     * skipped on the last pass (n == 0). */
#ifndef WOLFSSL_SP_SMALL
    while (--n > 0)
#else
    while (--n >= 0)
#endif
    {
        /* A = 3*(X^2 - W) */
        sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
        sp_256_mont_sub_8(t1, t1, w, p256_mod);
        sp_256_mont_tpl_8(a, t1, p256_mod);
        /* B = X*Y^2 */
        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
        /* X = A^2 - 2B */
        sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_8(t2, b, p256_mod);
        sp_256_mont_sub_8(x, x, t2, p256_mod);
        /* Z = Z*Y */
        sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
        /* t1 = Y^4 */
        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
#ifdef WOLFSSL_SP_SMALL
        if (n != 0)
#endif
        {
            /* W = W*Y^4 */
            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_256_mont_sub_8(y, b, x, p256_mod);
        sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_8(y, y, p256_mod);
        sp_256_mont_sub_8(y, y, t1, p256_mod);
    }
#ifndef WOLFSSL_SP_SMALL
    /* A = 3*(X^2 - W) */
    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
    sp_256_mont_sub_8(t1, t1, w, p256_mod);
    sp_256_mont_tpl_8(a, t1, p256_mod);
    /* B = X*Y^2 */
    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
    /* X = A^2 - 2B */
    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
    sp_256_mont_dbl_8(t2, b, p256_mod);
    sp_256_mont_sub_8(x, x, t2, p256_mod);
    /* Z = Z*Y */
    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
    /* t1 = Y^4 */
    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
    /* y = 2*A*(B - X) - Y^4 */
    sp_256_mont_sub_8(y, b, x, p256_mod);
    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
    sp_256_mont_dbl_8(y, y, p256_mod);
    sp_256_mont_sub_8(y, y, t1, p256_mod);
#endif
    /* Y = Y/2 */
    sp_256_div2_8(y, y, p256_mod);
}
  16600. /* Convert the projective point to affine.
  16601. * Ordinates are in Montgomery form.
  16602. *
  16603. * a Point to convert.
  16604. * t Temporary data.
  16605. */
  16606. static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
  16607. {
  16608. sp_digit* t1 = t;
  16609. sp_digit* t2 = t + 2 * 8;
  16610. sp_digit* tmp = t + 4 * 8;
  16611. sp_256_mont_inv_8(t1, a->z, tmp);
  16612. sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
  16613. sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
  16614. sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
  16615. sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
  16616. XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
  16617. }
  16618. #endif /* FP_ECC */
  16619. /* Add two Montgomery form projective points. The second point has a q value of
  16620. * one.
  16621. * Only the first point can be the same pointer as the result point.
  16622. *
  16623. * r Result of addition.
  16624. * p First point to add.
  16625. * q Second point to add.
  16626. * t Temporary ordinate data.
  16627. */
  16628. static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
  16629. const sp_point_256* q, sp_digit* t)
  16630. {
  16631. const sp_point_256* ap[2];
  16632. sp_point_256* rp[2];
  16633. sp_digit* t1 = t;
  16634. sp_digit* t2 = t + 2*8;
  16635. sp_digit* t3 = t + 4*8;
  16636. sp_digit* t4 = t + 6*8;
  16637. sp_digit* t5 = t + 8*8;
  16638. sp_digit* x;
  16639. sp_digit* y;
  16640. sp_digit* z;
  16641. int i;
  16642. /* Check double */
  16643. (void)sp_256_sub_8(t1, p256_mod, q->y);
  16644. sp_256_norm_8(t1);
  16645. if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
  16646. (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
  16647. sp_256_proj_point_dbl_8(r, p, t);
  16648. }
  16649. else {
  16650. rp[0] = r;
  16651. /*lint allow cast to different type of pointer*/
  16652. rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
  16653. XMEMSET(rp[1], 0, sizeof(sp_point_256));
  16654. x = rp[p->infinity | q->infinity]->x;
  16655. y = rp[p->infinity | q->infinity]->y;
  16656. z = rp[p->infinity | q->infinity]->z;
  16657. ap[0] = p;
  16658. ap[1] = q;
  16659. for (i=0; i<8; i++) {
  16660. r->x[i] = ap[p->infinity]->x[i];
  16661. }
  16662. for (i=0; i<8; i++) {
  16663. r->y[i] = ap[p->infinity]->y[i];
  16664. }
  16665. for (i=0; i<8; i++) {
  16666. r->z[i] = ap[p->infinity]->z[i];
  16667. }
  16668. r->infinity = ap[p->infinity]->infinity;
  16669. /* U2 = X2*Z1^2 */
  16670. sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
  16671. sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
  16672. sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
  16673. /* S2 = Y2*Z1^3 */
  16674. sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
  16675. /* H = U2 - X1 */
  16676. sp_256_mont_sub_8(t2, t2, x, p256_mod);
  16677. /* R = S2 - Y1 */
  16678. sp_256_mont_sub_8(t4, t4, y, p256_mod);
  16679. /* Z3 = H*Z1 */
  16680. sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
  16681. /* X3 = R^2 - H^3 - 2*X1*H^2 */
  16682. sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
  16683. sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
  16684. sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
  16685. sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
  16686. sp_256_mont_sub_8(x, t1, t5, p256_mod);
  16687. sp_256_mont_dbl_8(t1, t3, p256_mod);
  16688. sp_256_mont_sub_8(x, x, t1, p256_mod);
  16689. /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
  16690. sp_256_mont_sub_8(t3, t3, x, p256_mod);
  16691. sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
  16692. sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
  16693. sp_256_mont_sub_8(y, t3, t5, p256_mod);
  16694. }
  16695. }
  16696. #ifdef WOLFSSL_SP_SMALL
  16697. #ifdef FP_ECC
  16698. /* Generate the pre-computed table of points for the base point.
  16699. *
  16700. * a The base point.
  16701. * table Place to store generated point data.
  16702. * tmp Temporary data.
  16703. * heap Heap to use for allocation.
  16704. */
  16705. static int sp_256_gen_stripe_table_8(const sp_point_256* a,
  16706. sp_table_entry_256* table, sp_digit* tmp, void* heap)
  16707. {
  16708. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  16709. sp_point_256 td, s1d, s2d;
  16710. #endif
  16711. sp_point_256* t;
  16712. sp_point_256* s1 = NULL;
  16713. sp_point_256* s2 = NULL;
  16714. int i, j;
  16715. int err;
  16716. (void)heap;
  16717. err = sp_256_point_new_8(heap, td, t);
  16718. if (err == MP_OKAY) {
  16719. err = sp_256_point_new_8(heap, s1d, s1);
  16720. }
  16721. if (err == MP_OKAY) {
  16722. err = sp_256_point_new_8(heap, s2d, s2);
  16723. }
  16724. if (err == MP_OKAY) {
  16725. err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
  16726. }
  16727. if (err == MP_OKAY) {
  16728. err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
  16729. }
  16730. if (err == MP_OKAY) {
  16731. err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
  16732. }
  16733. if (err == MP_OKAY) {
  16734. t->infinity = 0;
  16735. sp_256_proj_to_affine_8(t, tmp);
  16736. XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
  16737. s1->infinity = 0;
  16738. XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
  16739. s2->infinity = 0;
  16740. /* table[0] = {0, 0, infinity} */
  16741. XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
  16742. /* table[1] = Affine version of 'a' in Montgomery form */
  16743. XMEMCPY(table[1].x, t->x, sizeof(table->x));
  16744. XMEMCPY(table[1].y, t->y, sizeof(table->y));
  16745. for (i=1; i<4; i++) {
  16746. sp_256_proj_point_dbl_n_8(t, 64, tmp);
  16747. sp_256_proj_to_affine_8(t, tmp);
  16748. XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
  16749. XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
  16750. }
  16751. for (i=1; i<4; i++) {
  16752. XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
  16753. XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
  16754. for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
  16755. XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
  16756. XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
  16757. sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
  16758. sp_256_proj_to_affine_8(t, tmp);
  16759. XMEMCPY(table[j].x, t->x, sizeof(table->x));
  16760. XMEMCPY(table[j].y, t->y, sizeof(table->y));
  16761. }
  16762. }
  16763. }
  16764. sp_256_point_free_8(s2, 0, heap);
  16765. sp_256_point_free_8(s1, 0, heap);
  16766. sp_256_point_free_8( t, 0, heap);
  16767. return err;
  16768. }
  16769. #endif /* FP_ECC */
  16770. #ifndef WC_NO_CACHE_RESISTANT
  16771. /* Touch each possible entry that could be being copied.
  16772. *
  16773. * r Point to copy into.
  16774. * table Table - start of the entires to access
  16775. * idx Index of entry to retrieve.
  16776. */
  16777. static void sp_256_get_entry_16_8(sp_point_256* r,
  16778. const sp_table_entry_256* table, int idx)
  16779. {
  16780. int i;
  16781. sp_digit mask;
  16782. r->x[0] = 0;
  16783. r->x[1] = 0;
  16784. r->x[2] = 0;
  16785. r->x[3] = 0;
  16786. r->x[4] = 0;
  16787. r->x[5] = 0;
  16788. r->x[6] = 0;
  16789. r->x[7] = 0;
  16790. r->y[0] = 0;
  16791. r->y[1] = 0;
  16792. r->y[2] = 0;
  16793. r->y[3] = 0;
  16794. r->y[4] = 0;
  16795. r->y[5] = 0;
  16796. r->y[6] = 0;
  16797. r->y[7] = 0;
  16798. for (i = 1; i < 16; i++) {
  16799. mask = 0 - (i == idx);
  16800. r->x[0] |= mask & table[i].x[0];
  16801. r->x[1] |= mask & table[i].x[1];
  16802. r->x[2] |= mask & table[i].x[2];
  16803. r->x[3] |= mask & table[i].x[3];
  16804. r->x[4] |= mask & table[i].x[4];
  16805. r->x[5] |= mask & table[i].x[5];
  16806. r->x[6] |= mask & table[i].x[6];
  16807. r->x[7] |= mask & table[i].x[7];
  16808. r->y[0] |= mask & table[i].y[0];
  16809. r->y[1] |= mask & table[i].y[1];
  16810. r->y[2] |= mask & table[i].y[2];
  16811. r->y[3] |= mask & table[i].y[3];
  16812. r->y[4] |= mask & table[i].y[4];
  16813. r->y[5] |= mask & table[i].y[5];
  16814. r->y[6] |= mask & table[i].y[6];
  16815. r->y[7] |= mask & table[i].y[7];
  16816. }
  16817. }
  16818. #endif /* !WC_NO_CACHE_RESISTANT */
  16819. /* Multiply the point by the scalar and return the result.
  16820. * If map is true then convert result to affine coordinates.
  16821. *
  16822. * Implementation uses striping of bits.
  16823. * Choose bits 4 bits apart.
  16824. *
  16825. * r Resulting point.
  16826. * k Scalar to multiply by.
  16827. * table Pre-computed table.
  16828. * map Indicates whether to convert result to affine.
  16829. * ct Constant time required.
  16830. * heap Heap to use for allocation.
  16831. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  16832. */
  16833. static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
  16834. const sp_table_entry_256* table, const sp_digit* k, int map,
  16835. int ct, void* heap)
  16836. {
  16837. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  16838. sp_point_256 rtd;
  16839. sp_point_256 pd;
  16840. sp_digit td[2 * 8 * 5];
  16841. #endif
  16842. sp_point_256* rt;
  16843. sp_point_256* p = NULL;
  16844. sp_digit* t;
  16845. int i, j;
  16846. int y, x;
  16847. int err;
  16848. (void)g;
  16849. /* Constant time used for cache attack resistance implementation. */
  16850. (void)ct;
  16851. (void)heap;
  16852. err = sp_256_point_new_8(heap, rtd, rt);
  16853. if (err == MP_OKAY) {
  16854. err = sp_256_point_new_8(heap, pd, p);
  16855. }
  16856. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  16857. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
  16858. DYNAMIC_TYPE_ECC);
  16859. if (t == NULL) {
  16860. err = MEMORY_E;
  16861. }
  16862. #else
  16863. t = td;
  16864. #endif
  16865. if (err == MP_OKAY) {
  16866. XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
  16867. XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
  16868. y = 0;
  16869. for (j=0,x=63; j<4; j++,x+=64) {
  16870. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  16871. }
  16872. #ifndef WC_NO_CACHE_RESISTANT
  16873. if (ct) {
  16874. sp_256_get_entry_16_8(rt, table, y);
  16875. } else
  16876. #endif
  16877. {
  16878. XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
  16879. XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
  16880. }
  16881. rt->infinity = !y;
  16882. for (i=62; i>=0; i--) {
  16883. y = 0;
  16884. for (j=0,x=i; j<4; j++,x+=64) {
  16885. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  16886. }
  16887. sp_256_proj_point_dbl_8(rt, rt, t);
  16888. #ifndef WC_NO_CACHE_RESISTANT
  16889. if (ct) {
  16890. sp_256_get_entry_16_8(p, table, y);
  16891. }
  16892. else
  16893. #endif
  16894. {
  16895. XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
  16896. XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
  16897. }
  16898. p->infinity = !y;
  16899. sp_256_proj_point_add_qz1_8(rt, rt, p, t);
  16900. }
  16901. if (map != 0) {
  16902. sp_256_map_8(r, rt, t);
  16903. }
  16904. else {
  16905. XMEMCPY(r, rt, sizeof(sp_point_256));
  16906. }
  16907. }
  16908. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  16909. if (t != NULL) {
  16910. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  16911. }
  16912. #endif
  16913. sp_256_point_free_8(p, 0, heap);
  16914. sp_256_point_free_8(rt, 0, heap);
  16915. return err;
  16916. }
  16917. #ifdef FP_ECC
/* Number of entries in the cache of pre-computed tables, unless already
 * defined. */
#ifndef FP_ENTRIES
#define FP_ENTRIES 16
#endif

/* One cache entry: a point and the pre-computed table kept for it. */
typedef struct sp_cache_256_t {
    /* X ordinate of the point this entry belongs to. */
    sp_digit x[8];
    /* Y ordinate of the point this entry belongs to. */
    sp_digit y[8];
    /* Pre-computed table for the point.
     * NOTE(review): presumably filled by sp_256_gen_stripe_table_8 - confirm
     * against the caller. */
    sp_table_entry_256 table[16];
    /* Use count: set to 1 when the entry is claimed and incremented on each
     * later match in sp_ecc_get_cache_256. */
    uint32_t cnt;
    /* Non-zero when the entry holds a point. */
    int set;
} sp_cache_256_t;

/* The cache entries. */
static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
/* Index of the entry returned by the last lookup; -1 before the first one. */
static THREAD_LS_T int sp_cache_256_last = -1;
/* Non-zero once the 'set' flags of all entries have been initialized. */
static THREAD_LS_T int sp_cache_256_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_256_lock has been initialized. */
    static volatile int initCacheMutex_256 = 0;
    /* Lock taken around use of the cache. */
    static wolfSSL_Mutex sp_cache_256_lock;
#endif
  16935. static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
  16936. {
  16937. int i, j;
  16938. uint32_t least;
  16939. if (sp_cache_256_inited == 0) {
  16940. for (i=0; i<FP_ENTRIES; i++) {
  16941. sp_cache_256[i].set = 0;
  16942. }
  16943. sp_cache_256_inited = 1;
  16944. }
  16945. /* Compare point with those in cache. */
  16946. for (i=0; i<FP_ENTRIES; i++) {
  16947. if (!sp_cache_256[i].set)
  16948. continue;
  16949. if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
  16950. sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
  16951. sp_cache_256[i].cnt++;
  16952. break;
  16953. }
  16954. }
  16955. /* No match. */
  16956. if (i == FP_ENTRIES) {
  16957. /* Find empty entry. */
  16958. i = (sp_cache_256_last + 1) % FP_ENTRIES;
  16959. for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
  16960. if (!sp_cache_256[i].set) {
  16961. break;
  16962. }
  16963. }
  16964. /* Evict least used. */
  16965. if (i == sp_cache_256_last) {
  16966. least = sp_cache_256[0].cnt;
  16967. for (j=1; j<FP_ENTRIES; j++) {
  16968. if (sp_cache_256[j].cnt < least) {
  16969. i = j;
  16970. least = sp_cache_256[i].cnt;
  16971. }
  16972. }
  16973. }
  16974. XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
  16975. XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
  16976. sp_cache_256[i].set = 1;
  16977. sp_cache_256[i].cnt = 1;
  16978. }
  16979. *cache = &sp_cache_256[i];
  16980. sp_cache_256_last = i;
  16981. }
  16982. #endif /* FP_ECC */
  16983. /* Multiply the base point of P256 by the scalar and return the result.
  16984. * If map is true then convert result to affine coordinates.
  16985. *
  16986. * r Resulting point.
  16987. * g Point to multiply.
  16988. * k Scalar to multiply by.
  16989. * map Indicates whether to convert result to affine.
  16990. * ct Constant time required.
  16991. * heap Heap to use for allocation.
  16992. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  16993. */
  16994. static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
  16995. int map, int ct, void* heap)
  16996. {
  16997. #ifndef FP_ECC
  16998. return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
  16999. #else
  17000. sp_digit tmp[2 * 8 * 5];
  17001. sp_cache_256_t* cache;
  17002. int err = MP_OKAY;
  17003. #ifndef HAVE_THREAD_LS
  17004. if (initCacheMutex_256 == 0) {
  17005. wc_InitMutex(&sp_cache_256_lock);
  17006. initCacheMutex_256 = 1;
  17007. }
  17008. if (wc_LockMutex(&sp_cache_256_lock) != 0)
  17009. err = BAD_MUTEX_E;
  17010. #endif /* HAVE_THREAD_LS */
  17011. if (err == MP_OKAY) {
  17012. sp_ecc_get_cache_256(g, &cache);
  17013. if (cache->cnt == 2)
  17014. sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
  17015. #ifndef HAVE_THREAD_LS
  17016. wc_UnLockMutex(&sp_cache_256_lock);
  17017. #endif /* HAVE_THREAD_LS */
  17018. if (cache->cnt < 2) {
  17019. err = sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
  17020. }
  17021. else {
  17022. err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
  17023. map, ct, heap);
  17024. }
  17025. }
  17026. return err;
  17027. #endif
  17028. }
  17029. #else
  17030. #ifdef FP_ECC
  17031. /* Generate the pre-computed table of points for the base point.
  17032. *
  17033. * a The base point.
  17034. * table Place to store generated point data.
  17035. * tmp Temporary data.
  17036. * heap Heap to use for allocation.
  17037. */
  17038. static int sp_256_gen_stripe_table_8(const sp_point_256* a,
  17039. sp_table_entry_256* table, sp_digit* tmp, void* heap)
  17040. {
  17041. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  17042. sp_point_256 td, s1d, s2d;
  17043. #endif
  17044. sp_point_256* t;
  17045. sp_point_256* s1 = NULL;
  17046. sp_point_256* s2 = NULL;
  17047. int i, j;
  17048. int err;
  17049. (void)heap;
  17050. err = sp_256_point_new_8(heap, td, t);
  17051. if (err == MP_OKAY) {
  17052. err = sp_256_point_new_8(heap, s1d, s1);
  17053. }
  17054. if (err == MP_OKAY) {
  17055. err = sp_256_point_new_8(heap, s2d, s2);
  17056. }
  17057. if (err == MP_OKAY) {
  17058. err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
  17059. }
  17060. if (err == MP_OKAY) {
  17061. err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
  17062. }
  17063. if (err == MP_OKAY) {
  17064. err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
  17065. }
  17066. if (err == MP_OKAY) {
  17067. t->infinity = 0;
  17068. sp_256_proj_to_affine_8(t, tmp);
  17069. XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
  17070. s1->infinity = 0;
  17071. XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
  17072. s2->infinity = 0;
  17073. /* table[0] = {0, 0, infinity} */
  17074. XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
  17075. /* table[1] = Affine version of 'a' in Montgomery form */
  17076. XMEMCPY(table[1].x, t->x, sizeof(table->x));
  17077. XMEMCPY(table[1].y, t->y, sizeof(table->y));
  17078. for (i=1; i<8; i++) {
  17079. sp_256_proj_point_dbl_n_8(t, 32, tmp);
  17080. sp_256_proj_to_affine_8(t, tmp);
  17081. XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
  17082. XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
  17083. }
  17084. for (i=1; i<8; i++) {
  17085. XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
  17086. XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
  17087. for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
  17088. XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
  17089. XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
  17090. sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
  17091. sp_256_proj_to_affine_8(t, tmp);
  17092. XMEMCPY(table[j].x, t->x, sizeof(table->x));
  17093. XMEMCPY(table[j].y, t->y, sizeof(table->y));
  17094. }
  17095. }
  17096. }
  17097. sp_256_point_free_8(s2, 0, heap);
  17098. sp_256_point_free_8(s1, 0, heap);
  17099. sp_256_point_free_8( t, 0, heap);
  17100. return err;
  17101. }
  17102. #endif /* FP_ECC */
  17103. #ifndef WC_NO_CACHE_RESISTANT
  17104. /* Touch each possible entry that could be being copied.
  17105. *
  17106. * r Point to copy into.
  17107. * table Table - start of the entires to access
  17108. * idx Index of entry to retrieve.
  17109. */
  17110. static void sp_256_get_entry_256_8(sp_point_256* r,
  17111. const sp_table_entry_256* table, int idx)
  17112. {
  17113. int i;
  17114. sp_digit mask;
  17115. r->x[0] = 0;
  17116. r->x[1] = 0;
  17117. r->x[2] = 0;
  17118. r->x[3] = 0;
  17119. r->x[4] = 0;
  17120. r->x[5] = 0;
  17121. r->x[6] = 0;
  17122. r->x[7] = 0;
  17123. r->y[0] = 0;
  17124. r->y[1] = 0;
  17125. r->y[2] = 0;
  17126. r->y[3] = 0;
  17127. r->y[4] = 0;
  17128. r->y[5] = 0;
  17129. r->y[6] = 0;
  17130. r->y[7] = 0;
  17131. for (i = 1; i < 256; i++) {
  17132. mask = 0 - (i == idx);
  17133. r->x[0] |= mask & table[i].x[0];
  17134. r->x[1] |= mask & table[i].x[1];
  17135. r->x[2] |= mask & table[i].x[2];
  17136. r->x[3] |= mask & table[i].x[3];
  17137. r->x[4] |= mask & table[i].x[4];
  17138. r->x[5] |= mask & table[i].x[5];
  17139. r->x[6] |= mask & table[i].x[6];
  17140. r->x[7] |= mask & table[i].x[7];
  17141. r->y[0] |= mask & table[i].y[0];
  17142. r->y[1] |= mask & table[i].y[1];
  17143. r->y[2] |= mask & table[i].y[2];
  17144. r->y[3] |= mask & table[i].y[3];
  17145. r->y[4] |= mask & table[i].y[4];
  17146. r->y[5] |= mask & table[i].y[5];
  17147. r->y[6] |= mask & table[i].y[6];
  17148. r->y[7] |= mask & table[i].y[7];
  17149. }
  17150. }
  17151. #endif /* !WC_NO_CACHE_RESISTANT */
  17152. /* Multiply the point by the scalar and return the result.
  17153. * If map is true then convert result to affine coordinates.
  17154. *
  17155. * Implementation uses striping of bits.
  17156. * Choose bits 8 bits apart.
  17157. *
  17158. * r Resulting point.
  17159. * k Scalar to multiply by.
  17160. * table Pre-computed table.
  17161. * map Indicates whether to convert result to affine.
  17162. * ct Constant time required.
  17163. * heap Heap to use for allocation.
  17164. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  17165. */
  17166. static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
  17167. const sp_table_entry_256* table, const sp_digit* k, int map,
  17168. int ct, void* heap)
  17169. {
  17170. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  17171. sp_point_256 rtd;
  17172. sp_point_256 pd;
  17173. sp_digit td[2 * 8 * 5];
  17174. #endif
  17175. sp_point_256* rt;
  17176. sp_point_256* p = NULL;
  17177. sp_digit* t;
  17178. int i, j;
  17179. int y, x;
  17180. int err;
  17181. (void)g;
  17182. /* Constant time used for cache attack resistance implementation. */
  17183. (void)ct;
  17184. (void)heap;
  17185. err = sp_256_point_new_8(heap, rtd, rt);
  17186. if (err == MP_OKAY) {
  17187. err = sp_256_point_new_8(heap, pd, p);
  17188. }
  17189. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  17190. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
  17191. DYNAMIC_TYPE_ECC);
  17192. if (t == NULL) {
  17193. err = MEMORY_E;
  17194. }
  17195. #else
  17196. t = td;
  17197. #endif
  17198. if (err == MP_OKAY) {
  17199. XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
  17200. XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
  17201. y = 0;
  17202. for (j=0,x=31; j<8; j++,x+=32) {
  17203. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  17204. }
  17205. #ifndef WC_NO_CACHE_RESISTANT
  17206. if (ct) {
  17207. sp_256_get_entry_256_8(rt, table, y);
  17208. } else
  17209. #endif
  17210. {
  17211. XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
  17212. XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
  17213. }
  17214. rt->infinity = !y;
  17215. for (i=30; i>=0; i--) {
  17216. y = 0;
  17217. for (j=0,x=i; j<8; j++,x+=32) {
  17218. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  17219. }
  17220. sp_256_proj_point_dbl_8(rt, rt, t);
  17221. #ifndef WC_NO_CACHE_RESISTANT
  17222. if (ct) {
  17223. sp_256_get_entry_256_8(p, table, y);
  17224. }
  17225. else
  17226. #endif
  17227. {
  17228. XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
  17229. XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
  17230. }
  17231. p->infinity = !y;
  17232. sp_256_proj_point_add_qz1_8(rt, rt, p, t);
  17233. }
  17234. if (map != 0) {
  17235. sp_256_map_8(r, rt, t);
  17236. }
  17237. else {
  17238. XMEMCPY(r, rt, sizeof(sp_point_256));
  17239. }
  17240. }
  17241. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  17242. if (t != NULL) {
  17243. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  17244. }
  17245. #endif
  17246. sp_256_point_free_8(p, 0, heap);
  17247. sp_256_point_free_8(rt, 0, heap);
  17248. return err;
  17249. }
  17250. #ifdef FP_ECC
/* Number of points that can be held in the cache. */
#ifndef FP_ENTRIES
#define FP_ENTRIES 16
#endif
/* Cache entry: a point together with the pre-computed table for it. */
typedef struct sp_cache_256_t {
/* X ordinate of the cached point - compared on lookup. */
sp_digit x[8];
/* Y ordinate of the cached point - compared on lookup. */
sp_digit y[8];
/* Pre-computed table for the point; sp_256_ecc_mulmod_8 has it generated by
 * sp_256_gen_stripe_table_8 when cnt reaches 2. */
sp_table_entry_256 table[256];
/* Use count: set to 1 when the entry is claimed, incremented on each hit. */
uint32_t cnt;
/* Non-zero when the entry holds a point. */
int set;
} sp_cache_256_t;
/* The cache entries. */
static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
/* Index of the entry most recently returned by sp_ecc_get_cache_256;
 * -1 until the first lookup. */
static THREAD_LS_T int sp_cache_256_last = -1;
/* Non-zero once sp_ecc_get_cache_256 has cleared the 'set' flags. */
static THREAD_LS_T int sp_cache_256_inited = 0;
#ifndef HAVE_THREAD_LS
/* Non-zero once sp_256_ecc_mulmod_8 has initialized sp_cache_256_lock. */
static volatile int initCacheMutex_256 = 0;
/* Held by sp_256_ecc_mulmod_8 around cache lookup and table generation. */
static wolfSSL_Mutex sp_cache_256_lock;
#endif
  17268. static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
  17269. {
  17270. int i, j;
  17271. uint32_t least;
  17272. if (sp_cache_256_inited == 0) {
  17273. for (i=0; i<FP_ENTRIES; i++) {
  17274. sp_cache_256[i].set = 0;
  17275. }
  17276. sp_cache_256_inited = 1;
  17277. }
  17278. /* Compare point with those in cache. */
  17279. for (i=0; i<FP_ENTRIES; i++) {
  17280. if (!sp_cache_256[i].set)
  17281. continue;
  17282. if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
  17283. sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
  17284. sp_cache_256[i].cnt++;
  17285. break;
  17286. }
  17287. }
  17288. /* No match. */
  17289. if (i == FP_ENTRIES) {
  17290. /* Find empty entry. */
  17291. i = (sp_cache_256_last + 1) % FP_ENTRIES;
  17292. for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
  17293. if (!sp_cache_256[i].set) {
  17294. break;
  17295. }
  17296. }
  17297. /* Evict least used. */
  17298. if (i == sp_cache_256_last) {
  17299. least = sp_cache_256[0].cnt;
  17300. for (j=1; j<FP_ENTRIES; j++) {
  17301. if (sp_cache_256[j].cnt < least) {
  17302. i = j;
  17303. least = sp_cache_256[i].cnt;
  17304. }
  17305. }
  17306. }
  17307. XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
  17308. XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
  17309. sp_cache_256[i].set = 1;
  17310. sp_cache_256[i].cnt = 1;
  17311. }
  17312. *cache = &sp_cache_256[i];
  17313. sp_cache_256_last = i;
  17314. }
  17315. #endif /* FP_ECC */
  17316. /* Multiply the base point of P256 by the scalar and return the result.
  17317. * If map is true then convert result to affine coordinates.
  17318. *
  17319. * r Resulting point.
  17320. * g Point to multiply.
  17321. * k Scalar to multiply by.
  17322. * map Indicates whether to convert result to affine.
  17323. * ct Constant time required.
  17324. * heap Heap to use for allocation.
  17325. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  17326. */
  17327. static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
  17328. int map, int ct, void* heap)
  17329. {
  17330. #ifndef FP_ECC
  17331. return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
  17332. #else
  17333. sp_digit tmp[2 * 8 * 5];
  17334. sp_cache_256_t* cache;
  17335. int err = MP_OKAY;
  17336. #ifndef HAVE_THREAD_LS
  17337. if (initCacheMutex_256 == 0) {
  17338. wc_InitMutex(&sp_cache_256_lock);
  17339. initCacheMutex_256 = 1;
  17340. }
  17341. if (wc_LockMutex(&sp_cache_256_lock) != 0)
  17342. err = BAD_MUTEX_E;
  17343. #endif /* HAVE_THREAD_LS */
  17344. if (err == MP_OKAY) {
  17345. sp_ecc_get_cache_256(g, &cache);
  17346. if (cache->cnt == 2)
  17347. sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
  17348. #ifndef HAVE_THREAD_LS
  17349. wc_UnLockMutex(&sp_cache_256_lock);
  17350. #endif /* HAVE_THREAD_LS */
  17351. if (cache->cnt < 2) {
  17352. err = sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
  17353. }
  17354. else {
  17355. err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
  17356. map, ct, heap);
  17357. }
  17358. }
  17359. return err;
  17360. #endif
  17361. }
  17362. #endif /* WOLFSSL_SP_SMALL */
  17363. /* Multiply the point by the scalar and return the result.
  17364. * If map is true then convert result to affine coordinates.
  17365. *
  17366. * km Scalar to multiply by.
  17367. * p Point to multiply.
  17368. * r Resulting point.
  17369. * map Indicates whether to convert result to affine.
  17370. * heap Heap to use for allocation.
  17371. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  17372. */
  17373. int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
  17374. void* heap)
  17375. {
  17376. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  17377. sp_point_256 p;
  17378. sp_digit kd[8];
  17379. #endif
  17380. sp_point_256* point;
  17381. sp_digit* k = NULL;
  17382. int err = MP_OKAY;
  17383. err = sp_256_point_new_8(heap, p, point);
  17384. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  17385. if (err == MP_OKAY) {
  17386. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
  17387. DYNAMIC_TYPE_ECC);
  17388. if (k == NULL)
  17389. err = MEMORY_E;
  17390. }
  17391. #else
  17392. k = kd;
  17393. #endif
  17394. if (err == MP_OKAY) {
  17395. sp_256_from_mp(k, 8, km);
  17396. sp_256_point_from_ecc_point_8(point, gm);
  17397. err = sp_256_ecc_mulmod_8(point, point, k, map, 1, heap);
  17398. }
  17399. if (err == MP_OKAY) {
  17400. err = sp_256_point_to_ecc_point_8(point, r);
  17401. }
  17402. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  17403. if (k != NULL) {
  17404. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  17405. }
  17406. #endif
  17407. sp_256_point_free_8(point, 0, heap);
  17408. return err;
  17409. }
  17410. #ifdef WOLFSSL_SP_SMALL
/* Pre-computed table of 16 entries used when multiplying the P256 base point.
 *
 * sp_256_ecc_mulmod_base_8 passes this table, together with p256_base, to
 * sp_256_ecc_mulmod_stripe_8. Each entry is an x ordinate followed by a
 * y ordinate of 8 words each; entry 0 is all zero.
 * NOTE(review): the values are presumably affine ordinates in Montgomery
 * form, as a generated stripe table would hold - confirm against the
 * generator before changing any of them.
 */
static const sp_table_entry_256 p256_table[16] = {
/* 0 */
{ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
/* 1 */
{ { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
0xa53755c6,0x18905f76 },
{ 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
0x25885d85,0x8571ff18 } },
/* 2 */
{ { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
0xfd1b667f,0x2f5e6961 },
{ 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
0x8d6f0f7b,0xf648f916 } },
/* 3 */
{ { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
0x133d0015,0x5abe0285 },
{ 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
0x6b6f7383,0x94bb725b } },
/* 4 */
{ { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
0x21d324f6,0x61d587d4 },
{ 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
0x4621efbe,0xfa11fe12 } },
/* 5 */
{ { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
0x1f13bedc,0x586eb04c },
{ 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
0x70864f11,0x19d5ac08 } },
/* 6 */
{ { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
0xc3b266b1,0xbb6de651 },
{ 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
0x5d18b99b,0x60b4619a } },
/* 7 */
{ { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
0xaeebffcd,0x9d0f27b2 },
{ 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
0x356ec48d,0x244a566d } },
/* 8 */
{ { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
0xcd42ab1b,0x803f3e02 },
{ 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
0x5067adc1,0xc097440e } },
/* 9 */
{ { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
0x915f1f30,0xf1af32d5 },
{ 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
0xe2d41c8b,0x23d0f130 } },
/* 10 */
{ { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
0x7990216a,0x50bbb4d9 },
{ 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
0x01fe49c3,0x2b100118 } },
/* 11 */
{ { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
0x83fbae0c,0xdd558999 },
{ 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
0x149d6041,0xe6e4c551 } },
/* 12 */
{ { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
0xdb7e63af,0xfad27148 },
{ 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
0x9f0e1a84,0x77387de3 } },
/* 13 */
{ { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
0xbef0c47e,0xb37b85c0 },
{ 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
0xf9f628d5,0x9c135ac8 } },
/* 14 */
{ { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
0x91ece900,0xc109f9cb },
{ 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
0x2eee1ee1,0x9bc3344f } },
/* 15 */
{ { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
0x5f1a4cc1,0x29591d52 },
{ 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
0x18ef332c,0x6376551f } },
};
  17491. /* Multiply the base point of P256 by the scalar and return the result.
  17492. * If map is true then convert result to affine coordinates.
  17493. *
  17494. * r Resulting point.
  17495. * k Scalar to multiply by.
  17496. * map Indicates whether to convert result to affine.
  17497. * ct Constant time required.
  17498. * heap Heap to use for allocation.
  17499. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  17500. */
  17501. static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
  17502. int map, int ct, void* heap)
  17503. {
  17504. return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
  17505. k, map, ct, heap);
  17506. }
  17507. #else
  17508. static const sp_table_entry_256 p256_table[256] = {
  17509. /* 0 */
  17510. { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
  17511. { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
  17512. /* 1 */
  17513. { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
  17514. 0xa53755c6,0x18905f76 },
  17515. { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
  17516. 0x25885d85,0x8571ff18 } },
  17517. /* 2 */
  17518. { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
  17519. 0xdbdf58e9,0xd953c50d },
  17520. { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
  17521. 0x9eb288f3,0x863ebb7e } },
  17522. /* 3 */
  17523. { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
  17524. 0xb5ff80a0,0x00076055 },
  17525. { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
  17526. 0x34373ee0,0x83087761 } },
  17527. /* 4 */
  17528. { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
  17529. 0xfd1b667f,0x2f5e6961 },
  17530. { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
  17531. 0x8d6f0f7b,0xf648f916 } },
  17532. /* 5 */
  17533. { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
  17534. 0x133d0015,0x5abe0285 },
  17535. { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
  17536. 0x6b6f7383,0x94bb725b } },
  17537. /* 6 */
  17538. { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
  17539. 0x2f7dc4ef,0xcdd6bbcb },
  17540. { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
  17541. 0x4bdae5f6,0xa361bebd } },
  17542. /* 7 */
  17543. { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
  17544. 0xc4b5292c,0xba12ca09 },
  17545. { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
  17546. 0x701fef4b,0x53ebb99d } },
  17547. /* 8 */
  17548. { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
  17549. 0x06d54831,0x8589fb92 },
  17550. { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
  17551. 0x02541c4f,0xebb0696d } },
  17552. /* 9 */
  17553. { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
  17554. 0xd1b27da3,0xeb2820cb },
  17555. { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
  17556. 0x55a7da1d,0x1f28289b } },
  17557. /* 10 */
  17558. { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
  17559. 0x05e54d63,0x337a4b59 },
  17560. { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
  17561. 0xf4c2fbd6,0x0d65e0d5 } },
  17562. /* 11 */
  17563. { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
  17564. 0x52f4a232,0xc23da242 },
  17565. { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
  17566. 0xc790cff1,0x19de3b8c } },
  17567. /* 12 */
  17568. { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
  17569. 0x91fccbfd,0xe34dcbd4 },
  17570. { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
  17571. 0x7b4e0f7f,0xe7641f44 } },
  17572. /* 13 */
  17573. { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
  17574. 0x052a57bf,0x4a12df57 },
  17575. { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
  17576. 0xbb5bea46,0x6af5aa93 } },
  17577. /* 14 */
  17578. { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
  17579. 0x66a44013,0x5fe3475a },
  17580. { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
  17581. 0xecfea916,0xb544e308 } },
  17582. /* 15 */
  17583. { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
  17584. 0xa6b0c20b,0xe0b6b2bd },
  17585. { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
  17586. 0x25a63774,0x71c023de } },
  17587. /* 16 */
  17588. { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
  17589. 0x21d324f6,0x61d587d4 },
  17590. { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
  17591. 0x4621efbe,0xfa11fe12 } },
  17592. /* 17 */
  17593. { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
  17594. 0x1f13bedc,0x586eb04c },
  17595. { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
  17596. 0x70864f11,0x19d5ac08 } },
  17597. /* 18 */
  17598. { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
  17599. 0x7f9c563f,0xe7c0073f },
  17600. { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
  17601. 0xc65b3c0a,0xe08504fe } },
  17602. /* 19 */
  17603. { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
  17604. 0x5b0996b4,0x78f01882 },
  17605. { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
  17606. 0x7e94747a,0x43a773b8 } },
  17607. /* 20 */
  17608. { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
  17609. 0xc3b266b1,0xbb6de651 },
  17610. { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
  17611. 0x5d18b99b,0x60b4619a } },
  17612. /* 21 */
  17613. { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
  17614. 0xaeebffcd,0x9d0f27b2 },
  17615. { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
  17616. 0x356ec48d,0x244a566d } },
  17617. /* 22 */
  17618. { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
  17619. 0x3581ef69,0x45e58c87 },
  17620. { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
  17621. 0xc1e4b7a4,0xc040e21c } },
  17622. /* 23 */
  17623. { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
  17624. 0x682c6ec7,0x1cdf5c97 },
  17625. { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
  17626. 0xa92dff3d,0x046755f8 } },
  17627. /* 24 */
  17628. { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
  17629. 0x3b83a5f3,0x046e5e11 },
  17630. { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
  17631. 0x303d005b,0x6e0106c3 } },
  17632. /* 25 */
  17633. { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
  17634. 0xe901cf1f,0x442594ed },
  17635. { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
  17636. 0x4c2ee68e,0xa796fa51 } },
  17637. /* 26 */
  17638. { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
  17639. 0xc69766e9,0xe4ad2da9 },
  17640. { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
  17641. 0xc37b5143,0xc5e94046 } },
  17642. /* 27 */
  17643. { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
  17644. 0xdb464747,0x63283daf },
  17645. { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
  17646. 0x1981a938,0x68bd19ab } },
  17647. /* 28 */
  17648. { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
  17649. 0x3c6fdfd6,0x495292f5 },
  17650. { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
  17651. 0x26036837,0x0ec7530d } },
  17652. /* 29 */
  17653. { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
  17654. 0x64863f0b,0x0f6207a6 },
  17655. { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
  17656. 0x08ed6dcf,0xff0db072 } },
  17657. /* 30 */
  17658. { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
  17659. 0x88740ea3,0x313b513c },
  17660. { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
  17661. 0x86f19f81,0x2d3abcf9 } },
  17662. /* 31 */
  17663. { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
  17664. 0xded98cdf,0xc036fa10 },
  17665. { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
  17666. 0xb6d40194,0xa6b2a2c4 } },
  17667. /* 32 */
  17668. { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
  17669. 0xaf7c9860,0x810ee252 },
  17670. { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
  17671. 0x92731745,0xd485717a } },
  17672. /* 33 */
  17673. { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
  17674. 0x2f9a604e,0x6a6045a7 },
  17675. { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
  17676. 0xf9e15790,0xd3e45cfa } },
  17677. /* 34 */
  17678. { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
  17679. 0xe3c2c19c,0x207755de },
  17680. { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
  17681. 0x7154b00d,0x48dc5ee5 } },
  17682. /* 35 */
  17683. { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
  17684. 0xdff6f445,0xf2fb0aed },
  17685. { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
  17686. 0xdb28d525,0xa13e9015 } },
  17687. /* 36 */
  17688. { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
  17689. 0x1497526f,0x2bf0d6b0 },
  17690. { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
  17691. 0x162fe89f,0x42a94a5a } },
  17692. /* 37 */
  17693. { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
  17694. 0xc65ede3d,0x2c2dd969 },
  17695. { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
  17696. 0x42c56dbc,0xf437fa1f } },
  17697. /* 38 */
  17698. { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
  17699. 0x54707aa8,0xaaf45b33 },
  17700. { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
  17701. 0xf4f272bc,0xcdf6310d } },
  17702. /* 39 */
  17703. { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
  17704. 0xda9e2ff2,0xf0d008ba },
  17705. { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
  17706. 0xca887b8b,0x5bd5c2f5 } },
  17707. /* 40 */
  17708. { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
  17709. 0xa09e4719,0xaa12dfc8 },
  17710. { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
  17711. 0xe48ca901,0x6c036e73 } },
  17712. /* 41 */
  17713. { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
  17714. 0x96afbe24,0x292ff658 },
  17715. { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
  17716. 0x311b7276,0x644e0c90 } },
  17717. /* 42 */
  17718. { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
  17719. 0xcab79a77,0xf25ae793 },
  17720. { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
  17721. 0x13db0a3e,0x39b8e653 } },
  17722. /* 43 */
  17723. { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
  17724. 0x0f19db06,0x39122f2f },
  17725. { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
  17726. 0xce80ff8d,0x8de80af8 } },
  17727. /* 44 */
  17728. { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
  17729. 0x2e368c04,0x87194906 },
  17730. { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
  17731. 0x5b74fde1,0xfc315e6a } },
  17732. /* 45 */
  17733. { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
  17734. 0xee389088,0xe6d4a7ad },
  17735. { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
  17736. 0x9be2ae57,0x35dfaf9a } },
  17737. /* 46 */
  17738. { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
  17739. 0x1c830d2b,0x1da5c7d7 },
  17740. { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
  17741. 0xdbf4b9d6,0x7077c0fd } },
  17742. /* 47 */
  17743. { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
  17744. 0xe50efe44,0x53a8632e },
  17745. { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
  17746. 0x34e1fcc1,0x028ca76d } },
  17747. /* 48 */
  17748. { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
  17749. 0x6962f046,0x04c17cd8 },
  17750. { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
  17751. 0xfed97474,0xf7ba4de9 } },
  17752. /* 49 */
  17753. { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
  17754. 0x52131c41,0xe31f9600 },
  17755. { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
  17756. 0xce34d47b,0xaa3a6259 } },
  17757. /* 50 */
  17758. { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
  17759. 0x7e79daee,0x2398dd62 },
  17760. { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
  17761. 0x1c046210,0x5717f5b2 } },
  17762. /* 51 */
  17763. { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
  17764. 0x0e3c28de,0x660a2c56 },
  17765. { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
  17766. 0x4f522453,0x624ee54c } },
  17767. /* 52 */
  17768. { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
  17769. 0x92bdfbc0,0x4f392afb },
  17770. { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
  17771. 0xccdb399c,0x8a3e7977 } },
  17772. /* 53 */
  17773. { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
  17774. 0x70c24404,0x3888d023 },
  17775. { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
  17776. 0x18102336,0xa5e62e47 } },
  17777. /* 54 */
  17778. { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
  17779. 0x466a5adc,0x2c4768e6 },
  17780. { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
  17781. 0xf9e652a0,0x7b5e6441 } },
  17782. /* 55 */
  17783. { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
  17784. 0x0c8d744a,0xb8af73cb },
  17785. { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
  17786. 0x7f3f0895,0xa036395f } },
  17787. /* 56 */
  17788. { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
  17789. 0x875fb533,0x4be36b01 },
  17790. { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
  17791. 0x1bdc00c0,0x8cbc9a87 } },
  17792. /* 57 */
  17793. { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
  17794. 0x0c0835f8,0x44e7553e },
  17795. { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
  17796. 0x5eb8fc18,0x470a683a } },
  17797. /* 58 */
  17798. { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
  17799. 0xc63dc6ef,0x16410690 },
  17800. { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
  17801. 0x7abcbb4f,0xd73479fd } },
  17802. /* 59 */
  17803. { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
  17804. 0x0771666b,0x816469e3 },
  17805. { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
  17806. 0xf0dd3f9c,0x0a36dd23 } },
  17807. /* 60 */
  17808. { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
  17809. 0xfdbab118,0xe331dfd6 },
  17810. { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
  17811. 0x492e3389,0xd3b4782a } },
  17812. /* 61 */
  17813. { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
  17814. 0x4c86a5bd,0x7281275a },
  17815. { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
  17816. 0xce145059,0x2c062e7e } },
  17817. /* 62 */
  17818. { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
  17819. 0x2c4e7ef1,0x282a35f9 },
  17820. { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
  17821. 0x554d2abd,0xc71cd513 } },
  17822. /* 63 */
  17823. { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
  17824. 0xcf47f3a3,0xc50f6740 },
  17825. { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
  17826. 0x212958dc,0xb9ecb3a7 } },
  17827. /* 64 */
  17828. { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
  17829. 0xcd42ab1b,0x803f3e02 },
  17830. { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
  17831. 0x5067adc1,0xc097440e } },
  17832. /* 65 */
  17833. { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
  17834. 0x915f1f30,0xf1af32d5 },
  17835. { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
  17836. 0xe2d41c8b,0x23d0f130 } },
  17837. /* 66 */
  17838. { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
  17839. 0xc0a3fadd,0xb0288dd6 },
  17840. { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
  17841. 0xf408c8d2,0xffd3724f } },
  17842. /* 67 */
  17843. { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
  17844. 0xd78c26df,0xf5590f4a },
  17845. { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
  17846. 0xf6f74a20,0x18d6da54 } },
  17847. /* 68 */
  17848. { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
  17849. 0x7990216a,0x50bbb4d9 },
  17850. { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
  17851. 0x01fe49c3,0x2b100118 } },
  17852. /* 69 */
  17853. { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
  17854. 0x83fbae0c,0xdd558999 },
  17855. { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
  17856. 0x149d6041,0xe6e4c551 } },
  17857. /* 70 */
  17858. { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
  17859. 0x07ed56ff,0x51e00db1 },
  17860. { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
  17861. 0x49829177,0xe22f4241 } },
  17862. /* 71 */
  17863. { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
  17864. 0x52dc48c9,0xf709373d },
  17865. { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
  17866. 0xe7275b11,0xbd52d288 } },
  17867. /* 72 */
  17868. { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
  17869. 0xc8aa77a6,0xa0d0f8e4 },
  17870. { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
  17871. 0x946d6a00,0xa56c78c7 } },
  17872. /* 73 */
  17873. { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
  17874. 0x731a367a,0xd8befdf8 },
  17875. { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
  17876. 0xce9f6478,0x854a68a5 } },
  17877. /* 74 */
  17878. { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
  17879. 0x98846a95,0x5cacea0b },
  17880. { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
  17881. 0x35e4efa9,0xe4982d12 } },
  17882. /* 75 */
  17883. { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
  17884. 0x16b20499,0x8046b7f6 },
  17885. { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
  17886. 0x9082af55,0xeb17ca7b } },
  17887. /* 76 */
  17888. { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
  17889. 0xfab5e131,0x097b00ba },
  17890. { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
  17891. 0xafdbcc9e,0xf95c747b } },
  17892. /* 77 */
  17893. { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
  17894. 0x566ed837,0x3512601e },
  17895. { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
  17896. 0x6068ab6b,0x0ef97123 } },
  17897. /* 78 */
  17898. { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
  17899. 0x3b4fbc95,0xfc16d933 },
  17900. { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
  17901. 0xb95d7a17,0x14ca4af1 } },
  17902. /* 79 */
  17903. { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
  17904. 0xf59c231d,0x4057b063 },
  17905. { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
  17906. 0xf1330b13,0x1c3b5d64 } },
  17907. /* 80 */
  17908. { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
  17909. 0xdb7e63af,0xfad27148 },
  17910. { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
  17911. 0x9f0e1a84,0x77387de3 } },
  17912. /* 81 */
  17913. { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
  17914. 0xbef0c47e,0xb37b85c0 },
  17915. { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
  17916. 0xf9f628d5,0x9c135ac8 } },
  17917. /* 82 */
  17918. { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
  17919. 0xc433851f,0x5721361f },
  17920. { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
  17921. 0xe6bb11bd,0xdcbac3c9 } },
  17922. /* 83 */
  17923. { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
  17924. 0x2d626862,0xb8c1c89e },
  17925. { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
  17926. 0x2f9422d4,0x5d23bbda } },
  17927. /* 84 */
  17928. { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
  17929. 0x91ece900,0xc109f9cb },
  17930. { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
  17931. 0x2eee1ee1,0x9bc3344f } },
  17932. /* 85 */
  17933. { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
  17934. 0x5f1a4cc1,0x29591d52 },
  17935. { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
  17936. 0x18ef332c,0x6376551f } },
  17937. /* 86 */
  17938. { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
  17939. 0x08e2987a,0xbdb79dc8 },
  17940. { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
  17941. 0xadd3c14a,0x8ee86001 } },
  17942. /* 87 */
  17943. { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
  17944. 0x6f77aa4b,0x92e51d7a },
  17945. { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
  17946. 0x0a56aaaa,0x5182f86f } },
  17947. /* 88 */
  17948. { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
  17949. 0x4073a6f2,0x91dcab5d },
  17950. { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
  17951. 0x97974f2b,0x17a0cedb } },
  17952. /* 89 */
  17953. { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
  17954. 0x7f4cdf41,0x2e8ce36c },
  17955. { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
  17956. 0x34f668f3,0xf4ccc6cb } },
  17957. /* 90 */
  17958. { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
  17959. 0x9a0df3c9,0xac0db488 },
  17960. { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
  17961. 0x94c974a2,0x95a64a61 } },
  17962. /* 91 */
  17963. { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
  17964. 0x29210677,0x231e54ba },
  17965. { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
  17966. 0xd8a731e1,0xab0be032 } },
  17967. /* 92 */
  17968. { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
  17969. 0x2cf6a679,0xf1bcc880 },
  17970. { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
  17971. 0x5aebb271,0x85169469 } },
  17972. /* 93 */
  17973. { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
  17974. 0xdaad55d8,0x8f67d9d2 },
  17975. { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
  17976. 0xc0728b5d,0xf84572b9 } },
  17977. /* 94 */
  17978. { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
  17979. 0x616b2c19,0xedee2710 },
  17980. { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
  17981. 0x44ebd7f4,0x9fd27e9b } },
  17982. /* 95 */
  17983. { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
  17984. 0x958ff387,0xa40c2fb6 },
  17985. { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
  17986. 0x7dc6decf,0x99bc9bb8 } },
  17987. /* 96 */
  17988. { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
  17989. 0xa16d7e64,0x9abe210b },
  17990. { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
  17991. 0x87f344b0,0x7881c257 } },
  17992. /* 97 */
  17993. { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
  17994. 0xa30e8940,0x15e6e319 },
  17995. { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
  17996. 0x191172ce,0x0e55facf } },
  17997. /* 98 */
  17998. { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
  17999. 0x6fe96577,0xd73d0976 },
  18000. { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
  18001. 0x8f15a50b,0x9250a374 } },
  18002. /* 99 */
  18003. { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
  18004. 0xc1cc8c0b,0x77414082 },
  18005. { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
  18006. 0x12eb20b9,0x8cb04f4d } },
  18007. /* 100 */
  18008. { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
  18009. 0x47123b51,0xe4e429ef },
  18010. { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
  18011. 0x3c6e6552,0x37bca2ff } },
  18012. /* 101 */
  18013. { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
  18014. 0x3002b22a,0x59913edc },
  18015. { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
  18016. 0xb013e226,0x43786e4a } },
  18017. /* 102 */
  18018. { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
  18019. 0xb7e79e7a,0x8638ca98 },
  18020. { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
  18021. 0x7b3aa6f0,0x1ecdd36a } },
  18022. /* 103 */
  18023. { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
  18024. 0xd459f32d,0xd85d0f85 },
  18025. { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
  18026. 0xb4ed3c62,0xa04f19c3 } },
  18027. /* 104 */
  18028. { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
  18029. 0x5c0950b0,0x92b2eeea },
  18030. { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
  18031. 0x5834276c,0x1ee78221 } },
  18032. /* 105 */
  18033. { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
  18034. 0x57a6e150,0xf3f2ced8 },
  18035. { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
  18036. 0x3da3e210,0x0f56a454 } },
  18037. /* 106 */
  18038. { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
  18039. 0x1969e263,0xbd8f1741 },
  18040. { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
  18041. 0x30ccfa09,0x2d1a1c35 } },
  18042. /* 107 */
  18043. { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
  18044. 0xb91fba46,0xa107a65e },
  18045. { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
  18046. 0xf87a9af2,0x183d760a } },
  18047. /* 108 */
  18048. { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
  18049. 0xc269d754,0x1d44179d },
  18050. { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
  18051. 0x9606d262,0x771f9cc2 } },
  18052. /* 109 */
  18053. { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
  18054. 0x0362718e,0x64427a31 },
  18055. { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
  18056. 0x6ae90d6d,0x49d9b749 } },
  18057. /* 110 */
  18058. { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
  18059. 0x3f605445,0x9037d81b },
  18060. { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
  18061. 0x7cc0639c,0x08c3de6a } },
  18062. /* 111 */
  18063. { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
  18064. 0x45796b2f,0xc6909442 },
  18065. { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
  18066. 0xcafe3ac0,0x3fa3db02 } },
  18067. /* 112 */
  18068. { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
  18069. 0xfdb808ff,0xc5c4bdb0 },
  18070. { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
  18071. 0x46c2b6b5,0x2d56db94 } },
  18072. /* 113 */
  18073. { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
  18074. 0xe503ba42,0x0f56bd9d },
  18075. { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
  18076. 0x1173b5f1,0x4003bb9d } },
  18077. /* 114 */
  18078. { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
  18079. 0xa07f2f9e,0x53765522 },
  18080. { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
  18081. 0x6c5d4549,0x7a056f58 } },
  18082. /* 115 */
  18083. { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
  18084. 0x7a1a2675,0x77d482f1 },
  18085. { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
  18086. 0x2b38b0e4,0x4115012b } },
  18087. /* 116 */
  18088. { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
  18089. 0xfbea0946,0xcdf04572 },
  18090. { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
  18091. 0x97383109,0xee703dda } },
  18092. /* 117 */
  18093. { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
  18094. 0xa162ce21,0x2a0ad89d },
  18095. { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
  18096. 0xac2b4659,0xd62d0b67 } },
  18097. /* 118 */
  18098. { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
  18099. 0x991c2426,0xb39a23f2 },
  18100. { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
  18101. 0xc0674cc5,0x04ed0092 } },
  18102. /* 119 */
  18103. { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
  18104. 0x0177c387,0xa0a91fc1 },
  18105. { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
  18106. 0x9ed20c41,0x084cf988 } },
  18107. /* 120 */
  18108. { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
  18109. 0x73abf77e,0xd57955b2 },
  18110. { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
  18111. 0x02d141f1,0x8e14ea42 } },
  18112. /* 121 */
  18113. { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
  18114. 0x2aa4d158,0x597e1a37 },
  18115. { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
  18116. 0x199b4dea,0xca3f0236 } },
  18117. /* 122 */
  18118. { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
  18119. 0x309c07e4,0xbde7fd7e },
  18120. { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
  18121. 0x0a7dd198,0xb623ad0e } },
  18122. /* 123 */
  18123. { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
  18124. 0x58ec137b,0xd6aa2e46 },
  18125. { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
  18126. 0x2dcc513a,0x111662e0 } },
  18127. /* 124 */
  18128. { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
  18129. 0x94b750f8,0xdb3ee1cb },
  18130. { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
  18131. 0x52206a59,0x886a6442 } },
  18132. /* 125 */
  18133. { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
  18134. 0x018a17bc,0xa70cf4eb },
  18135. { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
  18136. 0xd1747b77,0xaa4772ab } },
  18137. /* 126 */
  18138. { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
  18139. 0x30faf974,0x611a6ddc },
  18140. { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
  18141. 0x16429c88,0x5cfffaf8 } },
  18142. /* 127 */
  18143. { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
  18144. 0x7dc1994c,0x6e5a6b23 },
  18145. { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
  18146. 0x242dabcc,0x481a238d } },
  18147. /* 128 */
  18148. { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
  18149. 0xe0cdf943,0x2c41114c },
  18150. { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
  18151. 0x42ff9297,0x20477abf } },
  18152. /* 129 */
  18153. { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
  18154. 0xc77396b6,0xac66409a },
  18155. { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
  18156. 0xcc122f85,0xce8e6975 } },
  18157. /* 130 */
  18158. { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
  18159. 0x250bb4a8,0x08fde365 },
  18160. { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
  18161. 0x565d6cd7,0x2f7e2fd2 } },
  18162. /* 131 */
  18163. { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
  18164. 0x907702ae,0xc65be92e },
  18165. { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
  18166. 0xd1193b3a,0x4bff8e47 } },
  18167. /* 132 */
  18168. { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
  18169. 0x5772967d,0x3e4e4ae6 },
  18170. { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
  18171. 0x58ec6028,0x5388aefd } },
  18172. /* 133 */
  18173. { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
  18174. 0x4f75be0e,0x5cf908d1 },
  18175. { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
  18176. 0x60f00ce2,0xa698ba40 } },
  18177. /* 134 */
  18178. { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
  18179. 0x7aebad8d,0xb142ef8a },
  18180. { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
  18181. 0x58515075,0xd1896a96 } },
  18182. /* 135 */
  18183. { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
  18184. 0x7981da39,0x267b0e0b },
  18185. { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
  18186. 0xa1119393,0xb54e287a } },
  18187. /* 136 */
  18188. { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
  18189. 0x5f87d4e6,0x84abb28b },
  18190. { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
  18191. 0x17655640,0xe5436f67 } },
  18192. /* 137 */
  18193. { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
  18194. 0x5b9ce99e,0x0404f68b },
  18195. { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
  18196. 0x0ac1c701,0x3a4263df } },
  18197. /* 138 */
  18198. { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
  18199. 0x905ea367,0x0ca8fd3f },
  18200. { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
  18201. 0x4ddb0c33,0x96dca264 } },
  18202. /* 139 */
  18203. { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
  18204. 0x3aad59dc,0x4363e212 },
  18205. { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
  18206. 0xd8bb98c4,0x840e115c } },
  18207. /* 140 */
  18208. { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
  18209. 0x30ded6d4,0x5e0d6abd },
  18210. { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
  18211. 0x2945a25a,0x7dea48f4 } },
  18212. /* 141 */
  18213. { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
  18214. 0xebfd16d1,0xabc2a2be },
  18215. { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
  18216. 0x6c7eefc1,0x4ea35394 } },
  18217. /* 142 */
  18218. { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
  18219. 0x1c94ffc3,0x3a76e689 },
  18220. { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
  18221. 0x465e6464,0x8212a10a } },
  18222. /* 143 */
  18223. { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
  18224. 0x599cb164,0xaa7cab71 },
  18225. { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
  18226. 0xfe0617c3,0x40e38073 } },
  18227. /* 144 */
  18228. { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
  18229. 0xb3055526,0xe3604700 },
  18230. { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
  18231. 0xa3dee15f,0x6542d677 } },
  18232. /* 145 */
  18233. { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
  18234. 0x09bb6f21,0xa6534aee },
  18235. { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
  18236. 0xdc9aef22,0xf3cb672f } },
  18237. /* 146 */
  18238. { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
  18239. 0xaae870e7,0x7cafaa2e },
  18240. { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
  18241. 0xb9bd522e,0x0aab13c1 } },
  18242. /* 147 */
  18243. { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
  18244. 0x847012e9,0x4b91a602 },
  18245. { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
  18246. 0x72321cab,0x49534c53 } },
  18247. /* 148 */
  18248. { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
  18249. 0xd65ac5ee,0xcaf46c4f },
  18250. { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
  18251. 0x04c6770f,0x14ce9e57 } },
  18252. /* 149 */
  18253. { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
  18254. 0x3e4c9a71,0x1bb708a5 },
  18255. { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
  18256. 0xda300102,0xf9d126f2 } },
  18257. /* 150 */
  18258. { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
  18259. 0x729ecc69,0x807afcb9 },
  18260. { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
  18261. 0x6568cd8c,0x751adcd1 } },
  18262. /* 151 */
  18263. { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
  18264. 0x2537743f,0x29ec4468 },
  18265. { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
  18266. 0x92a4077d,0xff9370e3 } },
  18267. /* 152 */
  18268. { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
  18269. 0xa2a9d01a,0x9776478b },
  18270. { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
  18271. 0xac2f82fa,0x74a6313f } },
  18272. /* 153 */
  18273. { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
  18274. 0x0ff4863d,0xab75be15 },
  18275. { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
  18276. 0x0b4459f6,0x4ebeac2e } },
  18277. /* 154 */
  18278. { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
  18279. 0x2c1baffc,0xdf99887b },
  18280. { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
  18281. 0x779f4058,0x27b040a7 } },
  18282. /* 155 */
  18283. { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
  18284. 0xe4cfa3f5,0xb393dd37 },
  18285. { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
  18286. 0xd0463419,0x09588c12 } },
  18287. /* 156 */
  18288. { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
  18289. 0xdb9f648b,0x81c879a9 },
  18290. { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
  18291. 0x5fc11bc4,0xfa0d48f5 } },
  18292. /* 157 */
  18293. { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
  18294. 0xb6a367d6,0x8ea0e156 },
  18295. { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
  18296. 0xfa00b5ac,0x3f5ab924 } },
  18297. /* 158 */
  18298. { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
  18299. 0x2b74256e,0x8bc76887 },
  18300. { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
  18301. 0x60fcf34f,0xb386f190 } },
  18302. /* 159 */
  18303. { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
  18304. 0x1b069c4d,0x4cb460f7 },
  18305. { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
  18306. 0x95ef5223,0x52c0d508 } },
  18307. /* 160 */
  18308. { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
  18309. 0x2bb09c0b,0x4ac3c938 },
  18310. { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
  18311. 0xe39705f4,0x380d94c7 } },
  18312. /* 161 */
  18313. { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
  18314. 0xde2637af,0x2ce3e171 },
  18315. { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
  18316. 0x0b624e4d,0x2e6cd852 } },
  18317. /* 162 */
  18318. { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
  18319. 0x42c69d54,0xca177547 },
  18320. { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
  18321. 0x9cab2ce6,0xa976a713 } },
  18322. /* 163 */
  18323. { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
  18324. 0x0a1f4999,0x8720a717 },
  18325. { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
  18326. 0xc769893c,0x9719ef29 } },
  18327. /* 164 */
  18328. { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
  18329. 0xe15704c1,0xa5072976 },
  18330. { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
  18331. 0xf7b77725,0x99389c9d } },
  18332. /* 165 */
  18333. { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
  18334. 0x202c82e4,0xa88806aa },
  18335. { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
  18336. 0x4738dcfe,0x0043bffb } },
  18337. /* 166 */
  18338. { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
  18339. 0xba6c4866,0x52f3ef01 },
  18340. { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
  18341. 0x9ef27e75,0x3296bd89 } },
  18342. /* 167 */
  18343. { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
  18344. 0xaee571e9,0x3b90febf },
  18345. { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
  18346. 0x9f810b18,0x6e88069d } },
  18347. /* 168 */
  18348. { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
  18349. 0xdefaad13,0xa7222bea },
  18350. { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
  18351. 0xbc2ac690,0xbe94d523 } },
  18352. /* 169 */
  18353. { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
  18354. 0x9be8c766,0x7782defe },
  18355. { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
  18356. 0xa2892e4b,0x03838567 } },
  18357. /* 170 */
  18358. { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
  18359. 0xadf7b420,0xdbd986c4 },
  18360. { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
  18361. 0x6860bbd0,0x8e24d3c4 } },
  18362. /* 171 */
  18363. { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
  18364. 0x407bafc8,0x541a99c4 },
  18365. { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
  18366. 0xf57d35d1,0xc0092c49 } },
  18367. /* 172 */
  18368. { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
  18369. 0x7286944d,0x75e40634 },
  18370. { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
  18371. 0xc7848586,0x5b7cb658 } },
  18372. /* 173 */
  18373. { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
  18374. 0x8df097a1,0x7ae13eba },
  18375. { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
  18376. 0xe2a8e3fd,0x787d8074 } },
  18377. /* 174 */
  18378. { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
  18379. 0x9ef28484,0x5c222819 },
  18380. { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
  18381. 0xbaf0f2b0,0xe45d37ab } },
  18382. /* 175 */
  18383. { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
  18384. 0x84dfb9d3,0xed7bc122 },
  18385. { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
  18386. 0x45ca6d27,0xaac97cc9 } },
  18387. /* 176 */
  18388. { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
  18389. 0x1163dc4e,0x318f97b3 },
  18390. { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
  18391. 0x9a84ff4d,0xfa41faa1 } },
  18392. /* 177 */
  18393. { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
  18394. 0x1d26e9e2,0x38bb6b2c },
  18395. { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
  18396. 0xce7601a5,0x94dd0905 } },
  18397. /* 178 */
  18398. { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
  18399. 0xd25c2ae9,0x92077867 },
  18400. { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
  18401. 0xd29beb51,0x81e8428b } },
  18402. /* 179 */
  18403. { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
  18404. 0xdbbfa4b1,0x1b94ab62 },
  18405. { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
  18406. 0x055590ee,0x06a38e28 } },
  18407. /* 180 */
  18408. { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
  18409. 0x83d9d4f8,0xa7b36c20 },
  18410. { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
  18411. 0xa2822a20,0xbe54c6b4 } },
  18412. /* 181 */
  18413. { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
  18414. 0xeae022bb,0xbf30a5ab },
  18415. { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
  18416. 0x2732d13a,0xd1c820de } },
  18417. /* 182 */
  18418. { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
  18419. 0x68a18da3,0xb7d17bed },
  18420. { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
  18421. 0x6412cc64,0x3997fd5e } },
  18422. /* 183 */
  18423. { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
  18424. 0x3c6c13e8,0x0eeb8929 },
  18425. { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
  18426. 0xc922b6ef,0x228916f8 } },
  18427. /* 184 */
  18428. { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
  18429. 0x6e93097e,0xec05ad1d },
  18430. { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
  18431. 0x7ff11b37,0x7d314156 } },
  18432. /* 185 */
  18433. { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
  18434. 0x9bc1d7a3,0xe9ce66fc },
  18435. { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
  18436. 0x72280651,0xd9650b01 } },
  18437. /* 186 */
  18438. { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
  18439. 0x804eb7a2,0x14d6699a },
  18440. { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
  18441. 0x0d43598a,0x6f4c6841 } },
  18442. /* 187 */
  18443. { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
  18444. 0x61189abb,0x4c4350fd },
  18445. { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
  18446. 0x5a3118b5,0xa726d242 } },
  18447. /* 188 */
  18448. { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
  18449. 0xcc6cf392,0x13639e82 },
  18450. { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
  18451. 0xc1a335a3,0xca9365e1 } },
  18452. /* 189 */
  18453. { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
  18454. 0x970b72a5,0x9ce29c34 },
  18455. { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
  18456. 0xab42af98,0x48c4abd7 } },
  18457. /* 190 */
  18458. { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
  18459. 0xf67b33cb,0x78017c32 },
  18460. { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
  18461. 0xde5c1c04,0x53cd0454 } },
  18462. /* 191 */
  18463. { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
  18464. 0xd3d7fa8f,0xeea465c1 },
  18465. { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
  18466. 0x7ae69193,0x1b6e42a4 } },
  18467. /* 192 */
  18468. { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
  18469. 0x187fbd3d,0x0224da14 },
  18470. { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
  18471. 0x42bfff33,0x60838ef0 } },
  18472. /* 193 */
  18473. { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
  18474. 0x2d331643,0x636eb202 },
  18475. { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
  18476. 0x39218bac,0x8844eeb6 } },
  18477. /* 194 */
  18478. { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
  18479. 0x51fb789e,0x27ba83dc },
  18480. { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
  18481. 0x87f3a4ab,0xadb62d34 } },
  18482. /* 195 */
  18483. { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
  18484. 0x75e7c8b2,0xb990fd76 },
  18485. { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
  18486. 0x4d10d18d,0x81707ef9 } },
  18487. /* 196 */
  18488. { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
  18489. 0xd5a8aa5c,0x3792daea },
  18490. { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
  18491. 0x94b001ba,0x5abd635e } },
  18492. /* 197 */
  18493. { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
  18494. 0x846ab610,0x5995bf21 },
  18495. { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
  18496. 0xd483411e,0x44c32ca2 } },
  18497. /* 198 */
  18498. { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
  18499. 0x8082a54c,0x1f2162fb },
  18500. { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
  18501. 0xc3e907c9,0x8f1d402b } },
  18502. /* 199 */
  18503. { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
  18504. 0x926edbf9,0xb1980f43 },
  18505. { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
  18506. 0x37448e45,0x2828ad9b } },
  18507. /* 200 */
  18508. { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
  18509. 0x5a14b390,0x4973f127 },
  18510. { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
  18511. 0xdb168ac7,0x6dac8ed0 } },
  18512. /* 201 */
  18513. { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
  18514. 0x20b9de4c,0x4b23ef59 },
  18515. { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
  18516. 0xddf49a4e,0x4dd71534 } },
  18517. /* 202 */
  18518. { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
  18519. 0x2f4a4dbb,0xfd317000 },
  18520. { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
  18521. 0x9569f365,0x14fac58c } },
  18522. /* 203 */
  18523. { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
  18524. 0x36abda50,0xed7c7651 },
  18525. { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
  18526. 0x4d2e9f53,0xfefcb7f7 } },
  18527. /* 204 */
  18528. { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
  18529. 0x87e0d80b,0x1801a57e },
  18530. { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
  18531. 0x1ead1064,0x9f8fc11e } },
  18532. /* 205 */
  18533. { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
  18534. 0x3d3a69a9,0xa9d3809d },
  18535. { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
  18536. 0xe1178ef7,0x3006b9ae } },
  18537. /* 206 */
  18538. { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
  18539. 0x45f8f761,0x0ab85fd7 },
  18540. { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
  18541. 0x11e942c2,0xb122d675 } },
  18542. /* 207 */
  18543. { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
  18544. 0x097dbaec,0x9f599dc1 },
  18545. { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
  18546. 0x8a294b78,0x7d5528e0 } },
  18547. /* 208 */
  18548. { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
  18549. 0x303f1730,0x28ccea01 },
  18550. { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
  18551. 0xa1d013bf,0xc18baf48 } },
  18552. /* 209 */
  18553. { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
  18554. 0xb7a9596b,0x9def809d },
  18555. { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
  18556. 0x68808ce5,0x0357f8b0 } },
  18557. /* 210 */
  18558. { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
  18559. 0x1b489887,0xe4a01add },
  18560. { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
  18561. 0xce10cc30,0x466d7d79 } },
  18562. /* 211 */
  18563. { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
  18564. 0x451ead1a,0xc672a522 },
  18565. { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
  18566. 0xf2a67513,0x5e3d64fa } },
  18567. /* 212 */
  18568. { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
  18569. 0xeb8e42fc,0x6c8a7a95 },
  18570. { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
  18571. 0xad82ca91,0x348ae422 } },
  18572. /* 213 */
  18573. { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
  18574. 0xd9ef2d2e,0xc1074de0 },
  18575. { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
  18576. 0xc9e54ffc,0xfbadfbdb } },
  18577. /* 214 */
  18578. { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
  18579. 0x83716fcd,0xb7f976b4 },
  18580. { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
  18581. 0xcafcc805,0xf4d41b2e } },
  18582. /* 215 */
  18583. { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
  18584. 0xe0160f10,0x180824ea },
  18585. { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
  18586. 0x83cf6d25,0x67e5f639 } },
  18587. /* 216 */
  18588. { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
  18589. 0x04c11fc6,0x9fef789a },
  18590. { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
  18591. 0xa99c4e20,0xbc80c181 } },
  18592. /* 217 */
  18593. { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
  18594. 0x9f8cdf10,0x49270e62 },
  18595. { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
  18596. 0x61372f7f,0xd2ee52f9 } },
  18597. /* 218 */
  18598. { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
  18599. 0xe5abb733,0xdfb478be },
  18600. { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
  18601. 0x08df473a,0xd9a140b4 } },
  18602. /* 219 */
  18603. { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
  18604. 0x623f4b1a,0x760c058d },
  18605. { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
  18606. 0x8f190409,0x7141982d } },
  18607. /* 220 */
  18608. { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
  18609. 0x89d54e47,0x3af9d1ce },
  18610. { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
  18611. 0x73957dd6,0xb1f815c3 } },
  18612. /* 221 */
  18613. { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
  18614. 0x1543f052,0xa41aed14 },
  18615. { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
  18616. 0x86fb60ef,0xd6e9c1dd } },
  18617. /* 222 */
  18618. { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
  18619. 0xae9bf8c2,0x9c9c6e10 },
  18620. { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
  18621. 0x40fa61b6,0x566bd596 } },
  18622. /* 223 */
  18623. { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
  18624. 0xf525345e,0xcf2c7390 },
  18625. { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
  18626. 0x8aa20979,0x02f51755 } },
  18627. /* 224 */
  18628. { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
  18629. 0xe8d4d97d,0x14e9ada5 },
  18630. { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
  18631. 0x8e9d9ae8,0xa0ad4fab } },
  18632. /* 225 */
  18633. { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
  18634. 0x6e56ed1e,0xbcd530b8 },
  18635. { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
  18636. 0x6979341d,0x909283cf } },
  18637. /* 226 */
  18638. { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
  18639. 0xace1549a,0x35eeb7c9 },
  18640. { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
  18641. 0x448ae864,0x9a8b2cf4 } },
  18642. /* 227 */
  18643. { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
  18644. 0xd4491379,0x6bdb60f4 },
  18645. { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
  18646. 0x94ba08a9,0x01ec3cfd } },
  18647. /* 228 */
  18648. { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
  18649. 0x475464f6,0xd1acb1c0 },
  18650. { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
  18651. 0x405626c2,0x7dcd079d } },
  18652. /* 229 */
  18653. { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
  18654. 0x377d19b8,0x0bf53589 },
  18655. { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
  18656. 0xe16686fc,0xd28be4d9 } },
  18657. /* 230 */
  18658. { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
  18659. 0x510f88ce,0xd76007aa },
  18660. { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
  18661. 0xb303bb01,0xf2b52f68 } },
  18662. /* 231 */
  18663. { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
  18664. 0xcc5aed3a,0xd8dbe98e },
  18665. { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
  18666. 0xee559705,0xe01593a3 } },
  18667. /* 232 */
  18668. { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
  18669. 0xaeb8ef06,0xafec07b1 },
  18670. { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
  18671. 0x6e2dbfdd,0xa71b9354 } },
  18672. /* 233 */
  18673. { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
  18674. 0x628523d9,0x53a2005c },
  18675. { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
  18676. 0x3d588e3d,0xbf47d19b } },
  18677. /* 234 */
  18678. { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
  18679. 0x39c9a1b6,0x001c2c7f },
  18680. { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
  18681. 0x86ffb99b,0xfdadf8e7 } },
  18682. /* 235 */
  18683. { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
  18684. 0x5aa43c94,0x3a838e4d },
  18685. { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
  18686. 0x873e1da3,0x3cdb8257 } },
  18687. /* 236 */
  18688. { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
  18689. 0xf1f57fba,0x5a60cc89 },
  18690. { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
  18691. 0xdbfd8fc0,0x922ff56f } },
  18692. /* 237 */
  18693. { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
  18694. 0xf6c5cd62,0x72919a7d },
  18695. { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
  18696. 0x3624089a,0x5e791780 } },
  18697. /* 238 */
  18698. { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
  18699. 0xe24c2fab,0x4e0a5371 },
  18700. { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
  18701. 0xd56604ee,0xf5ff7818 } },
  18702. /* 239 */
  18703. { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
  18704. 0x533f5e64,0xe41df0e9 },
  18705. { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
  18706. 0xac4f155f,0x8edd7d6e } },
  18707. /* 240 */
  18708. { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
  18709. 0xed8aee96,0x1432c1ca },
  18710. { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
  18711. 0x5ac8d2c6,0xcaef480b } },
  18712. /* 241 */
  18713. { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
  18714. 0x8efae236,0xd0ba177e },
  18715. { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
  18716. 0x1c54ae16,0xf31c957c } },
  18717. /* 242 */
  18718. { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
  18719. 0x96e17c3a,0x013404cb },
  18720. { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
  18721. 0x91933e6c,0x6f377c4b } },
  18722. /* 243 */
  18723. { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
  18724. 0xd2d09506,0x6dba3e4e },
  18725. { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
  18726. 0x3becf4a7,0xf13cf342 } },
  18727. /* 244 */
  18728. { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
  18729. 0x274bbad3,0xc83fa9a9 },
  18730. { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
  18731. 0x5d702683,0xb49d70f4 } },
  18732. /* 245 */
  18733. { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
  18734. 0x0c30f1cf,0x59cfadbb },
  18735. { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
  18736. 0x354a4b67,0x5babf362 } },
  18737. /* 246 */
  18738. { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
  18739. 0x9026c8f0,0x6188c6a7 },
  18740. { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
  18741. 0xdf50b9d9,0x993fe475 } },
  18742. /* 247 */
  18743. { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
  18744. 0x4c80616b,0x81f76466 },
  18745. { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
  18746. 0x5fe9060d,0x564a812a } },
  18747. /* 248 */
  18748. { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
  18749. 0x00e51d6c,0x226bf3cf },
  18750. { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
  18751. 0xff257836,0x68779f47 } },
  18752. /* 249 */
  18753. { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
  18754. 0xeb092e0b,0x97bcb0d1 },
  18755. { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
  18756. 0x0a784655,0xa872ffe8 } },
  18757. /* 250 */
  18758. { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
  18759. 0xb732a36a,0x02812bfc },
  18760. { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
  18761. 0xfe5396af,0x07391cc9 } },
  18762. /* 251 */
  18763. { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
  18764. 0x7e6d2a08,0x355d2adc },
  18765. { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
  18766. 0x7c2a3a79,0x3dc2b1e3 } },
  18767. /* 252 */
  18768. { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
  18769. 0x3ccd846b,0xc4786910 },
  18770. { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
  18771. 0xd5bb4d32,0xccc42968 } },
  18772. /* 253 */
  18773. { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
  18774. 0xaa4871cf,0xe147eb42 },
  18775. { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
  18776. 0x080e96e3,0x239ac047 } },
  18777. /* 254 */
  18778. { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
  18779. 0xf5f7e59d,0xc55fa1a3 },
  18780. { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
  18781. 0xd4f4b699,0x094cd99c } },
  18782. /* 255 */
  18783. { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
  18784. 0x42abad33,0xb90a30b6 },
  18785. { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
  18786. 0x1b7924f7,0x019f8b9a } },
  18787. };
  18788. /* Multiply the base point of P256 by the scalar and return the result.
  18789. * If map is true then convert result to affine coordinates.
  18790. *
  18791. * r Resulting point.
  18792. * k Scalar to multiply by.
  18793. * map Indicates whether to convert result to affine.
  18794. * ct Constant time required.
  18795. * heap Heap to use for allocation.
  18796. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  18797. */
  18798. static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
  18799. int map, int ct, void* heap)
  18800. {
  18801. return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
  18802. k, map, ct, heap);
  18803. }
  18804. #endif
  18805. /* Multiply the base point of P256 by the scalar and return the result.
  18806. * If map is true then convert result to affine coordinates.
  18807. *
  18808. * km Scalar to multiply by.
  18809. * r Resulting point.
  18810. * map Indicates whether to convert result to affine.
  18811. * heap Heap to use for allocation.
  18812. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  18813. */
  18814. int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
  18815. {
  18816. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  18817. sp_point_256 p;
  18818. sp_digit kd[8];
  18819. #endif
  18820. sp_point_256* point;
  18821. sp_digit* k = NULL;
  18822. int err = MP_OKAY;
  18823. err = sp_256_point_new_8(heap, p, point);
  18824. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  18825. if (err == MP_OKAY) {
  18826. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
  18827. DYNAMIC_TYPE_ECC);
  18828. if (k == NULL) {
  18829. err = MEMORY_E;
  18830. }
  18831. }
  18832. #else
  18833. k = kd;
  18834. #endif
  18835. if (err == MP_OKAY) {
  18836. sp_256_from_mp(k, 8, km);
  18837. err = sp_256_ecc_mulmod_base_8(point, k, map, 1, heap);
  18838. }
  18839. if (err == MP_OKAY) {
  18840. err = sp_256_point_to_ecc_point_8(point, r);
  18841. }
  18842. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  18843. if (k != NULL) {
  18844. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  18845. }
  18846. #endif
  18847. sp_256_point_free_8(point, 0, heap);
  18848. return err;
  18849. }
  18850. #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
  18851. defined(HAVE_ECC_VERIFY)
  18852. /* Returns 1 if the number of zero.
  18853. * Implementation is constant time.
  18854. *
  18855. * a Number to check.
  18856. * returns 1 if the number is zero and 0 otherwise.
  18857. */
  18858. static int sp_256_iszero_8(const sp_digit* a)
  18859. {
  18860. return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
  18861. }
  18862. #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
  18863. /* Add 1 to a. (a = a + 1)
  18864. *
  18865. * a A single precision integer.
  18866. */
  18867. SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
  18868. {
  18869. __asm__ __volatile__ (
  18870. "mov r2, #1\n\t"
  18871. "ldr r1, [%[a], #0]\n\t"
  18872. "adds r1, r1, r2\n\t"
  18873. "mov r2, #0\n\t"
  18874. "str r1, [%[a], #0]\n\t"
  18875. "ldr r1, [%[a], #4]\n\t"
  18876. "adcs r1, r1, r2\n\t"
  18877. "str r1, [%[a], #4]\n\t"
  18878. "ldr r1, [%[a], #8]\n\t"
  18879. "adcs r1, r1, r2\n\t"
  18880. "str r1, [%[a], #8]\n\t"
  18881. "ldr r1, [%[a], #12]\n\t"
  18882. "adcs r1, r1, r2\n\t"
  18883. "str r1, [%[a], #12]\n\t"
  18884. "ldr r1, [%[a], #16]\n\t"
  18885. "adcs r1, r1, r2\n\t"
  18886. "str r1, [%[a], #16]\n\t"
  18887. "ldr r1, [%[a], #20]\n\t"
  18888. "adcs r1, r1, r2\n\t"
  18889. "str r1, [%[a], #20]\n\t"
  18890. "ldr r1, [%[a], #24]\n\t"
  18891. "adcs r1, r1, r2\n\t"
  18892. "str r1, [%[a], #24]\n\t"
  18893. "ldr r1, [%[a], #28]\n\t"
  18894. "adcs r1, r1, r2\n\t"
  18895. "str r1, [%[a], #28]\n\t"
  18896. :
  18897. : [a] "r" (a)
  18898. : "memory", "r1", "r2"
  18899. );
  18900. }
  18901. /* Read big endian unsigned byte array into r.
  18902. *
  18903. * r A single precision integer.
  18904. * size Maximum number of bytes to convert
  18905. * a Byte array.
  18906. * n Number of bytes in array to read.
  18907. */
  18908. static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
  18909. {
  18910. int i, j = 0;
  18911. word32 s = 0;
  18912. r[0] = 0;
  18913. for (i = n-1; i >= 0; i--) {
  18914. r[j] |= (((sp_digit)a[i]) << s);
  18915. if (s >= 24U) {
  18916. r[j] &= 0xffffffff;
  18917. s = 32U - s;
  18918. if (j + 1 >= size) {
  18919. break;
  18920. }
  18921. r[++j] = (sp_digit)a[i] >> s;
  18922. s = 8U - s;
  18923. }
  18924. else {
  18925. s += 8U;
  18926. }
  18927. }
  18928. for (j++; j < size; j++) {
  18929. r[j] = 0;
  18930. }
  18931. }
  18932. /* Generates a scalar that is in the range 1..order-1.
  18933. *
  18934. * rng Random number generator.
  18935. * k Scalar value.
  18936. * returns RNG failures, MEMORY_E when memory allocation fails and
  18937. * MP_OKAY on success.
  18938. */
  18939. static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
  18940. {
  18941. int err;
  18942. byte buf[32];
  18943. do {
  18944. err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
  18945. if (err == 0) {
  18946. sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
  18947. if (sp_256_cmp_8(k, p256_order2) < 0) {
  18948. sp_256_add_one_8(k);
  18949. break;
  18950. }
  18951. }
  18952. }
  18953. while (err == 0);
  18954. return err;
  18955. }
  18956. /* Makes a random EC key pair.
  18957. *
  18958. * rng Random number generator.
  18959. * priv Generated private value.
  18960. * pub Generated public point.
  18961. * heap Heap to use for allocation.
  18962. * returns ECC_INF_E when the point does not have the correct order, RNG
  18963. * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
  18964. */
  18965. int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
  18966. {
  18967. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  18968. sp_point_256 p;
  18969. sp_digit kd[8];
  18970. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  18971. sp_point_256 inf;
  18972. #endif
  18973. #endif
  18974. sp_point_256* point;
  18975. sp_digit* k = NULL;
  18976. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  18977. sp_point_256* infinity = NULL;
  18978. #endif
  18979. int err;
  18980. (void)heap;
  18981. err = sp_256_point_new_8(heap, p, point);
  18982. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  18983. if (err == MP_OKAY) {
  18984. err = sp_256_point_new_8(heap, inf, infinity);
  18985. }
  18986. #endif
  18987. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  18988. if (err == MP_OKAY) {
  18989. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
  18990. DYNAMIC_TYPE_ECC);
  18991. if (k == NULL) {
  18992. err = MEMORY_E;
  18993. }
  18994. }
  18995. #else
  18996. k = kd;
  18997. #endif
  18998. if (err == MP_OKAY) {
  18999. err = sp_256_ecc_gen_k_8(rng, k);
  19000. }
  19001. if (err == MP_OKAY) {
  19002. err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, NULL);
  19003. }
  19004. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  19005. if (err == MP_OKAY) {
  19006. err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, 1, NULL);
  19007. }
  19008. if (err == MP_OKAY) {
  19009. if (sp_256_iszero_8(point->x) || sp_256_iszero_8(point->y)) {
  19010. err = ECC_INF_E;
  19011. }
  19012. }
  19013. #endif
  19014. if (err == MP_OKAY) {
  19015. err = sp_256_to_mp(k, priv);
  19016. }
  19017. if (err == MP_OKAY) {
  19018. err = sp_256_point_to_ecc_point_8(point, pub);
  19019. }
  19020. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  19021. if (k != NULL) {
  19022. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  19023. }
  19024. #endif
  19025. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  19026. sp_256_point_free_8(infinity, 1, heap);
  19027. #endif
  19028. sp_256_point_free_8(point, 1, heap);
  19029. return err;
  19030. }
  19031. #ifdef HAVE_ECC_DHE
  19032. /* Write r as big endian to byte array.
  19033. * Fixed length number of bytes written: 32
  19034. *
  19035. * r A single precision integer.
  19036. * a Byte array.
  19037. */
  19038. static void sp_256_to_bin(sp_digit* r, byte* a)
  19039. {
  19040. int i, j, s = 0, b;
  19041. j = 256 / 8 - 1;
  19042. a[j] = 0;
  19043. for (i=0; i<8 && j>=0; i++) {
  19044. b = 0;
  19045. /* lint allow cast of mismatch sp_digit and int */
  19046. a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
  19047. b += 8 - s;
  19048. if (j < 0) {
  19049. break;
  19050. }
  19051. while (b < 32) {
  19052. a[j--] = (byte)(r[i] >> b);
  19053. b += 8;
  19054. if (j < 0) {
  19055. break;
  19056. }
  19057. }
  19058. s = 8 - (b - 32);
  19059. if (j >= 0) {
  19060. a[j] = 0;
  19061. }
  19062. if (s != 0) {
  19063. j++;
  19064. }
  19065. }
  19066. }
  19067. /* Multiply the point by the scalar and serialize the X ordinate.
  19068. * The number is 0 padded to maximum size on output.
  19069. *
  19070. * priv Scalar to multiply the point by.
  19071. * pub Point to multiply.
  19072. * out Buffer to hold X ordinate.
  19073. * outLen On entry, size of the buffer in bytes.
  19074. * On exit, length of data in buffer in bytes.
  19075. * heap Heap to use for allocation.
  19076. * returns BUFFER_E if the buffer is to small for output size,
  19077. * MEMORY_E when memory allocation fails and MP_OKAY on success.
  19078. */
  19079. int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
  19080. word32* outLen, void* heap)
  19081. {
  19082. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  19083. sp_point_256 p;
  19084. sp_digit kd[8];
  19085. #endif
  19086. sp_point_256* point = NULL;
  19087. sp_digit* k = NULL;
  19088. int err = MP_OKAY;
  19089. if (*outLen < 32U) {
  19090. err = BUFFER_E;
  19091. }
  19092. if (err == MP_OKAY) {
  19093. err = sp_256_point_new_8(heap, p, point);
  19094. }
  19095. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  19096. if (err == MP_OKAY) {
  19097. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
  19098. DYNAMIC_TYPE_ECC);
  19099. if (k == NULL)
  19100. err = MEMORY_E;
  19101. }
  19102. #else
  19103. k = kd;
  19104. #endif
  19105. if (err == MP_OKAY) {
  19106. sp_256_from_mp(k, 8, priv);
  19107. sp_256_point_from_ecc_point_8(point, pub);
  19108. err = sp_256_ecc_mulmod_8(point, point, k, 1, 1, heap);
  19109. }
  19110. if (err == MP_OKAY) {
  19111. sp_256_to_bin(point->x, out);
  19112. *outLen = 32;
  19113. }
  19114. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  19115. if (k != NULL) {
  19116. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  19117. }
  19118. #endif
  19119. sp_256_point_free_8(point, 0, heap);
  19120. return err;
  19121. }
  19122. #endif /* HAVE_ECC_DHE */
  19123. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  19124. /* Multiply a and b into r. (r = a * b)
  19125. *
  19126. * r A single precision integer.
  19127. * a A single precision integer.
  19128. * b A single precision integer.
  19129. */
  19130. SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
  19131. const sp_digit* b)
  19132. {
  19133. sp_digit tmp_arr[8];
  19134. sp_digit* tmp = tmp_arr;
  19135. __asm__ __volatile__ (
  19136. /* A[0] * B[0] */
  19137. "ldr r6, [%[a], #0]\n\t"
  19138. "ldr r8, [%[b], #0]\n\t"
  19139. "umull r3, r4, r6, r8\n\t"
  19140. "mov r5, #0\n\t"
  19141. "str r3, [%[tmp], #0]\n\t"
  19142. "mov r3, #0\n\t"
  19143. /* A[0] * B[1] */
  19144. "ldr r8, [%[b], #4]\n\t"
  19145. "umull r6, r8, r6, r8\n\t"
  19146. "adds r4, r4, r6\n\t"
  19147. "adc r5, r5, r8\n\t"
  19148. /* A[1] * B[0] */
  19149. "ldr r6, [%[a], #4]\n\t"
  19150. "ldr r8, [%[b], #0]\n\t"
  19151. "umull r6, r8, r6, r8\n\t"
  19152. "adds r4, r4, r6\n\t"
  19153. "adcs r5, r5, r8\n\t"
  19154. "adc r3, r3, #0\n\t"
  19155. "str r4, [%[tmp], #4]\n\t"
  19156. "mov r4, #0\n\t"
  19157. /* A[0] * B[2] */
  19158. "ldr r6, [%[a], #0]\n\t"
  19159. "ldr r8, [%[b], #8]\n\t"
  19160. "umull r6, r8, r6, r8\n\t"
  19161. "adds r5, r5, r6\n\t"
  19162. "adcs r3, r3, r8\n\t"
  19163. "adc r4, r4, #0\n\t"
  19164. /* A[1] * B[1] */
  19165. "ldr r6, [%[a], #4]\n\t"
  19166. "ldr r8, [%[b], #4]\n\t"
  19167. "umull r6, r8, r6, r8\n\t"
  19168. "adds r5, r5, r6\n\t"
  19169. "adcs r3, r3, r8\n\t"
  19170. "adc r4, r4, #0\n\t"
  19171. /* A[2] * B[0] */
  19172. "ldr r6, [%[a], #8]\n\t"
  19173. "ldr r8, [%[b], #0]\n\t"
  19174. "umull r6, r8, r6, r8\n\t"
  19175. "adds r5, r5, r6\n\t"
  19176. "adcs r3, r3, r8\n\t"
  19177. "adc r4, r4, #0\n\t"
  19178. "str r5, [%[tmp], #8]\n\t"
  19179. "mov r5, #0\n\t"
  19180. /* A[0] * B[3] */
  19181. "ldr r6, [%[a], #0]\n\t"
  19182. "ldr r8, [%[b], #12]\n\t"
  19183. "umull r6, r8, r6, r8\n\t"
  19184. "adds r3, r3, r6\n\t"
  19185. "adcs r4, r4, r8\n\t"
  19186. "adc r5, r5, #0\n\t"
  19187. /* A[1] * B[2] */
  19188. "ldr r6, [%[a], #4]\n\t"
  19189. "ldr r8, [%[b], #8]\n\t"
  19190. "umull r6, r8, r6, r8\n\t"
  19191. "adds r3, r3, r6\n\t"
  19192. "adcs r4, r4, r8\n\t"
  19193. "adc r5, r5, #0\n\t"
  19194. /* A[2] * B[1] */
  19195. "ldr r6, [%[a], #8]\n\t"
  19196. "ldr r8, [%[b], #4]\n\t"
  19197. "umull r6, r8, r6, r8\n\t"
  19198. "adds r3, r3, r6\n\t"
  19199. "adcs r4, r4, r8\n\t"
  19200. "adc r5, r5, #0\n\t"
  19201. /* A[3] * B[0] */
  19202. "ldr r6, [%[a], #12]\n\t"
  19203. "ldr r8, [%[b], #0]\n\t"
  19204. "umull r6, r8, r6, r8\n\t"
  19205. "adds r3, r3, r6\n\t"
  19206. "adcs r4, r4, r8\n\t"
  19207. "adc r5, r5, #0\n\t"
  19208. "str r3, [%[tmp], #12]\n\t"
  19209. "mov r3, #0\n\t"
  19210. /* A[0] * B[4] */
  19211. "ldr r6, [%[a], #0]\n\t"
  19212. "ldr r8, [%[b], #16]\n\t"
  19213. "umull r6, r8, r6, r8\n\t"
  19214. "adds r4, r4, r6\n\t"
  19215. "adcs r5, r5, r8\n\t"
  19216. "adc r3, r3, #0\n\t"
  19217. /* A[1] * B[3] */
  19218. "ldr r6, [%[a], #4]\n\t"
  19219. "ldr r8, [%[b], #12]\n\t"
  19220. "umull r6, r8, r6, r8\n\t"
  19221. "adds r4, r4, r6\n\t"
  19222. "adcs r5, r5, r8\n\t"
  19223. "adc r3, r3, #0\n\t"
  19224. /* A[2] * B[2] */
  19225. "ldr r6, [%[a], #8]\n\t"
  19226. "ldr r8, [%[b], #8]\n\t"
  19227. "umull r6, r8, r6, r8\n\t"
  19228. "adds r4, r4, r6\n\t"
  19229. "adcs r5, r5, r8\n\t"
  19230. "adc r3, r3, #0\n\t"
  19231. /* A[3] * B[1] */
  19232. "ldr r6, [%[a], #12]\n\t"
  19233. "ldr r8, [%[b], #4]\n\t"
  19234. "umull r6, r8, r6, r8\n\t"
  19235. "adds r4, r4, r6\n\t"
  19236. "adcs r5, r5, r8\n\t"
  19237. "adc r3, r3, #0\n\t"
  19238. /* A[4] * B[0] */
  19239. "ldr r6, [%[a], #16]\n\t"
  19240. "ldr r8, [%[b], #0]\n\t"
  19241. "umull r6, r8, r6, r8\n\t"
  19242. "adds r4, r4, r6\n\t"
  19243. "adcs r5, r5, r8\n\t"
  19244. "adc r3, r3, #0\n\t"
  19245. "str r4, [%[tmp], #16]\n\t"
  19246. "mov r4, #0\n\t"
  19247. /* A[0] * B[5] */
  19248. "ldr r6, [%[a], #0]\n\t"
  19249. "ldr r8, [%[b], #20]\n\t"
  19250. "umull r6, r8, r6, r8\n\t"
  19251. "adds r5, r5, r6\n\t"
  19252. "adcs r3, r3, r8\n\t"
  19253. "adc r4, r4, #0\n\t"
  19254. /* A[1] * B[4] */
  19255. "ldr r6, [%[a], #4]\n\t"
  19256. "ldr r8, [%[b], #16]\n\t"
  19257. "umull r6, r8, r6, r8\n\t"
  19258. "adds r5, r5, r6\n\t"
  19259. "adcs r3, r3, r8\n\t"
  19260. "adc r4, r4, #0\n\t"
  19261. /* A[2] * B[3] */
  19262. "ldr r6, [%[a], #8]\n\t"
  19263. "ldr r8, [%[b], #12]\n\t"
  19264. "umull r6, r8, r6, r8\n\t"
  19265. "adds r5, r5, r6\n\t"
  19266. "adcs r3, r3, r8\n\t"
  19267. "adc r4, r4, #0\n\t"
  19268. /* A[3] * B[2] */
  19269. "ldr r6, [%[a], #12]\n\t"
  19270. "ldr r8, [%[b], #8]\n\t"
  19271. "umull r6, r8, r6, r8\n\t"
  19272. "adds r5, r5, r6\n\t"
  19273. "adcs r3, r3, r8\n\t"
  19274. "adc r4, r4, #0\n\t"
  19275. /* A[4] * B[1] */
  19276. "ldr r6, [%[a], #16]\n\t"
  19277. "ldr r8, [%[b], #4]\n\t"
  19278. "umull r6, r8, r6, r8\n\t"
  19279. "adds r5, r5, r6\n\t"
  19280. "adcs r3, r3, r8\n\t"
  19281. "adc r4, r4, #0\n\t"
  19282. /* A[5] * B[0] */
  19283. "ldr r6, [%[a], #20]\n\t"
  19284. "ldr r8, [%[b], #0]\n\t"
  19285. "umull r6, r8, r6, r8\n\t"
  19286. "adds r5, r5, r6\n\t"
  19287. "adcs r3, r3, r8\n\t"
  19288. "adc r4, r4, #0\n\t"
  19289. "str r5, [%[tmp], #20]\n\t"
  19290. "mov r5, #0\n\t"
  19291. /* A[0] * B[6] */
  19292. "ldr r6, [%[a], #0]\n\t"
  19293. "ldr r8, [%[b], #24]\n\t"
  19294. "umull r6, r8, r6, r8\n\t"
  19295. "adds r3, r3, r6\n\t"
  19296. "adcs r4, r4, r8\n\t"
  19297. "adc r5, r5, #0\n\t"
  19298. /* A[1] * B[5] */
  19299. "ldr r6, [%[a], #4]\n\t"
  19300. "ldr r8, [%[b], #20]\n\t"
  19301. "umull r6, r8, r6, r8\n\t"
  19302. "adds r3, r3, r6\n\t"
  19303. "adcs r4, r4, r8\n\t"
  19304. "adc r5, r5, #0\n\t"
  19305. /* A[2] * B[4] */
  19306. "ldr r6, [%[a], #8]\n\t"
  19307. "ldr r8, [%[b], #16]\n\t"
  19308. "umull r6, r8, r6, r8\n\t"
  19309. "adds r3, r3, r6\n\t"
  19310. "adcs r4, r4, r8\n\t"
  19311. "adc r5, r5, #0\n\t"
  19312. /* A[3] * B[3] */
  19313. "ldr r6, [%[a], #12]\n\t"
  19314. "ldr r8, [%[b], #12]\n\t"
  19315. "umull r6, r8, r6, r8\n\t"
  19316. "adds r3, r3, r6\n\t"
  19317. "adcs r4, r4, r8\n\t"
  19318. "adc r5, r5, #0\n\t"
  19319. /* A[4] * B[2] */
  19320. "ldr r6, [%[a], #16]\n\t"
  19321. "ldr r8, [%[b], #8]\n\t"
  19322. "umull r6, r8, r6, r8\n\t"
  19323. "adds r3, r3, r6\n\t"
  19324. "adcs r4, r4, r8\n\t"
  19325. "adc r5, r5, #0\n\t"
  19326. /* A[5] * B[1] */
  19327. "ldr r6, [%[a], #20]\n\t"
  19328. "ldr r8, [%[b], #4]\n\t"
  19329. "umull r6, r8, r6, r8\n\t"
  19330. "adds r3, r3, r6\n\t"
  19331. "adcs r4, r4, r8\n\t"
  19332. "adc r5, r5, #0\n\t"
  19333. /* A[6] * B[0] */
  19334. "ldr r6, [%[a], #24]\n\t"
  19335. "ldr r8, [%[b], #0]\n\t"
  19336. "umull r6, r8, r6, r8\n\t"
  19337. "adds r3, r3, r6\n\t"
  19338. "adcs r4, r4, r8\n\t"
  19339. "adc r5, r5, #0\n\t"
  19340. "str r3, [%[tmp], #24]\n\t"
  19341. "mov r3, #0\n\t"
  19342. /* A[0] * B[7] */
  19343. "ldr r6, [%[a], #0]\n\t"
  19344. "ldr r8, [%[b], #28]\n\t"
  19345. "umull r6, r8, r6, r8\n\t"
  19346. "adds r4, r4, r6\n\t"
  19347. "adcs r5, r5, r8\n\t"
  19348. "adc r3, r3, #0\n\t"
  19349. /* A[1] * B[6] */
  19350. "ldr r6, [%[a], #4]\n\t"
  19351. "ldr r8, [%[b], #24]\n\t"
  19352. "umull r6, r8, r6, r8\n\t"
  19353. "adds r4, r4, r6\n\t"
  19354. "adcs r5, r5, r8\n\t"
  19355. "adc r3, r3, #0\n\t"
  19356. /* A[2] * B[5] */
  19357. "ldr r6, [%[a], #8]\n\t"
  19358. "ldr r8, [%[b], #20]\n\t"
  19359. "umull r6, r8, r6, r8\n\t"
  19360. "adds r4, r4, r6\n\t"
  19361. "adcs r5, r5, r8\n\t"
  19362. "adc r3, r3, #0\n\t"
  19363. /* A[3] * B[4] */
  19364. "ldr r6, [%[a], #12]\n\t"
  19365. "ldr r8, [%[b], #16]\n\t"
  19366. "umull r6, r8, r6, r8\n\t"
  19367. "adds r4, r4, r6\n\t"
  19368. "adcs r5, r5, r8\n\t"
  19369. "adc r3, r3, #0\n\t"
  19370. /* A[4] * B[3] */
  19371. "ldr r6, [%[a], #16]\n\t"
  19372. "ldr r8, [%[b], #12]\n\t"
  19373. "umull r6, r8, r6, r8\n\t"
  19374. "adds r4, r4, r6\n\t"
  19375. "adcs r5, r5, r8\n\t"
  19376. "adc r3, r3, #0\n\t"
  19377. /* A[5] * B[2] */
  19378. "ldr r6, [%[a], #20]\n\t"
  19379. "ldr r8, [%[b], #8]\n\t"
  19380. "umull r6, r8, r6, r8\n\t"
  19381. "adds r4, r4, r6\n\t"
  19382. "adcs r5, r5, r8\n\t"
  19383. "adc r3, r3, #0\n\t"
  19384. /* A[6] * B[1] */
  19385. "ldr r6, [%[a], #24]\n\t"
  19386. "ldr r8, [%[b], #4]\n\t"
  19387. "umull r6, r8, r6, r8\n\t"
  19388. "adds r4, r4, r6\n\t"
  19389. "adcs r5, r5, r8\n\t"
  19390. "adc r3, r3, #0\n\t"
  19391. /* A[7] * B[0] */
  19392. "ldr r6, [%[a], #28]\n\t"
  19393. "ldr r8, [%[b], #0]\n\t"
  19394. "umull r6, r8, r6, r8\n\t"
  19395. "adds r4, r4, r6\n\t"
  19396. "adcs r5, r5, r8\n\t"
  19397. "adc r3, r3, #0\n\t"
  19398. "str r4, [%[tmp], #28]\n\t"
  19399. "mov r4, #0\n\t"
  19400. /* A[1] * B[7] */
  19401. "ldr r6, [%[a], #4]\n\t"
  19402. "ldr r8, [%[b], #28]\n\t"
  19403. "umull r6, r8, r6, r8\n\t"
  19404. "adds r5, r5, r6\n\t"
  19405. "adcs r3, r3, r8\n\t"
  19406. "adc r4, r4, #0\n\t"
  19407. /* A[2] * B[6] */
  19408. "ldr r6, [%[a], #8]\n\t"
  19409. "ldr r8, [%[b], #24]\n\t"
  19410. "umull r6, r8, r6, r8\n\t"
  19411. "adds r5, r5, r6\n\t"
  19412. "adcs r3, r3, r8\n\t"
  19413. "adc r4, r4, #0\n\t"
  19414. /* A[3] * B[5] */
  19415. "ldr r6, [%[a], #12]\n\t"
  19416. "ldr r8, [%[b], #20]\n\t"
  19417. "umull r6, r8, r6, r8\n\t"
  19418. "adds r5, r5, r6\n\t"
  19419. "adcs r3, r3, r8\n\t"
  19420. "adc r4, r4, #0\n\t"
  19421. /* A[4] * B[4] */
  19422. "ldr r6, [%[a], #16]\n\t"
  19423. "ldr r8, [%[b], #16]\n\t"
  19424. "umull r6, r8, r6, r8\n\t"
  19425. "adds r5, r5, r6\n\t"
  19426. "adcs r3, r3, r8\n\t"
  19427. "adc r4, r4, #0\n\t"
  19428. /* A[5] * B[3] */
  19429. "ldr r6, [%[a], #20]\n\t"
  19430. "ldr r8, [%[b], #12]\n\t"
  19431. "umull r6, r8, r6, r8\n\t"
  19432. "adds r5, r5, r6\n\t"
  19433. "adcs r3, r3, r8\n\t"
  19434. "adc r4, r4, #0\n\t"
  19435. /* A[6] * B[2] */
  19436. "ldr r6, [%[a], #24]\n\t"
  19437. "ldr r8, [%[b], #8]\n\t"
  19438. "umull r6, r8, r6, r8\n\t"
  19439. "adds r5, r5, r6\n\t"
  19440. "adcs r3, r3, r8\n\t"
  19441. "adc r4, r4, #0\n\t"
  19442. /* A[7] * B[1] */
  19443. "ldr r6, [%[a], #28]\n\t"
  19444. "ldr r8, [%[b], #4]\n\t"
  19445. "umull r6, r8, r6, r8\n\t"
  19446. "adds r5, r5, r6\n\t"
  19447. "adcs r3, r3, r8\n\t"
  19448. "adc r4, r4, #0\n\t"
  19449. "str r5, [%[r], #32]\n\t"
  19450. "mov r5, #0\n\t"
  19451. /* A[2] * B[7] */
  19452. "ldr r6, [%[a], #8]\n\t"
  19453. "ldr r8, [%[b], #28]\n\t"
  19454. "umull r6, r8, r6, r8\n\t"
  19455. "adds r3, r3, r6\n\t"
  19456. "adcs r4, r4, r8\n\t"
  19457. "adc r5, r5, #0\n\t"
  19458. /* A[3] * B[6] */
  19459. "ldr r6, [%[a], #12]\n\t"
  19460. "ldr r8, [%[b], #24]\n\t"
  19461. "umull r6, r8, r6, r8\n\t"
  19462. "adds r3, r3, r6\n\t"
  19463. "adcs r4, r4, r8\n\t"
  19464. "adc r5, r5, #0\n\t"
  19465. /* A[4] * B[5] */
  19466. "ldr r6, [%[a], #16]\n\t"
  19467. "ldr r8, [%[b], #20]\n\t"
  19468. "umull r6, r8, r6, r8\n\t"
  19469. "adds r3, r3, r6\n\t"
  19470. "adcs r4, r4, r8\n\t"
  19471. "adc r5, r5, #0\n\t"
  19472. /* A[5] * B[4] */
  19473. "ldr r6, [%[a], #20]\n\t"
  19474. "ldr r8, [%[b], #16]\n\t"
  19475. "umull r6, r8, r6, r8\n\t"
  19476. "adds r3, r3, r6\n\t"
  19477. "adcs r4, r4, r8\n\t"
  19478. "adc r5, r5, #0\n\t"
  19479. /* A[6] * B[3] */
  19480. "ldr r6, [%[a], #24]\n\t"
  19481. "ldr r8, [%[b], #12]\n\t"
  19482. "umull r6, r8, r6, r8\n\t"
  19483. "adds r3, r3, r6\n\t"
  19484. "adcs r4, r4, r8\n\t"
  19485. "adc r5, r5, #0\n\t"
  19486. /* A[7] * B[2] */
  19487. "ldr r6, [%[a], #28]\n\t"
  19488. "ldr r8, [%[b], #8]\n\t"
  19489. "umull r6, r8, r6, r8\n\t"
  19490. "adds r3, r3, r6\n\t"
  19491. "adcs r4, r4, r8\n\t"
  19492. "adc r5, r5, #0\n\t"
  19493. "str r3, [%[r], #36]\n\t"
  19494. "mov r3, #0\n\t"
  19495. /* A[3] * B[7] */
  19496. "ldr r6, [%[a], #12]\n\t"
  19497. "ldr r8, [%[b], #28]\n\t"
  19498. "umull r6, r8, r6, r8\n\t"
  19499. "adds r4, r4, r6\n\t"
  19500. "adcs r5, r5, r8\n\t"
  19501. "adc r3, r3, #0\n\t"
  19502. /* A[4] * B[6] */
  19503. "ldr r6, [%[a], #16]\n\t"
  19504. "ldr r8, [%[b], #24]\n\t"
  19505. "umull r6, r8, r6, r8\n\t"
  19506. "adds r4, r4, r6\n\t"
  19507. "adcs r5, r5, r8\n\t"
  19508. "adc r3, r3, #0\n\t"
  19509. /* A[5] * B[5] */
  19510. "ldr r6, [%[a], #20]\n\t"
  19511. "ldr r8, [%[b], #20]\n\t"
  19512. "umull r6, r8, r6, r8\n\t"
  19513. "adds r4, r4, r6\n\t"
  19514. "adcs r5, r5, r8\n\t"
  19515. "adc r3, r3, #0\n\t"
  19516. /* A[6] * B[4] */
  19517. "ldr r6, [%[a], #24]\n\t"
  19518. "ldr r8, [%[b], #16]\n\t"
  19519. "umull r6, r8, r6, r8\n\t"
  19520. "adds r4, r4, r6\n\t"
  19521. "adcs r5, r5, r8\n\t"
  19522. "adc r3, r3, #0\n\t"
  19523. /* A[7] * B[3] */
  19524. "ldr r6, [%[a], #28]\n\t"
  19525. "ldr r8, [%[b], #12]\n\t"
  19526. "umull r6, r8, r6, r8\n\t"
  19527. "adds r4, r4, r6\n\t"
  19528. "adcs r5, r5, r8\n\t"
  19529. "adc r3, r3, #0\n\t"
  19530. "str r4, [%[r], #40]\n\t"
  19531. "mov r4, #0\n\t"
  19532. /* A[4] * B[7] */
  19533. "ldr r6, [%[a], #16]\n\t"
  19534. "ldr r8, [%[b], #28]\n\t"
  19535. "umull r6, r8, r6, r8\n\t"
  19536. "adds r5, r5, r6\n\t"
  19537. "adcs r3, r3, r8\n\t"
  19538. "adc r4, r4, #0\n\t"
  19539. /* A[5] * B[6] */
  19540. "ldr r6, [%[a], #20]\n\t"
  19541. "ldr r8, [%[b], #24]\n\t"
  19542. "umull r6, r8, r6, r8\n\t"
  19543. "adds r5, r5, r6\n\t"
  19544. "adcs r3, r3, r8\n\t"
  19545. "adc r4, r4, #0\n\t"
  19546. /* A[6] * B[5] */
  19547. "ldr r6, [%[a], #24]\n\t"
  19548. "ldr r8, [%[b], #20]\n\t"
  19549. "umull r6, r8, r6, r8\n\t"
  19550. "adds r5, r5, r6\n\t"
  19551. "adcs r3, r3, r8\n\t"
  19552. "adc r4, r4, #0\n\t"
  19553. /* A[7] * B[4] */
  19554. "ldr r6, [%[a], #28]\n\t"
  19555. "ldr r8, [%[b], #16]\n\t"
  19556. "umull r6, r8, r6, r8\n\t"
  19557. "adds r5, r5, r6\n\t"
  19558. "adcs r3, r3, r8\n\t"
  19559. "adc r4, r4, #0\n\t"
  19560. "str r5, [%[r], #44]\n\t"
  19561. "mov r5, #0\n\t"
  19562. /* A[5] * B[7] */
  19563. "ldr r6, [%[a], #20]\n\t"
  19564. "ldr r8, [%[b], #28]\n\t"
  19565. "umull r6, r8, r6, r8\n\t"
  19566. "adds r3, r3, r6\n\t"
  19567. "adcs r4, r4, r8\n\t"
  19568. "adc r5, r5, #0\n\t"
  19569. /* A[6] * B[6] */
  19570. "ldr r6, [%[a], #24]\n\t"
  19571. "ldr r8, [%[b], #24]\n\t"
  19572. "umull r6, r8, r6, r8\n\t"
  19573. "adds r3, r3, r6\n\t"
  19574. "adcs r4, r4, r8\n\t"
  19575. "adc r5, r5, #0\n\t"
  19576. /* A[7] * B[5] */
  19577. "ldr r6, [%[a], #28]\n\t"
  19578. "ldr r8, [%[b], #20]\n\t"
  19579. "umull r6, r8, r6, r8\n\t"
  19580. "adds r3, r3, r6\n\t"
  19581. "adcs r4, r4, r8\n\t"
  19582. "adc r5, r5, #0\n\t"
  19583. "str r3, [%[r], #48]\n\t"
  19584. "mov r3, #0\n\t"
  19585. /* A[6] * B[7] */
  19586. "ldr r6, [%[a], #24]\n\t"
  19587. "ldr r8, [%[b], #28]\n\t"
  19588. "umull r6, r8, r6, r8\n\t"
  19589. "adds r4, r4, r6\n\t"
  19590. "adcs r5, r5, r8\n\t"
  19591. "adc r3, r3, #0\n\t"
  19592. /* A[7] * B[6] */
  19593. "ldr r6, [%[a], #28]\n\t"
  19594. "ldr r8, [%[b], #24]\n\t"
  19595. "umull r6, r8, r6, r8\n\t"
  19596. "adds r4, r4, r6\n\t"
  19597. "adcs r5, r5, r8\n\t"
  19598. "adc r3, r3, #0\n\t"
  19599. "str r4, [%[r], #52]\n\t"
  19600. "mov r4, #0\n\t"
  19601. /* A[7] * B[7] */
  19602. "ldr r6, [%[a], #28]\n\t"
  19603. "ldr r8, [%[b], #28]\n\t"
  19604. "umull r6, r8, r6, r8\n\t"
  19605. "adds r5, r5, r6\n\t"
  19606. "adc r3, r3, r8\n\t"
  19607. "str r5, [%[r], #56]\n\t"
  19608. "str r3, [%[r], #60]\n\t"
  19609. /* Transfer tmp to r */
  19610. "ldr r3, [%[tmp], #0]\n\t"
  19611. "ldr r4, [%[tmp], #4]\n\t"
  19612. "ldr r5, [%[tmp], #8]\n\t"
  19613. "ldr r6, [%[tmp], #12]\n\t"
  19614. "str r3, [%[r], #0]\n\t"
  19615. "str r4, [%[r], #4]\n\t"
  19616. "str r5, [%[r], #8]\n\t"
  19617. "str r6, [%[r], #12]\n\t"
  19618. "ldr r3, [%[tmp], #16]\n\t"
  19619. "ldr r4, [%[tmp], #20]\n\t"
  19620. "ldr r5, [%[tmp], #24]\n\t"
  19621. "ldr r6, [%[tmp], #28]\n\t"
  19622. "str r3, [%[r], #16]\n\t"
  19623. "str r4, [%[r], #20]\n\t"
  19624. "str r5, [%[r], #24]\n\t"
  19625. "str r6, [%[r], #28]\n\t"
  19626. :
  19627. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
  19628. : "memory", "r3", "r4", "r5", "r6", "r8"
  19629. );
  19630. }
  19631. #endif
  19632. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  19633. #ifdef WOLFSSL_SP_SMALL
  19634. /* Sub b from a into a. (a -= b)
  19635. *
  19636. * a A single precision integer.
  19637. * b A single precision integer.
  19638. */
  19639. SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
  19640. const sp_digit* b)
  19641. {
  19642. sp_digit c = 0;
  19643. __asm__ __volatile__ (
  19644. "mov r8, %[a]\n\t"
  19645. "add r8, r8, #32\n\t"
  19646. "\n1:\n\t"
  19647. "mov r5, #0\n\t"
  19648. "subs r5, r5, %[c]\n\t"
  19649. "ldr r3, [%[a]]\n\t"
  19650. "ldr r4, [%[a], #4]\n\t"
  19651. "ldr r5, [%[b]]\n\t"
  19652. "ldr r6, [%[b], #4]\n\t"
  19653. "sbcs r3, r3, r5\n\t"
  19654. "sbcs r4, r4, r6\n\t"
  19655. "str r3, [%[a]]\n\t"
  19656. "str r4, [%[a], #4]\n\t"
  19657. "sbc %[c], %[c], %[c]\n\t"
  19658. "add %[a], %[a], #8\n\t"
  19659. "add %[b], %[b], #8\n\t"
  19660. "cmp %[a], r8\n\t"
  19661. #ifdef __GNUC__
  19662. "bne 1b\n\t"
  19663. #else
  19664. "bne.n 1b\n\t"
  19665. #endif /* __GNUC__ */
  19666. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  19667. :
  19668. : "memory", "r3", "r4", "r5", "r6", "r8"
  19669. );
  19670. return c;
  19671. }
  19672. #else
  19673. /* Sub b from a into r. (r = a - b)
  19674. *
  19675. * r A single precision integer.
  19676. * a A single precision integer.
  19677. * b A single precision integer.
  19678. */
  19679. SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
  19680. const sp_digit* b)
  19681. {
  19682. sp_digit c = 0;
  19683. __asm__ __volatile__ (
  19684. "ldm %[a], {r3, r4}\n\t"
  19685. "ldm %[b]!, {r5, r6}\n\t"
  19686. "subs r3, r3, r5\n\t"
  19687. "sbcs r4, r4, r6\n\t"
  19688. "stm %[a]!, {r3, r4}\n\t"
  19689. "ldm %[a], {r3, r4}\n\t"
  19690. "ldm %[b]!, {r5, r6}\n\t"
  19691. "sbcs r3, r3, r5\n\t"
  19692. "sbcs r4, r4, r6\n\t"
  19693. "stm %[a]!, {r3, r4}\n\t"
  19694. "ldm %[a], {r3, r4}\n\t"
  19695. "ldm %[b]!, {r5, r6}\n\t"
  19696. "sbcs r3, r3, r5\n\t"
  19697. "sbcs r4, r4, r6\n\t"
  19698. "stm %[a]!, {r3, r4}\n\t"
  19699. "ldm %[a], {r3, r4}\n\t"
  19700. "ldm %[b]!, {r5, r6}\n\t"
  19701. "sbcs r3, r3, r5\n\t"
  19702. "sbcs r4, r4, r6\n\t"
  19703. "stm %[a]!, {r3, r4}\n\t"
  19704. "sbc %[c], %[c], %[c]\n\t"
  19705. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  19706. :
  19707. : "memory", "r3", "r4", "r5", "r6"
  19708. );
  19709. return c;
  19710. }
  19711. #endif /* WOLFSSL_SP_SMALL */
  19712. /* Mul a by digit b into r. (r = a * b)
  19713. *
  19714. * r A single precision integer.
  19715. * a A single precision integer.
  19716. * b A single precision digit.
  19717. */
  19718. SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
  19719. sp_digit b)
  19720. {
  19721. __asm__ __volatile__ (
  19722. "add r9, %[a], #32\n\t"
  19723. /* A[0] * B */
  19724. "ldr r6, [%[a]], #4\n\t"
  19725. "umull r5, r3, r6, %[b]\n\t"
  19726. "mov r4, #0\n\t"
  19727. "str r5, [%[r]], #4\n\t"
  19728. /* A[0] * B - Done */
  19729. "\n1:\n\t"
  19730. "mov r5, #0\n\t"
  19731. /* A[] * B */
  19732. "ldr r6, [%[a]], #4\n\t"
  19733. "umull r6, r8, r6, %[b]\n\t"
  19734. "adds r3, r3, r6\n\t"
  19735. "adcs r4, r4, r8\n\t"
  19736. "adc r5, r5, #0\n\t"
  19737. /* A[] * B - Done */
  19738. "str r3, [%[r]], #4\n\t"
  19739. "mov r3, r4\n\t"
  19740. "mov r4, r5\n\t"
  19741. "cmp %[a], r9\n\t"
  19742. #ifdef __GNUC__
  19743. "blt 1b\n\t"
  19744. #else
  19745. "blt.n 1b\n\t"
  19746. #endif /* __GNUC__ */
  19747. "str r3, [%[r]]\n\t"
  19748. : [r] "+r" (r), [a] "+r" (a)
  19749. : [b] "r" (b)
  19750. : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
  19751. );
  19752. }
  19753. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  19754. *
  19755. * d1 The high order half of the number to divide.
  19756. * d0 The low order half of the number to divide.
  19757. * div The dividend.
  19758. * returns the result of the division.
  19759. *
  19760. * Note that this is an approximate div. It may give an answer 1 larger.
  19761. */
  19762. SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
  19763. sp_digit div)
  19764. {
  19765. sp_digit r = 0;
  19766. __asm__ __volatile__ (
  19767. "lsr r6, %[div], #16\n\t"
  19768. "add r6, r6, #1\n\t"
  19769. "udiv r4, %[d1], r6\n\t"
  19770. "lsl r8, r4, #16\n\t"
  19771. "umull r4, r5, %[div], r8\n\t"
  19772. "subs %[d0], %[d0], r4\n\t"
  19773. "sbc %[d1], %[d1], r5\n\t"
  19774. "udiv r5, %[d1], r6\n\t"
  19775. "lsl r4, r5, #16\n\t"
  19776. "add r8, r8, r4\n\t"
  19777. "umull r4, r5, %[div], r4\n\t"
  19778. "subs %[d0], %[d0], r4\n\t"
  19779. "sbc %[d1], %[d1], r5\n\t"
  19780. "lsl r4, %[d1], #16\n\t"
  19781. "orr r4, r4, %[d0], lsr #16\n\t"
  19782. "udiv r4, r4, r6\n\t"
  19783. "add r8, r8, r4\n\t"
  19784. "umull r4, r5, %[div], r4\n\t"
  19785. "subs %[d0], %[d0], r4\n\t"
  19786. "sbc %[d1], %[d1], r5\n\t"
  19787. "lsl r4, %[d1], #16\n\t"
  19788. "orr r4, r4, %[d0], lsr #16\n\t"
  19789. "udiv r4, r4, r6\n\t"
  19790. "add r8, r8, r4\n\t"
  19791. "umull r4, r5, %[div], r4\n\t"
  19792. "subs %[d0], %[d0], r4\n\t"
  19793. "sbc %[d1], %[d1], r5\n\t"
  19794. "udiv r4, %[d0], %[div]\n\t"
  19795. "add r8, r8, r4\n\t"
  19796. "mov %[r], r8\n\t"
  19797. : [r] "+r" (r)
  19798. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  19799. : "r4", "r5", "r6", "r8"
  19800. );
  19801. return r;
  19802. }
  19803. /* AND m into each word of a and store in r.
  19804. *
  19805. * r A single precision integer.
  19806. * a A single precision integer.
  19807. * m Mask to AND against each digit.
  19808. */
  19809. static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
  19810. {
  19811. #ifdef WOLFSSL_SP_SMALL
  19812. int i;
  19813. for (i=0; i<8; i++) {
  19814. r[i] = a[i] & m;
  19815. }
  19816. #else
  19817. r[0] = a[0] & m;
  19818. r[1] = a[1] & m;
  19819. r[2] = a[2] & m;
  19820. r[3] = a[3] & m;
  19821. r[4] = a[4] & m;
  19822. r[5] = a[5] & m;
  19823. r[6] = a[6] & m;
  19824. r[7] = a[7] & m;
  19825. #endif
  19826. }
  19827. /* Divide d in a and put remainder into r (m*d + r = a)
  19828. * m is not calculated as it is not needed at this time.
  19829. *
  19830. * a Number to be divided.
  19831. * d Number to divide with.
  19832. * m Multiplier result.
  19833. * r Remainder from the division.
  19834. * returns MP_OKAY indicating success.
  19835. */
  19836. static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
  19837. sp_digit* r)
  19838. {
  19839. sp_digit t1[16], t2[9];
  19840. sp_digit div, r1;
  19841. int i;
  19842. (void)m;
  19843. div = d[7];
  19844. XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
  19845. for (i=7; i>=0; i--) {
  19846. sp_digit hi = t1[8 + i] - (t1[8 + i] == div);
  19847. r1 = div_256_word_8(hi, t1[8 + i - 1], div);
  19848. sp_256_mul_d_8(t2, d, r1);
  19849. t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
  19850. t1[8 + i] -= t2[8];
  19851. sp_256_mask_8(t2, d, t1[8 + i]);
  19852. t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
  19853. sp_256_mask_8(t2, d, t1[8 + i]);
  19854. t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
  19855. }
  19856. r1 = sp_256_cmp_8(t1, d) >= 0;
  19857. sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
  19858. return MP_OKAY;
  19859. }
  19860. /* Reduce a modulo m into r. (r = a mod m)
  19861. *
  19862. * r A single precision number that is the reduced result.
  19863. * a A single precision number that is to be reduced.
  19864. * m A single precision number that is the modulus to reduce with.
  19865. * returns MP_OKAY indicating success.
  19866. */
  19867. static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  19868. {
  19869. return sp_256_div_8(a, m, NULL, r);
  19870. }
  19871. #endif
  19872. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  19873. /* Square a and put result in r. (r = a * a)
  19874. *
  19875. * r A single precision integer.
  19876. * a A single precision integer.
  19877. */
  19878. SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
  19879. {
  19880. sp_digit tmp_arr[8];
  19881. sp_digit* tmp = tmp_arr;
  19882. __asm__ __volatile__ (
  19883. /* A[0] * A[0] */
  19884. "ldr r6, [%[a], #0]\n\t"
  19885. "umull r3, r4, r6, r6\n\t"
  19886. "mov r5, #0\n\t"
  19887. "str r3, [%[tmp], #0]\n\t"
  19888. "mov r3, #0\n\t"
  19889. /* A[0] * A[1] */
  19890. "ldr r8, [%[a], #4]\n\t"
  19891. "umull r6, r8, r6, r8\n\t"
  19892. "adds r4, r4, r6\n\t"
  19893. "adc r5, r5, r8\n\t"
  19894. "adds r4, r4, r6\n\t"
  19895. "adcs r5, r5, r8\n\t"
  19896. "adc r3, r3, #0\n\t"
  19897. "str r4, [%[tmp], #4]\n\t"
  19898. "mov r4, #0\n\t"
  19899. /* A[0] * A[2] */
  19900. "ldr r6, [%[a], #0]\n\t"
  19901. "ldr r8, [%[a], #8]\n\t"
  19902. "umull r6, r8, r6, r8\n\t"
  19903. "adds r5, r5, r6\n\t"
  19904. "adc r3, r3, r8\n\t"
  19905. "adds r5, r5, r6\n\t"
  19906. "adcs r3, r3, r8\n\t"
  19907. "adc r4, r4, #0\n\t"
  19908. /* A[1] * A[1] */
  19909. "ldr r6, [%[a], #4]\n\t"
  19910. "umull r6, r8, r6, r6\n\t"
  19911. "adds r5, r5, r6\n\t"
  19912. "adcs r3, r3, r8\n\t"
  19913. "adc r4, r4, #0\n\t"
  19914. "str r5, [%[tmp], #8]\n\t"
  19915. "mov r5, #0\n\t"
  19916. /* A[0] * A[3] */
  19917. "ldr r6, [%[a], #0]\n\t"
  19918. "ldr r8, [%[a], #12]\n\t"
  19919. "umull r9, r10, r6, r8\n\t"
  19920. "mov r11, #0\n\t"
  19921. /* A[1] * A[2] */
  19922. "ldr r6, [%[a], #4]\n\t"
  19923. "ldr r8, [%[a], #8]\n\t"
  19924. "umull r6, r8, r6, r8\n\t"
  19925. "adds r9, r9, r6\n\t"
  19926. "adcs r10, r10, r8\n\t"
  19927. "adc r11, r11, #0\n\t"
  19928. "adds r9, r9, r9\n\t"
  19929. "adcs r10, r10, r10\n\t"
  19930. "adc r11, r11, r11\n\t"
  19931. "adds r3, r3, r9\n\t"
  19932. "adcs r4, r4, r10\n\t"
  19933. "adc r5, r5, r11\n\t"
  19934. "str r3, [%[tmp], #12]\n\t"
  19935. "mov r3, #0\n\t"
  19936. /* A[0] * A[4] */
  19937. "ldr r6, [%[a], #0]\n\t"
  19938. "ldr r8, [%[a], #16]\n\t"
  19939. "umull r9, r10, r6, r8\n\t"
  19940. "mov r11, #0\n\t"
  19941. /* A[1] * A[3] */
  19942. "ldr r6, [%[a], #4]\n\t"
  19943. "ldr r8, [%[a], #12]\n\t"
  19944. "umull r6, r8, r6, r8\n\t"
  19945. "adds r9, r9, r6\n\t"
  19946. "adcs r10, r10, r8\n\t"
  19947. "adc r11, r11, #0\n\t"
  19948. /* A[2] * A[2] */
  19949. "ldr r6, [%[a], #8]\n\t"
  19950. "umull r6, r8, r6, r6\n\t"
  19951. "adds r4, r4, r6\n\t"
  19952. "adcs r5, r5, r8\n\t"
  19953. "adc r3, r3, #0\n\t"
  19954. "adds r9, r9, r9\n\t"
  19955. "adcs r10, r10, r10\n\t"
  19956. "adc r11, r11, r11\n\t"
  19957. "adds r4, r4, r9\n\t"
  19958. "adcs r5, r5, r10\n\t"
  19959. "adc r3, r3, r11\n\t"
  19960. "str r4, [%[tmp], #16]\n\t"
  19961. "mov r4, #0\n\t"
  19962. /* A[0] * A[5] */
  19963. "ldr r6, [%[a], #0]\n\t"
  19964. "ldr r8, [%[a], #20]\n\t"
  19965. "umull r9, r10, r6, r8\n\t"
  19966. "mov r11, #0\n\t"
  19967. /* A[1] * A[4] */
  19968. "ldr r6, [%[a], #4]\n\t"
  19969. "ldr r8, [%[a], #16]\n\t"
  19970. "umull r6, r8, r6, r8\n\t"
  19971. "adds r9, r9, r6\n\t"
  19972. "adcs r10, r10, r8\n\t"
  19973. "adc r11, r11, #0\n\t"
  19974. /* A[2] * A[3] */
  19975. "ldr r6, [%[a], #8]\n\t"
  19976. "ldr r8, [%[a], #12]\n\t"
  19977. "umull r6, r8, r6, r8\n\t"
  19978. "adds r9, r9, r6\n\t"
  19979. "adcs r10, r10, r8\n\t"
  19980. "adc r11, r11, #0\n\t"
  19981. "adds r9, r9, r9\n\t"
  19982. "adcs r10, r10, r10\n\t"
  19983. "adc r11, r11, r11\n\t"
  19984. "adds r5, r5, r9\n\t"
  19985. "adcs r3, r3, r10\n\t"
  19986. "adc r4, r4, r11\n\t"
  19987. "str r5, [%[tmp], #20]\n\t"
  19988. "mov r5, #0\n\t"
  19989. /* A[0] * A[6] */
  19990. "ldr r6, [%[a], #0]\n\t"
  19991. "ldr r8, [%[a], #24]\n\t"
  19992. "umull r9, r10, r6, r8\n\t"
  19993. "mov r11, #0\n\t"
  19994. /* A[1] * A[5] */
  19995. "ldr r6, [%[a], #4]\n\t"
  19996. "ldr r8, [%[a], #20]\n\t"
  19997. "umull r6, r8, r6, r8\n\t"
  19998. "adds r9, r9, r6\n\t"
  19999. "adcs r10, r10, r8\n\t"
  20000. "adc r11, r11, #0\n\t"
  20001. /* A[2] * A[4] */
  20002. "ldr r6, [%[a], #8]\n\t"
  20003. "ldr r8, [%[a], #16]\n\t"
  20004. "umull r6, r8, r6, r8\n\t"
  20005. "adds r9, r9, r6\n\t"
  20006. "adcs r10, r10, r8\n\t"
  20007. "adc r11, r11, #0\n\t"
  20008. /* A[3] * A[3] */
  20009. "ldr r6, [%[a], #12]\n\t"
  20010. "umull r6, r8, r6, r6\n\t"
  20011. "adds r3, r3, r6\n\t"
  20012. "adcs r4, r4, r8\n\t"
  20013. "adc r5, r5, #0\n\t"
  20014. "adds r9, r9, r9\n\t"
  20015. "adcs r10, r10, r10\n\t"
  20016. "adc r11, r11, r11\n\t"
  20017. "adds r3, r3, r9\n\t"
  20018. "adcs r4, r4, r10\n\t"
  20019. "adc r5, r5, r11\n\t"
  20020. "str r3, [%[tmp], #24]\n\t"
  20021. "mov r3, #0\n\t"
  20022. /* A[0] * A[7] */
  20023. "ldr r6, [%[a], #0]\n\t"
  20024. "ldr r8, [%[a], #28]\n\t"
  20025. "umull r9, r10, r6, r8\n\t"
  20026. "mov r11, #0\n\t"
  20027. /* A[1] * A[6] */
  20028. "ldr r6, [%[a], #4]\n\t"
  20029. "ldr r8, [%[a], #24]\n\t"
  20030. "umull r6, r8, r6, r8\n\t"
  20031. "adds r9, r9, r6\n\t"
  20032. "adcs r10, r10, r8\n\t"
  20033. "adc r11, r11, #0\n\t"
  20034. /* A[2] * A[5] */
  20035. "ldr r6, [%[a], #8]\n\t"
  20036. "ldr r8, [%[a], #20]\n\t"
  20037. "umull r6, r8, r6, r8\n\t"
  20038. "adds r9, r9, r6\n\t"
  20039. "adcs r10, r10, r8\n\t"
  20040. "adc r11, r11, #0\n\t"
  20041. /* A[3] * A[4] */
  20042. "ldr r6, [%[a], #12]\n\t"
  20043. "ldr r8, [%[a], #16]\n\t"
  20044. "umull r6, r8, r6, r8\n\t"
  20045. "adds r9, r9, r6\n\t"
  20046. "adcs r10, r10, r8\n\t"
  20047. "adc r11, r11, #0\n\t"
  20048. "adds r9, r9, r9\n\t"
  20049. "adcs r10, r10, r10\n\t"
  20050. "adc r11, r11, r11\n\t"
  20051. "adds r4, r4, r9\n\t"
  20052. "adcs r5, r5, r10\n\t"
  20053. "adc r3, r3, r11\n\t"
  20054. "str r4, [%[tmp], #28]\n\t"
  20055. "mov r4, #0\n\t"
  20056. /* A[1] * A[7] */
  20057. "ldr r6, [%[a], #4]\n\t"
  20058. "ldr r8, [%[a], #28]\n\t"
  20059. "umull r9, r10, r6, r8\n\t"
  20060. "mov r11, #0\n\t"
  20061. /* A[2] * A[6] */
  20062. "ldr r6, [%[a], #8]\n\t"
  20063. "ldr r8, [%[a], #24]\n\t"
  20064. "umull r6, r8, r6, r8\n\t"
  20065. "adds r9, r9, r6\n\t"
  20066. "adcs r10, r10, r8\n\t"
  20067. "adc r11, r11, #0\n\t"
  20068. /* A[3] * A[5] */
  20069. "ldr r6, [%[a], #12]\n\t"
  20070. "ldr r8, [%[a], #20]\n\t"
  20071. "umull r6, r8, r6, r8\n\t"
  20072. "adds r9, r9, r6\n\t"
  20073. "adcs r10, r10, r8\n\t"
  20074. "adc r11, r11, #0\n\t"
  20075. /* A[4] * A[4] */
  20076. "ldr r6, [%[a], #16]\n\t"
  20077. "umull r6, r8, r6, r6\n\t"
  20078. "adds r5, r5, r6\n\t"
  20079. "adcs r3, r3, r8\n\t"
  20080. "adc r4, r4, #0\n\t"
  20081. "adds r9, r9, r9\n\t"
  20082. "adcs r10, r10, r10\n\t"
  20083. "adc r11, r11, r11\n\t"
  20084. "adds r5, r5, r9\n\t"
  20085. "adcs r3, r3, r10\n\t"
  20086. "adc r4, r4, r11\n\t"
  20087. "str r5, [%[r], #32]\n\t"
  20088. "mov r5, #0\n\t"
  20089. /* A[2] * A[7] */
  20090. "ldr r6, [%[a], #8]\n\t"
  20091. "ldr r8, [%[a], #28]\n\t"
  20092. "umull r9, r10, r6, r8\n\t"
  20093. "mov r11, #0\n\t"
  20094. /* A[3] * A[6] */
  20095. "ldr r6, [%[a], #12]\n\t"
  20096. "ldr r8, [%[a], #24]\n\t"
  20097. "umull r6, r8, r6, r8\n\t"
  20098. "adds r9, r9, r6\n\t"
  20099. "adcs r10, r10, r8\n\t"
  20100. "adc r11, r11, #0\n\t"
  20101. /* A[4] * A[5] */
  20102. "ldr r6, [%[a], #16]\n\t"
  20103. "ldr r8, [%[a], #20]\n\t"
  20104. "umull r6, r8, r6, r8\n\t"
  20105. "adds r9, r9, r6\n\t"
  20106. "adcs r10, r10, r8\n\t"
  20107. "adc r11, r11, #0\n\t"
  20108. "adds r9, r9, r9\n\t"
  20109. "adcs r10, r10, r10\n\t"
  20110. "adc r11, r11, r11\n\t"
  20111. "adds r3, r3, r9\n\t"
  20112. "adcs r4, r4, r10\n\t"
  20113. "adc r5, r5, r11\n\t"
  20114. "str r3, [%[r], #36]\n\t"
  20115. "mov r3, #0\n\t"
  20116. /* A[3] * A[7] */
  20117. "ldr r6, [%[a], #12]\n\t"
  20118. "ldr r8, [%[a], #28]\n\t"
  20119. "umull r9, r10, r6, r8\n\t"
  20120. "mov r11, #0\n\t"
  20121. /* A[4] * A[6] */
  20122. "ldr r6, [%[a], #16]\n\t"
  20123. "ldr r8, [%[a], #24]\n\t"
  20124. "umull r6, r8, r6, r8\n\t"
  20125. "adds r9, r9, r6\n\t"
  20126. "adcs r10, r10, r8\n\t"
  20127. "adc r11, r11, #0\n\t"
  20128. /* A[5] * A[5] */
  20129. "ldr r6, [%[a], #20]\n\t"
  20130. "umull r6, r8, r6, r6\n\t"
  20131. "adds r4, r4, r6\n\t"
  20132. "adcs r5, r5, r8\n\t"
  20133. "adc r3, r3, #0\n\t"
  20134. "adds r9, r9, r9\n\t"
  20135. "adcs r10, r10, r10\n\t"
  20136. "adc r11, r11, r11\n\t"
  20137. "adds r4, r4, r9\n\t"
  20138. "adcs r5, r5, r10\n\t"
  20139. "adc r3, r3, r11\n\t"
  20140. "str r4, [%[r], #40]\n\t"
  20141. "mov r4, #0\n\t"
  20142. /* A[4] * A[7] */
  20143. "ldr r6, [%[a], #16]\n\t"
  20144. "ldr r8, [%[a], #28]\n\t"
  20145. "umull r6, r8, r6, r8\n\t"
  20146. "adds r5, r5, r6\n\t"
  20147. "adcs r3, r3, r8\n\t"
  20148. "adc r4, r4, #0\n\t"
  20149. "adds r5, r5, r6\n\t"
  20150. "adcs r3, r3, r8\n\t"
  20151. "adc r4, r4, #0\n\t"
  20152. /* A[5] * A[6] */
  20153. "ldr r6, [%[a], #20]\n\t"
  20154. "ldr r8, [%[a], #24]\n\t"
  20155. "umull r6, r8, r6, r8\n\t"
  20156. "adds r5, r5, r6\n\t"
  20157. "adcs r3, r3, r8\n\t"
  20158. "adc r4, r4, #0\n\t"
  20159. "adds r5, r5, r6\n\t"
  20160. "adcs r3, r3, r8\n\t"
  20161. "adc r4, r4, #0\n\t"
  20162. "str r5, [%[r], #44]\n\t"
  20163. "mov r5, #0\n\t"
  20164. /* A[5] * A[7] */
  20165. "ldr r6, [%[a], #20]\n\t"
  20166. "ldr r8, [%[a], #28]\n\t"
  20167. "umull r6, r8, r6, r8\n\t"
  20168. "adds r3, r3, r6\n\t"
  20169. "adcs r4, r4, r8\n\t"
  20170. "adc r5, r5, #0\n\t"
  20171. "adds r3, r3, r6\n\t"
  20172. "adcs r4, r4, r8\n\t"
  20173. "adc r5, r5, #0\n\t"
  20174. /* A[6] * A[6] */
  20175. "ldr r6, [%[a], #24]\n\t"
  20176. "umull r6, r8, r6, r6\n\t"
  20177. "adds r3, r3, r6\n\t"
  20178. "adcs r4, r4, r8\n\t"
  20179. "adc r5, r5, #0\n\t"
  20180. "str r3, [%[r], #48]\n\t"
  20181. "mov r3, #0\n\t"
  20182. /* A[6] * A[7] */
  20183. "ldr r6, [%[a], #24]\n\t"
  20184. "ldr r8, [%[a], #28]\n\t"
  20185. "umull r6, r8, r6, r8\n\t"
  20186. "adds r4, r4, r6\n\t"
  20187. "adcs r5, r5, r8\n\t"
  20188. "adc r3, r3, #0\n\t"
  20189. "adds r4, r4, r6\n\t"
  20190. "adcs r5, r5, r8\n\t"
  20191. "adc r3, r3, #0\n\t"
  20192. "str r4, [%[r], #52]\n\t"
  20193. "mov r4, #0\n\t"
  20194. /* A[7] * A[7] */
  20195. "ldr r6, [%[a], #28]\n\t"
  20196. "umull r6, r8, r6, r6\n\t"
  20197. "adds r5, r5, r6\n\t"
  20198. "adc r3, r3, r8\n\t"
  20199. "str r5, [%[r], #56]\n\t"
  20200. "str r3, [%[r], #60]\n\t"
  20201. /* Transfer tmp to r */
  20202. "ldr r3, [%[tmp], #0]\n\t"
  20203. "ldr r4, [%[tmp], #4]\n\t"
  20204. "ldr r5, [%[tmp], #8]\n\t"
  20205. "ldr r6, [%[tmp], #12]\n\t"
  20206. "str r3, [%[r], #0]\n\t"
  20207. "str r4, [%[r], #4]\n\t"
  20208. "str r5, [%[r], #8]\n\t"
  20209. "str r6, [%[r], #12]\n\t"
  20210. "ldr r3, [%[tmp], #16]\n\t"
  20211. "ldr r4, [%[tmp], #20]\n\t"
  20212. "ldr r5, [%[tmp], #24]\n\t"
  20213. "ldr r6, [%[tmp], #28]\n\t"
  20214. "str r3, [%[r], #16]\n\t"
  20215. "str r4, [%[r], #20]\n\t"
  20216. "str r5, [%[r], #24]\n\t"
  20217. "str r6, [%[r], #28]\n\t"
  20218. :
  20219. : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
  20220. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
  20221. );
  20222. }
  20223. #ifdef WOLFSSL_SP_SMALL
  20224. /* Order-2 for the P256 curve. */
  20225. static const uint32_t p256_order_minus_2[8] = {
  20226. 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
  20227. 0x00000000U,0xffffffffU
  20228. };
  20229. #else
  20230. /* The low half of the order-2 of the P256 curve. */
  20231. static const uint32_t p256_order_low[4] = {
  20232. 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
  20233. };
  20234. #endif /* WOLFSSL_SP_SMALL */
  20235. /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
  20236. *
  20237. * r Result of the multiplication.
  20238. * a First operand of the multiplication.
  20239. * b Second operand of the multiplication.
  20240. */
  20241. static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
  20242. {
  20243. sp_256_mul_8(r, a, b);
  20244. sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
  20245. }
  20246. /* Square number mod the order of P256 curve. (r = a * a mod order)
  20247. *
  20248. * r Result of the squaring.
  20249. * a Number to square.
  20250. */
  20251. static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
  20252. {
  20253. sp_256_sqr_8(r, a);
  20254. sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
  20255. }
  20256. #ifndef WOLFSSL_SP_SMALL
  20257. /* Square number mod the order of P256 curve a number of times.
  20258. * (r = a ^ n mod order)
  20259. *
  20260. * r Result of the squaring.
  20261. * a Number to square.
  20262. */
  20263. static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
  20264. {
  20265. int i;
  20266. sp_256_mont_sqr_order_8(r, a);
  20267. for (i=1; i<n; i++) {
  20268. sp_256_mont_sqr_order_8(r, r);
  20269. }
  20270. }
  20271. #endif /* !WOLFSSL_SP_SMALL */
  20272. /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
  20273. * (r = 1 / a mod order)
  20274. *
  20275. * r Inverse result.
  20276. * a Number to invert.
  20277. * td Temporary data.
  20278. */
  20279. #ifdef WOLFSSL_SP_NONBLOCK
  20280. typedef struct sp_256_mont_inv_order_8_ctx {
  20281. int state;
  20282. int i;
  20283. } sp_256_mont_inv_order_8_ctx;
  20284. static int sp_256_mont_inv_order_8_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const sp_digit* a,
  20285. sp_digit* t)
  20286. {
  20287. int err = FP_WOULDBLOCK;
  20288. sp_256_mont_inv_order_8_ctx* ctx = (sp_256_mont_inv_order_8_ctx*)sp_ctx;
  20289. typedef char ctx_size_test[sizeof(sp_256_mont_inv_order_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
  20290. (void)sizeof(ctx_size_test);
  20291. switch (ctx->state) {
  20292. case 0:
  20293. XMEMCPY(t, a, sizeof(sp_digit) * 8);
  20294. ctx->i = 254;
  20295. ctx->state = 1;
  20296. break;
  20297. case 1:
  20298. sp_256_mont_sqr_order_8(t, t);
  20299. ctx->state = 2;
  20300. break;
  20301. case 2:
  20302. if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
  20303. sp_256_mont_mul_order_8(t, t, a);
  20304. }
  20305. ctx->i--;
  20306. ctx->state = (ctx->i == 0) ? 3 : 1;
  20307. break;
  20308. case 3:
  20309. XMEMCPY(r, t, sizeof(sp_digit) * 8U);
  20310. err = MP_OKAY;
  20311. break;
  20312. }
  20313. return err;
  20314. }
  20315. #endif /* WOLFSSL_SP_NONBLOCK */
  20316. static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
  20317. sp_digit* td)
  20318. {
  20319. #ifdef WOLFSSL_SP_SMALL
  20320. sp_digit* t = td;
  20321. int i;
  20322. XMEMCPY(t, a, sizeof(sp_digit) * 8);
  20323. for (i=254; i>=0; i--) {
  20324. sp_256_mont_sqr_order_8(t, t);
  20325. if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  20326. sp_256_mont_mul_order_8(t, t, a);
  20327. }
  20328. }
  20329. XMEMCPY(r, t, sizeof(sp_digit) * 8U);
  20330. #else
  20331. sp_digit* t = td;
  20332. sp_digit* t2 = td + 2 * 8;
  20333. sp_digit* t3 = td + 4 * 8;
  20334. int i;
  20335. /* t = a^2 */
  20336. sp_256_mont_sqr_order_8(t, a);
  20337. /* t = a^3 = t * a */
  20338. sp_256_mont_mul_order_8(t, t, a);
  20339. /* t2= a^c = t ^ 2 ^ 2 */
  20340. sp_256_mont_sqr_n_order_8(t2, t, 2);
  20341. /* t3= a^f = t2 * t */
  20342. sp_256_mont_mul_order_8(t3, t2, t);
  20343. /* t2= a^f0 = t3 ^ 2 ^ 4 */
  20344. sp_256_mont_sqr_n_order_8(t2, t3, 4);
  20345. /* t = a^ff = t2 * t3 */
  20346. sp_256_mont_mul_order_8(t, t2, t3);
  20347. /* t3= a^ff00 = t ^ 2 ^ 8 */
  20348. sp_256_mont_sqr_n_order_8(t2, t, 8);
  20349. /* t = a^ffff = t2 * t */
  20350. sp_256_mont_mul_order_8(t, t2, t);
  20351. /* t2= a^ffff0000 = t ^ 2 ^ 16 */
  20352. sp_256_mont_sqr_n_order_8(t2, t, 16);
  20353. /* t = a^ffffffff = t2 * t */
  20354. sp_256_mont_mul_order_8(t, t2, t);
  20355. /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
  20356. sp_256_mont_sqr_n_order_8(t2, t, 64);
  20357. /* t2= a^ffffffff00000000ffffffff = t2 * t */
  20358. sp_256_mont_mul_order_8(t2, t2, t);
  20359. /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
  20360. sp_256_mont_sqr_n_order_8(t2, t2, 32);
  20361. /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
  20362. sp_256_mont_mul_order_8(t2, t2, t);
  20363. /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
  20364. for (i=127; i>=112; i--) {
  20365. sp_256_mont_sqr_order_8(t2, t2);
  20366. if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  20367. sp_256_mont_mul_order_8(t2, t2, a);
  20368. }
  20369. }
  20370. /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
  20371. sp_256_mont_sqr_n_order_8(t2, t2, 4);
  20372. sp_256_mont_mul_order_8(t2, t2, t3);
  20373. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
  20374. for (i=107; i>=64; i--) {
  20375. sp_256_mont_sqr_order_8(t2, t2);
  20376. if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  20377. sp_256_mont_mul_order_8(t2, t2, a);
  20378. }
  20379. }
  20380. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
  20381. sp_256_mont_sqr_n_order_8(t2, t2, 4);
  20382. sp_256_mont_mul_order_8(t2, t2, t3);
  20383. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
  20384. for (i=59; i>=32; i--) {
  20385. sp_256_mont_sqr_order_8(t2, t2);
  20386. if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  20387. sp_256_mont_mul_order_8(t2, t2, a);
  20388. }
  20389. }
  20390. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
  20391. sp_256_mont_sqr_n_order_8(t2, t2, 4);
  20392. sp_256_mont_mul_order_8(t2, t2, t3);
  20393. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
  20394. for (i=27; i>=0; i--) {
  20395. sp_256_mont_sqr_order_8(t2, t2);
  20396. if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  20397. sp_256_mont_mul_order_8(t2, t2, a);
  20398. }
  20399. }
  20400. /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
  20401. sp_256_mont_sqr_n_order_8(t2, t2, 4);
  20402. /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
  20403. sp_256_mont_mul_order_8(r, t2, t3);
  20404. #endif /* WOLFSSL_SP_SMALL */
  20405. }
  20406. #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
  20407. #ifdef HAVE_ECC_SIGN
  20408. #ifndef SP_ECC_MAX_SIG_GEN
  20409. #define SP_ECC_MAX_SIG_GEN 64
  20410. #endif
  20411. /* Sign the hash using the private key.
  20412. * e = [hash, 256 bits] from binary
  20413. * r = (k.G)->x mod order
  20414. * s = (r * x + e) / k mod order
  20415. * The hash is truncated to the first 256 bits.
  20416. *
  20417. * hash Hash to sign.
  20418. * hashLen Length of the hash data.
  20419. * rng Random number generator.
  20420. * priv Private part of key - scalar.
  20421. * rm First part of result as an mp_int.
  20422. * sm Second part of result as an mp_int.
  20423. * heap Heap to use for allocation.
  20424. * returns RNG failures, MEMORY_E when memory allocation fails and
  20425. * MP_OKAY on success.
  20426. */
  20427. #ifdef WOLFSSL_SP_NONBLOCK
  20428. typedef struct sp_ecc_sign_256_ctx {
  20429. int state;
  20430. union {
  20431. sp_256_ecc_mulmod_8_ctx mulmod_ctx;
  20432. sp_256_mont_inv_order_8_ctx mont_inv_order_ctx;
  20433. };
  20434. sp_digit e[2*8];
  20435. sp_digit x[2*8];
  20436. sp_digit k[2*8];
  20437. sp_digit r[2*8];
  20438. sp_digit tmp[3 * 2*8];
  20439. sp_point_256 point;
  20440. sp_digit* s;
  20441. sp_digit* kInv;
  20442. int i;
  20443. } sp_ecc_sign_256_ctx;
  20444. int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
  20445. mp_int* rm, mp_int* sm, mp_int* km, void* heap)
  20446. {
  20447. int err = FP_WOULDBLOCK;
  20448. sp_ecc_sign_256_ctx* ctx = (sp_ecc_sign_256_ctx*)sp_ctx->data;
  20449. typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
  20450. (void)sizeof(ctx_size_test);
  20451. (void)heap;
  20452. switch (ctx->state) {
  20453. case 0: /* INIT */
  20454. ctx->s = ctx->e;
  20455. ctx->kInv = ctx->k;
  20456. if (hashLen > 32U) {
  20457. hashLen = 32U;
  20458. }
  20459. sp_256_from_bin(ctx->e, 8, hash, (int)hashLen);
  20460. ctx->i = SP_ECC_MAX_SIG_GEN;
  20461. ctx->state = 1;
  20462. break;
  20463. case 1: /* GEN */
  20464. sp_256_from_mp(ctx->x, 8, priv);
  20465. /* New random point. */
  20466. if (km == NULL || mp_iszero(km)) {
  20467. err = sp_256_ecc_gen_k_8(rng, ctx->k);
  20468. }
  20469. else {
  20470. sp_256_from_mp(ctx->k, 8, km);
  20471. mp_zero(km);
  20472. }
  20473. XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
  20474. ctx->state = 2;
  20475. break;
  20476. case 2: /* MULMOD */
  20477. err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
  20478. &ctx->point, &p256_base, ctx->k, 1, 1, heap);
  20479. if (err == MP_OKAY) {
  20480. ctx->state = 3;
  20481. }
  20482. break;
  20483. case 3: /* MODORDER */
  20484. {
  20485. int32_t c;
  20486. /* r = point->x mod order */
  20487. XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 8U);
  20488. sp_256_norm_8(ctx->r);
  20489. c = sp_256_cmp_8(ctx->r, p256_order);
  20490. sp_256_cond_sub_8(ctx->r, ctx->r, p256_order, 0L - (sp_digit)(c >= 0));
  20491. sp_256_norm_8(ctx->r);
  20492. ctx->state = 4;
  20493. break;
  20494. }
  20495. case 4: /* KMODORDER */
  20496. /* Conv k to Montgomery form (mod order) */
  20497. sp_256_mul_8(ctx->k, ctx->k, p256_norm_order);
  20498. err = sp_256_mod_8(ctx->k, ctx->k, p256_order);
  20499. if (err == MP_OKAY) {
  20500. sp_256_norm_8(ctx->k);
  20501. XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
  20502. ctx->state = 5;
  20503. }
  20504. break;
  20505. case 5: /* KINV */
  20506. /* kInv = 1/k mod order */
  20507. err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp);
  20508. if (err == MP_OKAY) {
  20509. XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
  20510. ctx->state = 6;
  20511. }
  20512. break;
  20513. case 6: /* KINVNORM */
  20514. sp_256_norm_8(ctx->kInv);
  20515. ctx->state = 7;
  20516. break;
  20517. case 7: /* R */
  20518. /* s = r * x + e */
  20519. sp_256_mul_8(ctx->x, ctx->x, ctx->r);
  20520. ctx->state = 8;
  20521. break;
  20522. case 8: /* S1 */
  20523. err = sp_256_mod_8(ctx->x, ctx->x, p256_order);
  20524. if (err == MP_OKAY)
  20525. ctx->state = 9;
  20526. break;
  20527. case 9: /* S2 */
  20528. {
  20529. sp_digit carry;
  20530. int32_t c;
  20531. sp_256_norm_8(ctx->x);
  20532. carry = sp_256_add_8(ctx->s, ctx->e, ctx->x);
  20533. sp_256_cond_sub_8(ctx->s, ctx->s, p256_order, 0 - carry);
  20534. sp_256_norm_8(ctx->s);
  20535. c = sp_256_cmp_8(ctx->s, p256_order);
  20536. sp_256_cond_sub_8(ctx->s, ctx->s, p256_order, 0L - (sp_digit)(c >= 0));
  20537. sp_256_norm_8(ctx->s);
  20538. /* s = s * k^-1 mod order */
  20539. sp_256_mont_mul_order_8(ctx->s, ctx->s, ctx->kInv);
  20540. sp_256_norm_8(ctx->s);
  20541. /* Check that signature is usable. */
  20542. if (sp_256_iszero_8(ctx->s) == 0) {
  20543. ctx->state = 10;
  20544. break;
  20545. }
  20546. /* not usable gen, try again */
  20547. ctx->i--;
  20548. if (ctx->i == 0) {
  20549. err = RNG_FAILURE_E;
  20550. }
  20551. ctx->state = 1;
  20552. break;
  20553. }
  20554. case 10: /* RES */
  20555. err = sp_256_to_mp(ctx->r, rm);
  20556. if (err == MP_OKAY) {
  20557. err = sp_256_to_mp(ctx->s, sm);
  20558. }
  20559. break;
  20560. }
  20561. if (err == MP_OKAY && ctx->state != 10) {
  20562. err = FP_WOULDBLOCK;
  20563. }
  20564. if (err != FP_WOULDBLOCK) {
  20565. XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 8U);
  20566. XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 8U);
  20567. XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 8U);
  20568. XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 8U);
  20569. XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
  20570. }
  20571. return err;
  20572. }
  20573. #endif /* WOLFSSL_SP_NONBLOCK */
  20574. int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
  20575. mp_int* rm, mp_int* sm, mp_int* km, void* heap)
  20576. {
  20577. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  20578. sp_digit* d = NULL;
  20579. #else
  20580. sp_digit ed[2*8];
  20581. sp_digit xd[2*8];
  20582. sp_digit kd[2*8];
  20583. sp_digit rd[2*8];
  20584. sp_digit td[3 * 2*8];
  20585. sp_point_256 p;
  20586. #endif
  20587. sp_digit* e = NULL;
  20588. sp_digit* x = NULL;
  20589. sp_digit* k = NULL;
  20590. sp_digit* r = NULL;
  20591. sp_digit* tmp = NULL;
  20592. sp_point_256* point = NULL;
  20593. sp_digit carry;
  20594. sp_digit* s = NULL;
  20595. sp_digit* kInv = NULL;
  20596. int err = MP_OKAY;
  20597. int32_t c;
  20598. int i;
  20599. (void)heap;
  20600. err = sp_256_point_new_8(heap, p, point);
  20601. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  20602. if (err == MP_OKAY) {
  20603. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
  20604. DYNAMIC_TYPE_ECC);
  20605. if (d == NULL) {
  20606. err = MEMORY_E;
  20607. }
  20608. }
  20609. #endif
  20610. if (err == MP_OKAY) {
  20611. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  20612. e = d + 0 * 8;
  20613. x = d + 2 * 8;
  20614. k = d + 4 * 8;
  20615. r = d + 6 * 8;
  20616. tmp = d + 8 * 8;
  20617. #else
  20618. e = ed;
  20619. x = xd;
  20620. k = kd;
  20621. r = rd;
  20622. tmp = td;
  20623. #endif
  20624. s = e;
  20625. kInv = k;
  20626. if (hashLen > 32U) {
  20627. hashLen = 32U;
  20628. }
  20629. }
  20630. for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
  20631. sp_256_from_mp(x, 8, priv);
  20632. /* New random point. */
  20633. if (km == NULL || mp_iszero(km)) {
  20634. err = sp_256_ecc_gen_k_8(rng, k);
  20635. }
  20636. else {
  20637. sp_256_from_mp(k, 8, km);
  20638. mp_zero(km);
  20639. }
  20640. if (err == MP_OKAY) {
  20641. err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, NULL);
  20642. }
  20643. if (err == MP_OKAY) {
  20644. /* r = point->x mod order */
  20645. XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
  20646. sp_256_norm_8(r);
  20647. c = sp_256_cmp_8(r, p256_order);
  20648. sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
  20649. sp_256_norm_8(r);
  20650. /* Conv k to Montgomery form (mod order) */
  20651. sp_256_mul_8(k, k, p256_norm_order);
  20652. err = sp_256_mod_8(k, k, p256_order);
  20653. }
  20654. if (err == MP_OKAY) {
  20655. sp_256_norm_8(k);
  20656. /* kInv = 1/k mod order */
  20657. sp_256_mont_inv_order_8(kInv, k, tmp);
  20658. sp_256_norm_8(kInv);
  20659. /* s = r * x + e */
  20660. sp_256_mul_8(x, x, r);
  20661. err = sp_256_mod_8(x, x, p256_order);
  20662. }
  20663. if (err == MP_OKAY) {
  20664. sp_256_norm_8(x);
  20665. sp_256_from_bin(e, 8, hash, (int)hashLen);
  20666. carry = sp_256_add_8(s, e, x);
  20667. sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
  20668. sp_256_norm_8(s);
  20669. c = sp_256_cmp_8(s, p256_order);
  20670. sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
  20671. sp_256_norm_8(s);
  20672. /* s = s * k^-1 mod order */
  20673. sp_256_mont_mul_order_8(s, s, kInv);
  20674. sp_256_norm_8(s);
  20675. /* Check that signature is usable. */
  20676. if (sp_256_iszero_8(s) == 0) {
  20677. break;
  20678. }
  20679. }
  20680. #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
  20681. i = 1;
  20682. #endif
  20683. }
  20684. if (i == 0) {
  20685. err = RNG_FAILURE_E;
  20686. }
  20687. if (err == MP_OKAY) {
  20688. err = sp_256_to_mp(r, rm);
  20689. }
  20690. if (err == MP_OKAY) {
  20691. err = sp_256_to_mp(s, sm);
  20692. }
  20693. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  20694. if (d != NULL) {
  20695. XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
  20696. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  20697. }
  20698. #else
  20699. XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
  20700. XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
  20701. XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
  20702. XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
  20703. XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
  20704. XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
  20705. #endif
  20706. sp_256_point_free_8(point, 1, heap);
  20707. return err;
  20708. }
  20709. #endif /* HAVE_ECC_SIGN */
  20710. #ifndef WOLFSSL_SP_SMALL
  20711. static void sp_256_rshift1_8(sp_digit* r, sp_digit* a)
  20712. {
  20713. __asm__ __volatile__ (
  20714. "mov r10, #0\n\t"
  20715. "mov r9, #0\n\t"
  20716. "ldr r3, [%[a], #16]\n\t"
  20717. "ldr r4, [%[a], #20]\n\t"
  20718. "ldr r5, [%[a], #24]\n\t"
  20719. "ldr r6, [%[a], #28]\n\t"
  20720. "lsr r7, r3, #1\n\t"
  20721. "and r3, r3, #1\n\t"
  20722. "lsr r8, r4, #1\n\t"
  20723. "lsr r10, r5, #1\n\t"
  20724. "lsr r14, r6, #1\n\t"
  20725. "orr r7, r7, r4, lsl #31\n\t"
  20726. "orr r8, r8, r5, lsl #31\n\t"
  20727. "orr r10, r10, r6, lsl #31\n\t"
  20728. "orr r14, r14, r9, lsl #31\n\t"
  20729. "mov r9, r3\n\t"
  20730. "str r7, [%[r], #16]\n\t"
  20731. "str r8, [%[r], #20]\n\t"
  20732. "str r10, [%[r], #24]\n\t"
  20733. "str r14, [%[r], #28]\n\t"
  20734. "ldr r3, [%[r], #0]\n\t"
  20735. "ldr r4, [%[r], #4]\n\t"
  20736. "ldr r5, [%[r], #8]\n\t"
  20737. "ldr r6, [%[r], #12]\n\t"
  20738. "lsr r7, r3, #1\n\t"
  20739. "lsr r8, r4, #1\n\t"
  20740. "lsr r10, r5, #1\n\t"
  20741. "lsr r14, r6, #1\n\t"
  20742. "orr r7, r7, r4, lsl #31\n\t"
  20743. "orr r8, r8, r5, lsl #31\n\t"
  20744. "orr r10, r10, r6, lsl #31\n\t"
  20745. "orr r14, r14, r9, lsl #31\n\t"
  20746. "str r7, [%[r], #0]\n\t"
  20747. "str r8, [%[r], #4]\n\t"
  20748. "str r10, [%[r], #8]\n\t"
  20749. "str r14, [%[r], #12]\n\t"
  20750. :
  20751. : [r] "r" (r), [a] "r" (a)
  20752. : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
  20753. );
  20754. }
  20755. /* Divide the number by 2 mod the modulus. (r = a / 2 % m)
  20756. *
  20757. * r Result of division by 2.
  20758. * a Number to divide.
  20759. * m Modulus.
  20760. */
  20761. static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  20762. {
  20763. __asm__ __volatile__ (
  20764. "mov r10, #0\n\t"
  20765. "ldr r3, [%[a], #0]\n\t"
  20766. "ands r9, r3, #1\n\t"
  20767. "beq 1f\n\t"
  20768. "ldr r4, [%[a], #4]\n\t"
  20769. "ldr r5, [%[a], #8]\n\t"
  20770. "ldr r6, [%[a], #12]\n\t"
  20771. "ldr r7, [%[m], #0]\n\t"
  20772. "ldr r8, [%[m], #4]\n\t"
  20773. "ldr r10, [%[m], #8]\n\t"
  20774. "ldr r14, [%[m], #12]\n\t"
  20775. "adds r3, r3, r7\n\t"
  20776. "adcs r4, r4, r8\n\t"
  20777. "adcs r5, r5, r10\n\t"
  20778. "adcs r6, r6, r14\n\t"
  20779. "str r3, [%[r], #0]\n\t"
  20780. "str r4, [%[r], #4]\n\t"
  20781. "str r5, [%[r], #8]\n\t"
  20782. "str r6, [%[r], #12]\n\t"
  20783. "ldr r3, [%[a], #16]\n\t"
  20784. "ldr r4, [%[a], #20]\n\t"
  20785. "ldr r5, [%[a], #24]\n\t"
  20786. "ldr r6, [%[a], #28]\n\t"
  20787. "ldr r7, [%[m], #16]\n\t"
  20788. "ldr r8, [%[m], #20]\n\t"
  20789. "ldr r10, [%[m], #24]\n\t"
  20790. "ldr r14, [%[m], #28]\n\t"
  20791. "adcs r3, r3, r7\n\t"
  20792. "adcs r4, r4, r8\n\t"
  20793. "adcs r5, r5, r10\n\t"
  20794. "adcs r6, r6, r14\n\t"
  20795. "adc r9, r10, r10\n\t"
  20796. "b 2f\n\t"
  20797. "\n1:\n\t"
  20798. "ldr r3, [%[a], #16]\n\t"
  20799. "ldr r4, [%[a], #20]\n\t"
  20800. "ldr r5, [%[a], #24]\n\t"
  20801. "ldr r6, [%[a], #28]\n\t"
  20802. "\n2:\n\t"
  20803. "lsr r7, r3, #1\n\t"
  20804. "and r3, r3, #1\n\t"
  20805. "lsr r8, r4, #1\n\t"
  20806. "lsr r10, r5, #1\n\t"
  20807. "lsr r14, r6, #1\n\t"
  20808. "orr r7, r7, r4, lsl #31\n\t"
  20809. "orr r8, r8, r5, lsl #31\n\t"
  20810. "orr r10, r10, r6, lsl #31\n\t"
  20811. "orr r14, r14, r9, lsl #31\n\t"
  20812. "mov r9, r3\n\t"
  20813. "str r7, [%[r], #16]\n\t"
  20814. "str r8, [%[r], #20]\n\t"
  20815. "str r10, [%[r], #24]\n\t"
  20816. "str r14, [%[r], #28]\n\t"
  20817. "ldr r3, [%[r], #0]\n\t"
  20818. "ldr r4, [%[r], #4]\n\t"
  20819. "ldr r5, [%[r], #8]\n\t"
  20820. "ldr r6, [%[r], #12]\n\t"
  20821. "lsr r7, r3, #1\n\t"
  20822. "lsr r8, r4, #1\n\t"
  20823. "lsr r10, r5, #1\n\t"
  20824. "lsr r14, r6, #1\n\t"
  20825. "orr r7, r7, r4, lsl #31\n\t"
  20826. "orr r8, r8, r5, lsl #31\n\t"
  20827. "orr r10, r10, r6, lsl #31\n\t"
  20828. "orr r14, r14, r9, lsl #31\n\t"
  20829. "str r7, [%[r], #0]\n\t"
  20830. "str r8, [%[r], #4]\n\t"
  20831. "str r10, [%[r], #8]\n\t"
  20832. "str r14, [%[r], #12]\n\t"
  20833. :
  20834. : [r] "r" (r), [a] "r" (a), [m] "r" (m)
  20835. : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
  20836. );
  20837. }
  20838. static int sp_256_num_bits_8(sp_digit* a)
  20839. {
  20840. int r = 0;
  20841. __asm__ __volatile__ (
  20842. "ldr r2, [%[a], #28]\n\t"
  20843. "cmp r2, #0\n\t"
  20844. "beq 7f\n\t"
  20845. "mov r3, #256\n\t"
  20846. "clz %[r], r2\n\t"
  20847. "sub %[r], r3, %[r]\n\t"
  20848. "b 9f\n\t"
  20849. "\n7:\n\t"
  20850. "ldr r2, [%[a], #24]\n\t"
  20851. "cmp r2, #0\n\t"
  20852. "beq 6f\n\t"
  20853. "mov r3, #224\n\t"
  20854. "clz %[r], r2\n\t"
  20855. "sub %[r], r3, %[r]\n\t"
  20856. "b 9f\n\t"
  20857. "\n6:\n\t"
  20858. "ldr r2, [%[a], #20]\n\t"
  20859. "cmp r2, #0\n\t"
  20860. "beq 5f\n\t"
  20861. "mov r3, #192\n\t"
  20862. "clz %[r], r2\n\t"
  20863. "sub %[r], r3, %[r]\n\t"
  20864. "b 9f\n\t"
  20865. "\n5:\n\t"
  20866. "ldr r2, [%[a], #16]\n\t"
  20867. "cmp r2, #0\n\t"
  20868. "beq 4f\n\t"
  20869. "mov r3, #160\n\t"
  20870. "clz %[r], r2\n\t"
  20871. "sub %[r], r3, %[r]\n\t"
  20872. "b 9f\n\t"
  20873. "\n4:\n\t"
  20874. "ldr r2, [%[a], #12]\n\t"
  20875. "cmp r2, #0\n\t"
  20876. "beq 3f\n\t"
  20877. "mov r3, #128\n\t"
  20878. "clz %[r], r2\n\t"
  20879. "sub %[r], r3, %[r]\n\t"
  20880. "b 9f\n\t"
  20881. "\n3:\n\t"
  20882. "ldr r2, [%[a], #8]\n\t"
  20883. "cmp r2, #0\n\t"
  20884. "beq 2f\n\t"
  20885. "mov r3, #96\n\t"
  20886. "clz %[r], r2\n\t"
  20887. "sub %[r], r3, %[r]\n\t"
  20888. "b 9f\n\t"
  20889. "\n2:\n\t"
  20890. "ldr r2, [%[a], #4]\n\t"
  20891. "cmp r2, #0\n\t"
  20892. "beq 1f\n\t"
  20893. "mov r3, #64\n\t"
  20894. "clz %[r], r2\n\t"
  20895. "sub %[r], r3, %[r]\n\t"
  20896. "b 9f\n\t"
  20897. "\n1:\n\t"
  20898. "ldr r2, [%[a], #0]\n\t"
  20899. "mov r3, #32\n\t"
  20900. "clz %[r], r2\n\t"
  20901. "sub %[r], r3, %[r]\n\t"
  20902. "\n9:\n\t"
  20903. : [r] "+r" (r)
  20904. : [a] "r" (a)
  20905. : "r2", "r3"
  20906. );
  20907. return r;
  20908. }
  20909. /* Non-constant time modular inversion.
  20910. *
  20911. * @param [out] r Resulting number.
  20912. * @param [in] a Number to invert.
  20913. * @param [in] m Modulus.
  20914. * @return MP_OKAY on success.
  20915. */
  20916. static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
  20917. {
  20918. sp_digit u[8];
  20919. sp_digit v[8];
  20920. sp_digit b[8];
  20921. sp_digit d[8];
  20922. int ut, vt;
  20923. sp_digit o;
  20924. XMEMCPY(u, m, sizeof(u));
  20925. XMEMCPY(v, a, sizeof(v));
  20926. ut = sp_256_num_bits_8(u);
  20927. vt = sp_256_num_bits_8(v);
  20928. XMEMSET(b, 0, sizeof(b));
  20929. if ((v[0] & 1) == 0) {
  20930. sp_256_rshift1_8(v, v);
  20931. XMEMCPY(d, m, sizeof(u));
  20932. d[0] += 1;
  20933. sp_256_rshift1_8(d, d);
  20934. vt--;
  20935. while ((v[0] & 1) == 0) {
  20936. sp_256_rshift1_8(v, v);
  20937. sp_256_div2_mod_8(d, d, m);
  20938. vt--;
  20939. }
  20940. }
  20941. else {
  20942. XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit));
  20943. d[0] = 1;
  20944. }
  20945. while (ut > 1 && vt > 1) {
  20946. if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) {
  20947. sp_256_sub_8(u, u, v);
  20948. o = sp_256_sub_8(b, b, d);
  20949. if (o != 0)
  20950. sp_256_add_8(b, b, m);
  20951. ut = sp_256_num_bits_8(u);
  20952. do {
  20953. sp_256_rshift1_8(u, u);
  20954. sp_256_div2_mod_8(b, b, m);
  20955. ut--;
  20956. }
  20957. while (ut > 0 && (u[0] & 1) == 0);
  20958. }
  20959. else {
  20960. sp_256_sub_8(v, v, u);
  20961. o = sp_256_sub_8(d, d, b);
  20962. if (o != 0)
  20963. sp_256_add_8(d, d, m);
  20964. vt = sp_256_num_bits_8(v);
  20965. do {
  20966. sp_256_rshift1_8(v, v);
  20967. sp_256_div2_mod_8(d, d, m);
  20968. vt--;
  20969. }
  20970. while (vt > 0 && (v[0] & 1) == 0);
  20971. }
  20972. }
  20973. if (ut == 1)
  20974. XMEMCPY(r, b, sizeof(b));
  20975. else
  20976. XMEMCPY(r, d, sizeof(d));
  20977. return MP_OKAY;
  20978. }
  20979. #endif /* WOLFSSL_SP_SMALL */
  20980. #ifdef HAVE_ECC_VERIFY
  20981. /* Verify the signature values with the hash and public key.
  20982. * e = Truncate(hash, 256)
  20983. * u1 = e/s mod order
  20984. * u2 = r/s mod order
  20985. * r == (u1.G + u2.Q)->x mod order
  20986. * Optimization: Leave point in projective form.
  20987. * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
  20988. * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
  20989. * The hash is truncated to the first 256 bits.
  20990. *
  20991. * hash Hash that was signed.
  20992. * hashLen Length of the hash data.
  20993. * pX, pY, pZ Ordinates of the public key point as mp_ints.
  20994. * r First part of the signature as an mp_int.
  20995. * sm Second part of the signature as an mp_int.
  20996. * res Receives the verification result (1 when the signature matches).
  20997. * heap Heap to use for allocation.
  20998. * returns RNG failures, MEMORY_E when memory allocation fails and
  20999. * MP_OKAY on success.
  21000. */
  21001. #ifdef WOLFSSL_SP_NONBLOCK
  21002. typedef struct sp_ecc_verify_256_ctx {
  21003. int state;
  21004. union {
  21005. sp_256_ecc_mulmod_8_ctx mulmod_ctx;
  21006. sp_256_mont_inv_order_8_ctx mont_inv_order_ctx;
  21007. sp_256_proj_point_dbl_8_ctx dbl_ctx;
  21008. sp_256_proj_point_add_8_ctx add_ctx;
  21009. };
  21010. sp_digit u1[2*8];
  21011. sp_digit u2[2*8];
  21012. sp_digit s[2*8];
  21013. sp_digit tmp[2*8 * 5];
  21014. sp_point_256 p1;
  21015. sp_point_256 p2;
  21016. } sp_ecc_verify_256_ctx;
  21017. int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, mp_int* pX,
  21018. mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
  21019. {
  21020. int err = FP_WOULDBLOCK;
  21021. sp_ecc_verify_256_ctx* ctx = (sp_ecc_verify_256_ctx*)sp_ctx->data;
  21022. typedef char ctx_size_test[sizeof(sp_ecc_verify_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
  21023. (void)sizeof(ctx_size_test);
  21024. switch (ctx->state) {
  21025. case 0: /* INIT */
  21026. if (hashLen > 32U) {
  21027. hashLen = 32U;
  21028. }
  21029. sp_256_from_bin(ctx->u1, 8, hash, (int)hashLen);
  21030. sp_256_from_mp(ctx->u2, 8, r);
  21031. sp_256_from_mp(ctx->s, 8, sm);
  21032. sp_256_from_mp(ctx->p2.x, 8, pX);
  21033. sp_256_from_mp(ctx->p2.y, 8, pY);
  21034. sp_256_from_mp(ctx->p2.z, 8, pZ);
  21035. ctx->state = 1;
  21036. break;
  21037. case 1: /* NORMS0 */
  21038. sp_256_mul_8(ctx->s, ctx->s, p256_norm_order);
  21039. err = sp_256_mod_8(ctx->s, ctx->s, p256_order);
  21040. if (err == MP_OKAY)
  21041. ctx->state = 2;
  21042. break;
  21043. case 2: /* NORMS1 */
  21044. sp_256_norm_8(ctx->s);
  21045. XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
  21046. ctx->state = 3;
  21047. break;
  21048. case 3: /* NORMS2 */
  21049. err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
  21050. if (err == MP_OKAY) {
  21051. ctx->state = 4;
  21052. }
  21053. break;
  21054. case 4: /* NORMS3 */
  21055. sp_256_mont_mul_order_8(ctx->u1, ctx->u1, ctx->s);
  21056. ctx->state = 5;
  21057. break;
  21058. case 5: /* NORMS4 */
  21059. sp_256_mont_mul_order_8(ctx->u2, ctx->u2, ctx->s);
  21060. XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
  21061. ctx->state = 6;
  21062. break;
  21063. case 6: /* MULBASE */
  21064. err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
  21065. if (err == MP_OKAY) {
  21066. if (sp_256_iszero_8(ctx->p1.z)) {
  21067. ctx->p1.infinity = 1;
  21068. }
  21069. XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
  21070. ctx->state = 7;
  21071. }
  21072. break;
  21073. case 7: /* MULMOD */
  21074. err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
  21075. if (err == MP_OKAY) {
  21076. if (sp_256_iszero_8(ctx->p2.z)) {
  21077. ctx->p2.infinity = 1;
  21078. }
  21079. XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
  21080. ctx->state = 8;
  21081. }
  21082. break;
  21083. case 8: /* ADD */
  21084. err = sp_256_proj_point_add_8_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
  21085. if (err == MP_OKAY)
  21086. ctx->state = 9;
  21087. break;
  21088. case 9: /* DBLPREP */
  21089. if (sp_256_iszero_8(ctx->p1.z)) {
  21090. if (sp_256_iszero_8(ctx->p1.x) && sp_256_iszero_8(ctx->p1.y)) {
  21091. XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
  21092. ctx->state = 10;
  21093. break;
  21094. }
  21095. else {
  21096. /* Y ordinate is not used from here - don't set. */
  21097. int i;
  21098. for (i=0; i<8; i++) {
  21099. ctx->p1.x[i] = 0;
  21100. }
  21101. XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
  21102. }
  21103. }
  21104. ctx->state = 11;
  21105. break;
  21106. case 10: /* DBL */
  21107. err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1,
  21108. &ctx->p2, ctx->tmp);
  21109. if (err == MP_OKAY) {
  21110. ctx->state = 11;
  21111. }
  21112. break;
  21113. case 11: /* MONT */
  21114. /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
  21115. /* Reload r and convert to Montgomery form. */
  21116. sp_256_from_mp(ctx->u2, 8, r);
  21117. err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
  21118. if (err == MP_OKAY)
  21119. ctx->state = 12;
  21120. break;
  21121. case 12: /* SQR */
  21122. /* u1 = r.z'.z' mod prime */
  21123. sp_256_mont_sqr_8(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
  21124. ctx->state = 13;
  21125. break;
  21126. case 13: /* MUL */
  21127. sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
  21128. ctx->state = 14;
  21129. break;
  21130. case 14: /* RES */
  21131. err = MP_OKAY; /* math okay, now check result */
  21132. *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
  21133. if (*res == 0) {
  21134. sp_digit carry;
  21135. int32_t c;
  21136. /* Reload r and add order. */
  21137. sp_256_from_mp(ctx->u2, 8, r);
  21138. carry = sp_256_add_8(ctx->u2, ctx->u2, p256_order);
  21139. /* Carry means result is greater than mod and is not valid. */
  21140. if (carry == 0) {
  21141. sp_256_norm_8(ctx->u2);
  21142. /* Compare with mod and if greater or equal then not valid. */
  21143. c = sp_256_cmp_8(ctx->u2, p256_mod);
  21144. if (c < 0) {
  21145. /* Convert to Montogomery form */
  21146. err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
  21147. if (err == MP_OKAY) {
  21148. /* u1 = (r + 1*order).z'.z' mod prime */
  21149. sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod,
  21150. p256_mp_mod);
  21151. *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
  21152. }
  21153. }
  21154. }
  21155. }
  21156. break;
  21157. }
  21158. if (err == MP_OKAY && ctx->state != 14) {
  21159. err = FP_WOULDBLOCK;
  21160. }
  21161. return err;
  21162. }
  21163. #endif /* WOLFSSL_SP_NONBLOCK */
  21164. int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
  21165. mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
  21166. {
  21167. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21168. sp_digit* d = NULL;
  21169. #else
  21170. sp_digit u1d[2*8];
  21171. sp_digit u2d[2*8];
  21172. sp_digit sd[2*8];
  21173. sp_digit tmpd[2*8 * 5];
  21174. sp_point_256 p1d;
  21175. sp_point_256 p2d;
  21176. #endif
  21177. sp_digit* u1 = NULL;
  21178. sp_digit* u2 = NULL;
  21179. sp_digit* s = NULL;
  21180. sp_digit* tmp = NULL;
  21181. sp_point_256* p1;
  21182. sp_point_256* p2 = NULL;
  21183. sp_digit carry;
  21184. int32_t c;
  21185. int err;
  21186. err = sp_256_point_new_8(heap, p1d, p1);
  21187. if (err == MP_OKAY) {
  21188. err = sp_256_point_new_8(heap, p2d, p2);
  21189. }
  21190. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21191. if (err == MP_OKAY) {
  21192. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
  21193. DYNAMIC_TYPE_ECC);
  21194. if (d == NULL) {
  21195. err = MEMORY_E;
  21196. }
  21197. }
  21198. #endif
  21199. if (err == MP_OKAY) {
  21200. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21201. u1 = d + 0 * 8;
  21202. u2 = d + 2 * 8;
  21203. s = d + 4 * 8;
  21204. tmp = d + 6 * 8;
  21205. #else
  21206. u1 = u1d;
  21207. u2 = u2d;
  21208. s = sd;
  21209. tmp = tmpd;
  21210. #endif
  21211. if (hashLen > 32U) {
  21212. hashLen = 32U;
  21213. }
  21214. sp_256_from_bin(u1, 8, hash, (int)hashLen);
  21215. sp_256_from_mp(u2, 8, r);
  21216. sp_256_from_mp(s, 8, sm);
  21217. sp_256_from_mp(p2->x, 8, pX);
  21218. sp_256_from_mp(p2->y, 8, pY);
  21219. sp_256_from_mp(p2->z, 8, pZ);
  21220. #ifndef WOLFSSL_SP_SMALL
  21221. {
  21222. sp_256_mod_inv_8(s, s, p256_order);
  21223. }
  21224. #endif /* !WOLFSSL_SP_SMALL */
  21225. {
  21226. sp_256_mul_8(s, s, p256_norm_order);
  21227. }
  21228. err = sp_256_mod_8(s, s, p256_order);
  21229. }
  21230. if (err == MP_OKAY) {
  21231. sp_256_norm_8(s);
  21232. #ifdef WOLFSSL_SP_SMALL
  21233. {
  21234. sp_256_mont_inv_order_8(s, s, tmp);
  21235. sp_256_mont_mul_order_8(u1, u1, s);
  21236. sp_256_mont_mul_order_8(u2, u2, s);
  21237. }
  21238. #else
  21239. {
  21240. sp_256_mont_mul_order_8(u1, u1, s);
  21241. sp_256_mont_mul_order_8(u2, u2, s);
  21242. }
  21243. #endif /* WOLFSSL_SP_SMALL */
  21244. err = sp_256_ecc_mulmod_base_8(p1, u1, 0, 0, heap);
  21245. }
  21246. if ((err == MP_OKAY) && sp_256_iszero_8(p1->z)) {
  21247. p1->infinity = 1;
  21248. }
  21249. if (err == MP_OKAY) {
  21250. err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, 0, heap);
  21251. }
  21252. if ((err == MP_OKAY) && sp_256_iszero_8(p2->z)) {
  21253. p2->infinity = 1;
  21254. }
  21255. if (err == MP_OKAY) {
  21256. {
  21257. sp_256_proj_point_add_8(p1, p1, p2, tmp);
  21258. if (sp_256_iszero_8(p1->z)) {
  21259. if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
  21260. sp_256_proj_point_dbl_8(p1, p2, tmp);
  21261. }
  21262. else {
  21263. /* Y ordinate is not used from here - don't set. */
  21264. p1->x[0] = 0;
  21265. p1->x[1] = 0;
  21266. p1->x[2] = 0;
  21267. p1->x[3] = 0;
  21268. p1->x[4] = 0;
  21269. p1->x[5] = 0;
  21270. p1->x[6] = 0;
  21271. p1->x[7] = 0;
  21272. XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
  21273. }
  21274. }
  21275. }
  21276. /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
  21277. /* Reload r and convert to Montgomery form. */
  21278. sp_256_from_mp(u2, 8, r);
  21279. err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
  21280. }
  21281. if (err == MP_OKAY) {
  21282. /* u1 = r.z'.z' mod prime */
  21283. sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
  21284. sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
  21285. *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
  21286. if (*res == 0) {
  21287. /* Reload r and add order. */
  21288. sp_256_from_mp(u2, 8, r);
  21289. carry = sp_256_add_8(u2, u2, p256_order);
  21290. /* Carry means result is greater than mod and is not valid. */
  21291. if (carry == 0) {
  21292. sp_256_norm_8(u2);
  21293. /* Compare with mod and if greater or equal then not valid. */
  21294. c = sp_256_cmp_8(u2, p256_mod);
  21295. if (c < 0) {
  21296. /* Convert to Montogomery form */
  21297. err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
  21298. if (err == MP_OKAY) {
  21299. /* u1 = (r + 1*order).z'.z' mod prime */
  21300. sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
  21301. p256_mp_mod);
  21302. *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
  21303. }
  21304. }
  21305. }
  21306. }
  21307. }
  21308. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21309. if (d != NULL)
  21310. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  21311. #endif
  21312. sp_256_point_free_8(p1, 0, heap);
  21313. sp_256_point_free_8(p2, 0, heap);
  21314. return err;
  21315. }
  21316. #endif /* HAVE_ECC_VERIFY */
  21317. #ifdef HAVE_ECC_CHECK_KEY
  21318. /* Check that the x and y oridinates are a valid point on the curve.
  21319. *
  21320. * point EC point.
  21321. * heap Heap to use if dynamically allocating.
  21322. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  21323. * not on the curve and MP_OKAY otherwise.
  21324. */
  21325. static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
  21326. {
  21327. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21328. sp_digit* d = NULL;
  21329. #else
  21330. sp_digit t1d[2*8];
  21331. sp_digit t2d[2*8];
  21332. #endif
  21333. sp_digit* t1;
  21334. sp_digit* t2;
  21335. int err = MP_OKAY;
  21336. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21337. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
  21338. if (d == NULL) {
  21339. err = MEMORY_E;
  21340. }
  21341. #endif
  21342. (void)heap;
  21343. if (err == MP_OKAY) {
  21344. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21345. t1 = d + 0 * 8;
  21346. t2 = d + 2 * 8;
  21347. #else
  21348. t1 = t1d;
  21349. t2 = t2d;
  21350. #endif
  21351. sp_256_sqr_8(t1, point->y);
  21352. (void)sp_256_mod_8(t1, t1, p256_mod);
  21353. sp_256_sqr_8(t2, point->x);
  21354. (void)sp_256_mod_8(t2, t2, p256_mod);
  21355. sp_256_mul_8(t2, t2, point->x);
  21356. (void)sp_256_mod_8(t2, t2, p256_mod);
  21357. (void)sp_256_sub_8(t2, p256_mod, t2);
  21358. sp_256_mont_add_8(t1, t1, t2, p256_mod);
  21359. sp_256_mont_add_8(t1, t1, point->x, p256_mod);
  21360. sp_256_mont_add_8(t1, t1, point->x, p256_mod);
  21361. sp_256_mont_add_8(t1, t1, point->x, p256_mod);
  21362. if (sp_256_cmp_8(t1, p256_b) != 0) {
  21363. err = MP_VAL;
  21364. }
  21365. }
  21366. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21367. if (d != NULL) {
  21368. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  21369. }
  21370. #endif
  21371. return err;
  21372. }
  21373. /* Check that the x and y oridinates are a valid point on the curve.
  21374. *
  21375. * pX X ordinate of EC point.
  21376. * pY Y ordinate of EC point.
  21377. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  21378. * not on the curve and MP_OKAY otherwise.
  21379. */
  21380. int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
  21381. {
  21382. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21383. sp_point_256 pubd;
  21384. #endif
  21385. sp_point_256* pub;
  21386. byte one[1] = { 1 };
  21387. int err;
  21388. err = sp_256_point_new_8(NULL, pubd, pub);
  21389. if (err == MP_OKAY) {
  21390. sp_256_from_mp(pub->x, 8, pX);
  21391. sp_256_from_mp(pub->y, 8, pY);
  21392. sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
  21393. err = sp_256_ecc_is_point_8(pub, NULL);
  21394. }
  21395. sp_256_point_free_8(pub, 0, NULL);
  21396. return err;
  21397. }
  21398. /* Check that the private scalar generates the EC point (px, py), the point is
  21399. * on the curve and the point has the correct order.
  21400. *
  21401. * pX X ordinate of EC point.
  21402. * pY Y ordinate of EC point.
  21403. * privm Private scalar that generates EC point.
  21404. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  21405. * not on the curve, ECC_INF_E if the point does not have the correct order,
  21406. * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
  21407. * MP_OKAY otherwise.
  21408. */
  21409. int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
  21410. {
  21411. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21412. sp_digit privd[8];
  21413. sp_point_256 pubd;
  21414. sp_point_256 pd;
  21415. #endif
  21416. sp_digit* priv = NULL;
  21417. sp_point_256* pub;
  21418. sp_point_256* p = NULL;
  21419. byte one[1] = { 1 };
  21420. int err;
  21421. err = sp_256_point_new_8(heap, pubd, pub);
  21422. if (err == MP_OKAY) {
  21423. err = sp_256_point_new_8(heap, pd, p);
  21424. }
  21425. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21426. if (err == MP_OKAY && privm) {
  21427. priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
  21428. DYNAMIC_TYPE_ECC);
  21429. if (priv == NULL) {
  21430. err = MEMORY_E;
  21431. }
  21432. }
  21433. #endif
  21434. /* Quick check the lengs of public key ordinates and private key are in
  21435. * range. Proper check later.
  21436. */
  21437. if ((err == MP_OKAY) && ((mp_count_bits(pX) > 256) ||
  21438. (mp_count_bits(pY) > 256) ||
  21439. ((privm != NULL) && (mp_count_bits(privm) > 256)))) {
  21440. err = ECC_OUT_OF_RANGE_E;
  21441. }
  21442. if (err == MP_OKAY) {
  21443. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21444. priv = privd;
  21445. #endif
  21446. sp_256_from_mp(pub->x, 8, pX);
  21447. sp_256_from_mp(pub->y, 8, pY);
  21448. sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
  21449. if (privm)
  21450. sp_256_from_mp(priv, 8, privm);
  21451. /* Check point at infinitiy. */
  21452. if ((sp_256_iszero_8(pub->x) != 0) &&
  21453. (sp_256_iszero_8(pub->y) != 0)) {
  21454. err = ECC_INF_E;
  21455. }
  21456. }
  21457. if (err == MP_OKAY) {
  21458. /* Check range of X and Y */
  21459. if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
  21460. sp_256_cmp_8(pub->y, p256_mod) >= 0) {
  21461. err = ECC_OUT_OF_RANGE_E;
  21462. }
  21463. }
  21464. if (err == MP_OKAY) {
  21465. /* Check point is on curve */
  21466. err = sp_256_ecc_is_point_8(pub, heap);
  21467. }
  21468. if (err == MP_OKAY) {
  21469. /* Point * order = infinity */
  21470. err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, 1, heap);
  21471. }
  21472. if (err == MP_OKAY) {
  21473. /* Check result is infinity */
  21474. if ((sp_256_iszero_8(p->x) == 0) ||
  21475. (sp_256_iszero_8(p->y) == 0)) {
  21476. err = ECC_INF_E;
  21477. }
  21478. }
  21479. if (privm) {
  21480. if (err == MP_OKAY) {
  21481. /* Base * private = point */
  21482. err = sp_256_ecc_mulmod_base_8(p, priv, 1, 1, heap);
  21483. }
  21484. if (err == MP_OKAY) {
  21485. /* Check result is public key */
  21486. if (sp_256_cmp_8(p->x, pub->x) != 0 ||
  21487. sp_256_cmp_8(p->y, pub->y) != 0) {
  21488. err = ECC_PRIV_KEY_E;
  21489. }
  21490. }
  21491. }
  21492. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21493. if (priv != NULL) {
  21494. XFREE(priv, heap, DYNAMIC_TYPE_ECC);
  21495. }
  21496. #endif
  21497. sp_256_point_free_8(p, 0, heap);
  21498. sp_256_point_free_8(pub, 0, heap);
  21499. return err;
  21500. }
  21501. #endif
  21502. #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
  21503. /* Add two projective EC points together.
  21504. * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
  21505. *
  21506. * pX First EC point's X ordinate.
  21507. * pY First EC point's Y ordinate.
  21508. * pZ First EC point's Z ordinate.
  21509. * qX Second EC point's X ordinate.
  21510. * qY Second EC point's Y ordinate.
  21511. * qZ Second EC point's Z ordinate.
  21512. * rX Resultant EC point's X ordinate.
  21513. * rY Resultant EC point's Y ordinate.
  21514. * rZ Resultant EC point's Z ordinate.
  21515. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  21516. */
  21517. int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
  21518. mp_int* qX, mp_int* qY, mp_int* qZ,
  21519. mp_int* rX, mp_int* rY, mp_int* rZ)
  21520. {
  21521. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21522. sp_digit tmpd[2 * 8 * 5];
  21523. sp_point_256 pd;
  21524. sp_point_256 qd;
  21525. #endif
  21526. sp_digit* tmp = NULL;
  21527. sp_point_256* p;
  21528. sp_point_256* q = NULL;
  21529. int err;
  21530. err = sp_256_point_new_8(NULL, pd, p);
  21531. if (err == MP_OKAY) {
  21532. err = sp_256_point_new_8(NULL, qd, q);
  21533. }
  21534. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21535. if (err == MP_OKAY) {
  21536. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
  21537. DYNAMIC_TYPE_ECC);
  21538. if (tmp == NULL) {
  21539. err = MEMORY_E;
  21540. }
  21541. }
  21542. #else
  21543. tmp = tmpd;
  21544. #endif
  21545. if (err == MP_OKAY) {
  21546. sp_256_from_mp(p->x, 8, pX);
  21547. sp_256_from_mp(p->y, 8, pY);
  21548. sp_256_from_mp(p->z, 8, pZ);
  21549. sp_256_from_mp(q->x, 8, qX);
  21550. sp_256_from_mp(q->y, 8, qY);
  21551. sp_256_from_mp(q->z, 8, qZ);
  21552. sp_256_proj_point_add_8(p, p, q, tmp);
  21553. }
  21554. if (err == MP_OKAY) {
  21555. err = sp_256_to_mp(p->x, rX);
  21556. }
  21557. if (err == MP_OKAY) {
  21558. err = sp_256_to_mp(p->y, rY);
  21559. }
  21560. if (err == MP_OKAY) {
  21561. err = sp_256_to_mp(p->z, rZ);
  21562. }
  21563. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21564. if (tmp != NULL) {
  21565. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  21566. }
  21567. #endif
  21568. sp_256_point_free_8(q, 0, NULL);
  21569. sp_256_point_free_8(p, 0, NULL);
  21570. return err;
  21571. }
  21572. /* Double a projective EC point.
  21573. * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
  21574. *
  21575. * pX EC point's X ordinate.
  21576. * pY EC point's Y ordinate.
  21577. * pZ EC point's Z ordinate.
  21578. * rX Resultant EC point's X ordinate.
  21579. * rY Resultant EC point's Y ordinate.
  21580. * rZ Resultant EC point's Z ordinate.
  21581. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  21582. */
  21583. int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
  21584. mp_int* rX, mp_int* rY, mp_int* rZ)
  21585. {
  21586. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21587. sp_digit tmpd[2 * 8 * 2];
  21588. sp_point_256 pd;
  21589. #endif
  21590. sp_digit* tmp = NULL;
  21591. sp_point_256* p;
  21592. int err;
  21593. err = sp_256_point_new_8(NULL, pd, p);
  21594. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21595. if (err == MP_OKAY) {
  21596. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
  21597. DYNAMIC_TYPE_ECC);
  21598. if (tmp == NULL) {
  21599. err = MEMORY_E;
  21600. }
  21601. }
  21602. #else
  21603. tmp = tmpd;
  21604. #endif
  21605. if (err == MP_OKAY) {
  21606. sp_256_from_mp(p->x, 8, pX);
  21607. sp_256_from_mp(p->y, 8, pY);
  21608. sp_256_from_mp(p->z, 8, pZ);
  21609. sp_256_proj_point_dbl_8(p, p, tmp);
  21610. }
  21611. if (err == MP_OKAY) {
  21612. err = sp_256_to_mp(p->x, rX);
  21613. }
  21614. if (err == MP_OKAY) {
  21615. err = sp_256_to_mp(p->y, rY);
  21616. }
  21617. if (err == MP_OKAY) {
  21618. err = sp_256_to_mp(p->z, rZ);
  21619. }
  21620. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21621. if (tmp != NULL) {
  21622. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  21623. }
  21624. #endif
  21625. sp_256_point_free_8(p, 0, NULL);
  21626. return err;
  21627. }
  21628. /* Map a projective EC point to affine in place.
  21629. * pZ will be one.
  21630. *
  21631. * pX EC point's X ordinate.
  21632. * pY EC point's Y ordinate.
  21633. * pZ EC point's Z ordinate.
  21634. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  21635. */
  21636. int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
  21637. {
  21638. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  21639. sp_digit tmpd[2 * 8 * 4];
  21640. sp_point_256 pd;
  21641. #endif
  21642. sp_digit* tmp = NULL;
  21643. sp_point_256* p;
  21644. int err;
  21645. err = sp_256_point_new_8(NULL, pd, p);
  21646. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21647. if (err == MP_OKAY) {
  21648. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
  21649. DYNAMIC_TYPE_ECC);
  21650. if (tmp == NULL) {
  21651. err = MEMORY_E;
  21652. }
  21653. }
  21654. #else
  21655. tmp = tmpd;
  21656. #endif
  21657. if (err == MP_OKAY) {
  21658. sp_256_from_mp(p->x, 8, pX);
  21659. sp_256_from_mp(p->y, 8, pY);
  21660. sp_256_from_mp(p->z, 8, pZ);
  21661. sp_256_map_8(p, p, tmp);
  21662. }
  21663. if (err == MP_OKAY) {
  21664. err = sp_256_to_mp(p->x, pX);
  21665. }
  21666. if (err == MP_OKAY) {
  21667. err = sp_256_to_mp(p->y, pY);
  21668. }
  21669. if (err == MP_OKAY) {
  21670. err = sp_256_to_mp(p->z, pZ);
  21671. }
  21672. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21673. if (tmp != NULL) {
  21674. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  21675. }
  21676. #endif
  21677. sp_256_point_free_8(p, 0, NULL);
  21678. return err;
  21679. }
  21680. #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
  21681. #ifdef HAVE_COMP_KEY
  21682. /* Find the square root of a number mod the prime of the curve.
  21683. *
  21684. * y The number to operate on and the result.
  21685. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  21686. */
  21687. static int sp_256_mont_sqrt_8(sp_digit* y)
  21688. {
  21689. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21690. sp_digit* d;
  21691. #else
  21692. sp_digit t1d[2 * 8];
  21693. sp_digit t2d[2 * 8];
  21694. #endif
  21695. sp_digit* t1;
  21696. sp_digit* t2;
  21697. int err = MP_OKAY;
  21698. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21699. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
  21700. if (d == NULL) {
  21701. err = MEMORY_E;
  21702. }
  21703. #endif
  21704. if (err == MP_OKAY) {
  21705. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21706. t1 = d + 0 * 8;
  21707. t2 = d + 2 * 8;
  21708. #else
  21709. t1 = t1d;
  21710. t2 = t2d;
  21711. #endif
  21712. {
  21713. /* t2 = y ^ 0x2 */
  21714. sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
  21715. /* t1 = y ^ 0x3 */
  21716. sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
  21717. /* t2 = y ^ 0xc */
  21718. sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
  21719. /* t1 = y ^ 0xf */
  21720. sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
  21721. /* t2 = y ^ 0xf0 */
  21722. sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
  21723. /* t1 = y ^ 0xff */
  21724. sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
  21725. /* t2 = y ^ 0xff00 */
  21726. sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
  21727. /* t1 = y ^ 0xffff */
  21728. sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
  21729. /* t2 = y ^ 0xffff0000 */
  21730. sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
  21731. /* t1 = y ^ 0xffffffff */
  21732. sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
  21733. /* t1 = y ^ 0xffffffff00000000 */
  21734. sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
  21735. /* t1 = y ^ 0xffffffff00000001 */
  21736. sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
  21737. /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
  21738. sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
  21739. /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
  21740. sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
  21741. sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
  21742. }
  21743. }
  21744. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21745. if (d != NULL) {
  21746. XFREE(d, NULL, DYNAMIC_TYPE_ECC);
  21747. }
  21748. #endif
  21749. return err;
  21750. }
  21751. /* Uncompress the point given the X ordinate.
  21752. *
  21753. * xm X ordinate.
  21754. * odd Whether the Y ordinate is odd.
  21755. * ym Calculated Y ordinate.
  21756. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  21757. */
  21758. int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
  21759. {
  21760. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21761. sp_digit* d;
  21762. #else
  21763. sp_digit xd[2 * 8];
  21764. sp_digit yd[2 * 8];
  21765. #endif
  21766. sp_digit* x = NULL;
  21767. sp_digit* y = NULL;
  21768. int err = MP_OKAY;
  21769. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21770. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
  21771. if (d == NULL) {
  21772. err = MEMORY_E;
  21773. }
  21774. #endif
  21775. if (err == MP_OKAY) {
  21776. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21777. x = d + 0 * 8;
  21778. y = d + 2 * 8;
  21779. #else
  21780. x = xd;
  21781. y = yd;
  21782. #endif
  21783. sp_256_from_mp(x, 8, xm);
  21784. err = sp_256_mod_mul_norm_8(x, x, p256_mod);
  21785. }
  21786. if (err == MP_OKAY) {
  21787. /* y = x^3 */
  21788. {
  21789. sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
  21790. sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
  21791. }
  21792. /* y = x^3 - 3x */
  21793. sp_256_mont_sub_8(y, y, x, p256_mod);
  21794. sp_256_mont_sub_8(y, y, x, p256_mod);
  21795. sp_256_mont_sub_8(y, y, x, p256_mod);
  21796. /* y = x^3 - 3x + b */
  21797. err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
  21798. }
  21799. if (err == MP_OKAY) {
  21800. sp_256_mont_add_8(y, y, x, p256_mod);
  21801. /* y = sqrt(x^3 - 3x + b) */
  21802. err = sp_256_mont_sqrt_8(y);
  21803. }
  21804. if (err == MP_OKAY) {
  21805. XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
  21806. sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
  21807. if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
  21808. sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
  21809. }
  21810. err = sp_256_to_mp(y, ym);
  21811. }
  21812. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21813. if (d != NULL) {
  21814. XFREE(d, NULL, DYNAMIC_TYPE_ECC);
  21815. }
  21816. #endif
  21817. return err;
  21818. }
  21819. #endif
  21820. #endif /* !WOLFSSL_SP_NO_256 */
  21821. #ifdef WOLFSSL_SP_384
  21822. /* Point structure to use for P384: three ordinates and an infinity flag. Each ordinate array has 2 * 12 digits; p384_base below fills only the first 12. */
  21823. typedef struct sp_point_384 {
  21824. sp_digit x[2 * 12]; /* X ordinate. */
  21825. sp_digit y[2 * 12]; /* Y ordinate. */
  21826. sp_digit z[2 * 12]; /* Z ordinate. */
  21827. int infinity; /* Point at infinity flag (0 for the base point below). */
  21828. } sp_point_384;
  21829. /* The modulus (prime) of the curve P384. Least significant digit first. */
  21830. static const sp_digit p384_mod[12] = {
  21831. 0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
  21832. 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
  21833. };
  21834. /* The Montgomery normalizer for modulus of the curve P384. */
  21835. static const sp_digit p384_norm_mod[12] = {
  21836. 0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
  21837. 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
  21838. };
  21839. /* The Montgomery multiplier for modulus of the curve P384. */
  21840. static sp_digit p384_mp_mod = 0x00000001;
  21841. #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
  21842. defined(HAVE_ECC_VERIFY)
  21843. /* The order of the curve P384. */
  21844. static const sp_digit p384_order[12] = {
  21845. 0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
  21846. 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
  21847. };
  21848. #endif
  21849. /* The order of the curve P384 minus 2. */
  21850. static const sp_digit p384_order2[12] = {
  21851. 0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
  21852. 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
  21853. };
  21854. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  21855. /* The Montgomery normalizer for order of the curve P384. */
  21856. static const sp_digit p384_norm_order[12] = {
  21857. 0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
  21858. 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
  21859. };
  21860. #endif
  21861. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  21862. /* The Montgomery multiplier for order of the curve P384. */
  21863. static sp_digit p384_mp_order = 0xe88fdc45;
  21864. #endif
  21865. /* The base point of curve P384. Only the low 12 digits of each ordinate are non-zero; Z is one. */
  21866. static const sp_point_384 p384_base = {
  21867. /* X ordinate */
  21868. {
  21869. 0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
  21870. 0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
  21871. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  21872. },
  21873. /* Y ordinate */
  21874. {
  21875. 0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
  21876. 0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
  21877. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  21878. },
  21879. /* Z ordinate */
  21880. {
  21881. 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
  21882. 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
  21883. 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
  21884. },
  21885. /* infinity */
  21886. 0
  21887. };
  21888. #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
  21889. static const sp_digit p384_b[12] = { /* The b coefficient of the curve P384; only built for key checking and point decompression. */
  21890. 0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
  21891. 0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
  21892. };
  21893. #endif
  21894. static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
  21895. {
  21896. int ret = MP_OKAY;
  21897. (void)heap;
  21898. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21899. (void)sp;
  21900. *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
  21901. #else
  21902. *p = sp;
  21903. #endif
  21904. if (*p == NULL) {
  21905. ret = MEMORY_E;
  21906. }
  21907. return ret;
  21908. }
  21909. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21910. /* Allocate memory for point and return error. */
  21911. #define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
  21912. #else
  21913. /* Set pointer to data and return no error. */
  21914. #define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
  21915. #endif
  21916. static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
  21917. {
  21918. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21919. /* If valid pointer then clear point data if requested and free data. */
  21920. if (p != NULL) {
  21921. if (clear != 0) {
  21922. XMEMSET(p, 0, sizeof(*p));
  21923. }
  21924. XFREE(p, heap, DYNAMIC_TYPE_ECC);
  21925. }
  21926. #else
  21927. /* Clear point data if requested. */
  21928. if (clear != 0) {
  21929. XMEMSET(p, 0, sizeof(*p));
  21930. }
  21931. #endif
  21932. (void)heap;
  21933. }
  21934. /* Multiply a number by Montgomery normalizer mod modulus (prime).
  21935. *
  21936. * r The resulting Montgomery form number.
  21937. * a The number to convert.
  21938. * m The modulus (prime).
  21939. * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
  21940. */
  21941. static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  21942. {
  21943. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21944. int64_t* t;
  21945. #else
  21946. int64_t t[12];
  21947. #endif
  21948. int64_t o;
  21949. int err = MP_OKAY;
  21950. (void)m;
  21951. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  21952. t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
  21953. if (t == NULL) {
  21954. err = MEMORY_E;
  21955. }
  21956. #endif
  21957. if (err == MP_OKAY) {
  21958. /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
  21959. t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
  21960. /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
  21961. t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
  21962. /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
  21963. t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
  21964. /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
  21965. t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
  21966. /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
  21967. t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11];
  21968. /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
  21969. t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
  21970. /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
  21971. t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
  21972. /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
  21973. t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
  21974. /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
  21975. t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
  21976. /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
  21977. t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
  21978. /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
  21979. t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
  21980. /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
  21981. t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
  21982. t[1] += t[0] >> 32; t[0] &= 0xffffffff;
  21983. t[2] += t[1] >> 32; t[1] &= 0xffffffff;
  21984. t[3] += t[2] >> 32; t[2] &= 0xffffffff;
  21985. t[4] += t[3] >> 32; t[3] &= 0xffffffff;
  21986. t[5] += t[4] >> 32; t[4] &= 0xffffffff;
  21987. t[6] += t[5] >> 32; t[5] &= 0xffffffff;
  21988. t[7] += t[6] >> 32; t[6] &= 0xffffffff;
  21989. t[8] += t[7] >> 32; t[7] &= 0xffffffff;
  21990. t[9] += t[8] >> 32; t[8] &= 0xffffffff;
  21991. t[10] += t[9] >> 32; t[9] &= 0xffffffff;
  21992. t[11] += t[10] >> 32; t[10] &= 0xffffffff;
  21993. o = t[11] >> 32; t[11] &= 0xffffffff;
  21994. t[0] += o;
  21995. t[1] -= o;
  21996. t[3] += o;
  21997. t[4] += o;
  21998. t[1] += t[0] >> 32; t[0] &= 0xffffffff;
  21999. t[2] += t[1] >> 32; t[1] &= 0xffffffff;
  22000. t[3] += t[2] >> 32; t[2] &= 0xffffffff;
  22001. t[4] += t[3] >> 32; t[3] &= 0xffffffff;
  22002. t[5] += t[4] >> 32; t[4] &= 0xffffffff;
  22003. t[6] += t[5] >> 32; t[5] &= 0xffffffff;
  22004. t[7] += t[6] >> 32; t[6] &= 0xffffffff;
  22005. t[8] += t[7] >> 32; t[7] &= 0xffffffff;
  22006. t[9] += t[8] >> 32; t[8] &= 0xffffffff;
  22007. t[10] += t[9] >> 32; t[9] &= 0xffffffff;
  22008. t[11] += t[10] >> 32; t[10] &= 0xffffffff;
  22009. r[0] = t[0];
  22010. r[1] = t[1];
  22011. r[2] = t[2];
  22012. r[3] = t[3];
  22013. r[4] = t[4];
  22014. r[5] = t[5];
  22015. r[6] = t[6];
  22016. r[7] = t[7];
  22017. r[8] = t[8];
  22018. r[9] = t[9];
  22019. r[10] = t[10];
  22020. r[11] = t[11];
  22021. }
  22022. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  22023. if (t != NULL)
  22024. XFREE(t, NULL, DYNAMIC_TYPE_ECC);
  22025. #endif
  22026. return err;
  22027. }
  22028. /* Convert an mp_int to an array of sp_digit.
  22029. *
  22030. * r A single precision integer.
  22031. * size Maximum number of bytes to convert
  22032. * a A multi-precision integer.
  22033. */
  22034. static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
  22035. {
  22036. #if DIGIT_BIT == 32
  22037. int j;
  22038. XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
  22039. for (j = a->used; j < size; j++) {
  22040. r[j] = 0;
  22041. }
  22042. #elif DIGIT_BIT > 32
  22043. int i, j = 0;
  22044. word32 s = 0;
  22045. r[0] = 0;
  22046. for (i = 0; i < a->used && j < size; i++) {
  22047. r[j] |= ((sp_digit)a->dp[i] << s);
  22048. r[j] &= 0xffffffff;
  22049. s = 32U - s;
  22050. if (j + 1 >= size) {
  22051. break;
  22052. }
  22053. /* lint allow cast of mismatch word32 and mp_digit */
  22054. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  22055. while ((s + 32U) <= (word32)DIGIT_BIT) {
  22056. s += 32U;
  22057. r[j] &= 0xffffffff;
  22058. if (j + 1 >= size) {
  22059. break;
  22060. }
  22061. if (s < (word32)DIGIT_BIT) {
  22062. /* lint allow cast of mismatch word32 and mp_digit */
  22063. r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
  22064. }
  22065. else {
  22066. r[++j] = 0L;
  22067. }
  22068. }
  22069. s = (word32)DIGIT_BIT - s;
  22070. }
  22071. for (j++; j < size; j++) {
  22072. r[j] = 0;
  22073. }
  22074. #else
  22075. int i, j = 0, s = 0;
  22076. r[0] = 0;
  22077. for (i = 0; i < a->used && j < size; i++) {
  22078. r[j] |= ((sp_digit)a->dp[i]) << s;
  22079. if (s + DIGIT_BIT >= 32) {
  22080. r[j] &= 0xffffffff;
  22081. if (j + 1 >= size) {
  22082. break;
  22083. }
  22084. s = 32 - s;
  22085. if (s == DIGIT_BIT) {
  22086. r[++j] = 0;
  22087. s = 0;
  22088. }
  22089. else {
  22090. r[++j] = a->dp[i] >> s;
  22091. s = DIGIT_BIT - s;
  22092. }
  22093. }
  22094. else {
  22095. s += DIGIT_BIT;
  22096. }
  22097. }
  22098. for (j++; j < size; j++) {
  22099. r[j] = 0;
  22100. }
  22101. #endif
  22102. }
  22103. /* Convert a point of type ecc_point to type sp_point_384.
  22104. *
  22105. * p Point of type sp_point_384 (result).
  22106. * pm Point of type ecc_point.
  22107. */
  22108. static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
  22109. {
  22110. XMEMSET(p->x, 0, sizeof(p->x));
  22111. XMEMSET(p->y, 0, sizeof(p->y));
  22112. XMEMSET(p->z, 0, sizeof(p->z));
  22113. sp_384_from_mp(p->x, 12, pm->x);
  22114. sp_384_from_mp(p->y, 12, pm->y);
  22115. sp_384_from_mp(p->z, 12, pm->z);
  22116. p->infinity = 0;
  22117. }
  22118. /* Convert an array of sp_digit to an mp_int.
  22119. *
  22120. * a A single precision integer.
  22121. * r A multi-precision integer.
  22122. */
  22123. static int sp_384_to_mp(const sp_digit* a, mp_int* r)
  22124. {
  22125. int err;
  22126. err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
  22127. if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
  22128. #if DIGIT_BIT == 32
  22129. XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
  22130. r->used = 12;
  22131. mp_clamp(r);
  22132. #elif DIGIT_BIT < 32
  22133. int i, j = 0, s = 0;
  22134. r->dp[0] = 0;
  22135. for (i = 0; i < 12; i++) {
  22136. r->dp[j] |= (mp_digit)(a[i] << s);
  22137. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  22138. s = DIGIT_BIT - s;
  22139. r->dp[++j] = (mp_digit)(a[i] >> s);
  22140. while (s + DIGIT_BIT <= 32) {
  22141. s += DIGIT_BIT;
  22142. r->dp[j++] &= (1L << DIGIT_BIT) - 1;
  22143. if (s == SP_WORD_SIZE) {
  22144. r->dp[j] = 0;
  22145. }
  22146. else {
  22147. r->dp[j] = (mp_digit)(a[i] >> s);
  22148. }
  22149. }
  22150. s = 32 - s;
  22151. }
  22152. r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
  22153. mp_clamp(r);
  22154. #else
  22155. int i, j = 0, s = 0;
  22156. r->dp[0] = 0;
  22157. for (i = 0; i < 12; i++) {
  22158. r->dp[j] |= ((mp_digit)a[i]) << s;
  22159. if (s + 32 >= DIGIT_BIT) {
  22160. #if DIGIT_BIT != 32 && DIGIT_BIT != 64
  22161. r->dp[j] &= (1L << DIGIT_BIT) - 1;
  22162. #endif
  22163. s = DIGIT_BIT - s;
  22164. r->dp[++j] = a[i] >> s;
  22165. s = 32 - s;
  22166. }
  22167. else {
  22168. s += 32;
  22169. }
  22170. }
  22171. r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
  22172. mp_clamp(r);
  22173. #endif
  22174. }
  22175. return err;
  22176. }
  22177. /* Convert a point of type sp_point_384 to type ecc_point.
  22178. *
  22179. * p Point of type sp_point_384.
  22180. * pm Point of type ecc_point (result).
  22181. * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
  22182. * MP_OKAY.
  22183. */
  22184. static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
  22185. {
  22186. int err;
  22187. err = sp_384_to_mp(p->x, pm->x);
  22188. if (err == MP_OKAY) {
  22189. err = sp_384_to_mp(p->y, pm->y);
  22190. }
  22191. if (err == MP_OKAY) {
  22192. err = sp_384_to_mp(p->z, pm->z);
  22193. }
  22194. return err;
  22195. }
  22196. /* Multiply a and b into r. (r = a * b)
  22197. *
  22198. * r A single precision integer.
  22199. * a A single precision integer.
  22200. * b A single precision integer.
  22201. */
  22202. SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
  22203. const sp_digit* b)
  22204. {
  22205. sp_digit tmp_arr[12 * 2];
  22206. sp_digit* tmp = tmp_arr;
  22207. __asm__ __volatile__ (
  22208. "mov r3, #0\n\t"
  22209. "mov r4, #0\n\t"
  22210. "mov r9, r3\n\t"
  22211. "mov r12, %[r]\n\t"
  22212. "mov r10, %[a]\n\t"
  22213. "mov r11, %[b]\n\t"
  22214. "mov r6, #48\n\t"
  22215. "add r6, r6, r10\n\t"
  22216. "mov r14, r6\n\t"
  22217. "\n1:\n\t"
  22218. "mov %[r], #0\n\t"
  22219. "mov r5, #0\n\t"
  22220. "mov r6, #44\n\t"
  22221. "mov %[a], r9\n\t"
  22222. "subs %[a], %[a], r6\n\t"
  22223. "sbc r6, r6, r6\n\t"
  22224. "mvn r6, r6\n\t"
  22225. "and %[a], %[a], r6\n\t"
  22226. "mov %[b], r9\n\t"
  22227. "sub %[b], %[b], %[a]\n\t"
  22228. "add %[a], %[a], r10\n\t"
  22229. "add %[b], %[b], r11\n\t"
  22230. "\n2:\n\t"
  22231. /* Multiply Start */
  22232. "ldr r6, [%[a]]\n\t"
  22233. "ldr r8, [%[b]]\n\t"
  22234. "umull r6, r8, r6, r8\n\t"
  22235. "adds r3, r3, r6\n\t"
  22236. "adcs r4, r4, r8\n\t"
  22237. "adc r5, r5, %[r]\n\t"
  22238. /* Multiply Done */
  22239. "add %[a], %[a], #4\n\t"
  22240. "sub %[b], %[b], #4\n\t"
  22241. "cmp %[a], r14\n\t"
  22242. #ifdef __GNUC__
  22243. "beq 3f\n\t"
  22244. #else
  22245. "beq.n 3f\n\t"
  22246. #endif /* __GNUC__ */
  22247. "mov r6, r9\n\t"
  22248. "add r6, r6, r10\n\t"
  22249. "cmp %[a], r6\n\t"
  22250. #ifdef __GNUC__
  22251. "ble 2b\n\t"
  22252. #else
  22253. "ble.n 2b\n\t"
  22254. #endif /* __GNUC__ */
  22255. "\n3:\n\t"
  22256. "mov %[r], r12\n\t"
  22257. "mov r8, r9\n\t"
  22258. "str r3, [%[r], r8]\n\t"
  22259. "mov r3, r4\n\t"
  22260. "mov r4, r5\n\t"
  22261. "add r8, r8, #4\n\t"
  22262. "mov r9, r8\n\t"
  22263. "mov r6, #88\n\t"
  22264. "cmp r8, r6\n\t"
  22265. #ifdef __GNUC__
  22266. "ble 1b\n\t"
  22267. #else
  22268. "ble.n 1b\n\t"
  22269. #endif /* __GNUC__ */
  22270. "str r3, [%[r], r8]\n\t"
  22271. "mov %[a], r10\n\t"
  22272. "mov %[b], r11\n\t"
  22273. :
  22274. : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
  22275. : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  22276. );
  22277. XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
  22278. }
  22279. /* Conditionally subtract b from a using the mask m.
  22280. * m is -1 to subtract and 0 when not copying.
  22281. *
  22282. * r A single precision number representing condition subtract result.
  22283. * a A single precision number to subtract from.
  22284. * b A single precision number to subtract.
  22285. * m Mask value to apply.
  22286. */
  22287. SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
  22288. const sp_digit* b, sp_digit m)
  22289. {
  22290. sp_digit c = 0;
  22291. __asm__ __volatile__ (
  22292. "mov r5, #48\n\t"
  22293. "mov r9, r5\n\t"
  22294. "mov r8, #0\n\t"
  22295. "\n1:\n\t"
  22296. "ldr r6, [%[b], r8]\n\t"
  22297. "and r6, r6, %[m]\n\t"
  22298. "mov r5, #0\n\t"
  22299. "subs r5, r5, %[c]\n\t"
  22300. "ldr r5, [%[a], r8]\n\t"
  22301. "sbcs r5, r5, r6\n\t"
  22302. "sbcs %[c], %[c], %[c]\n\t"
  22303. "str r5, [%[r], r8]\n\t"
  22304. "add r8, r8, #4\n\t"
  22305. "cmp r8, r9\n\t"
  22306. #ifdef __GNUC__
  22307. "blt 1b\n\t"
  22308. #else
  22309. "blt.n 1b\n\t"
  22310. #endif /* __GNUC__ */
  22311. : [c] "+r" (c)
  22312. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  22313. : "memory", "r5", "r6", "r8", "r9"
  22314. );
  22315. return c;
  22316. }
  22317. #define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12
  22318. /* Reduce the number back to 384 bits using Montgomery reduction.
  22319. *
  22320. * a A single precision number to reduce in place.
  22321. * m The single precision number representing the modulus.
  22322. * mp The digit representing the negative inverse of m mod 2^n.
  22323. */
  22324. SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
  22325. sp_digit mp)
  22326. {
  22327. sp_digit ca = 0;
  22328. __asm__ __volatile__ (
  22329. "mov r9, %[mp]\n\t"
  22330. "mov r12, %[m]\n\t"
  22331. "mov r10, %[a]\n\t"
  22332. "mov r4, #0\n\t"
  22333. "add r11, r10, #48\n\t"
  22334. "\n1:\n\t"
  22335. /* mu = a[i] * mp */
  22336. "mov %[mp], r9\n\t"
  22337. "ldr %[a], [r10]\n\t"
  22338. "mul %[mp], %[mp], %[a]\n\t"
  22339. "mov %[m], r12\n\t"
  22340. "add r14, r10, #40\n\t"
  22341. "\n2:\n\t"
  22342. /* a[i+j] += m[j] * mu */
  22343. "ldr %[a], [r10]\n\t"
  22344. "mov r5, #0\n\t"
  22345. /* Multiply m[j] and mu - Start */
  22346. "ldr r8, [%[m]], #4\n\t"
  22347. "umull r6, r8, %[mp], r8\n\t"
  22348. "adds %[a], %[a], r6\n\t"
  22349. "adc r5, r5, r8\n\t"
  22350. /* Multiply m[j] and mu - Done */
  22351. "adds r4, r4, %[a]\n\t"
  22352. "adc r5, r5, #0\n\t"
  22353. "str r4, [r10], #4\n\t"
  22354. /* a[i+j+1] += m[j+1] * mu */
  22355. "ldr %[a], [r10]\n\t"
  22356. "mov r4, #0\n\t"
  22357. /* Multiply m[j] and mu - Start */
  22358. "ldr r8, [%[m]], #4\n\t"
  22359. "umull r6, r8, %[mp], r8\n\t"
  22360. "adds %[a], %[a], r6\n\t"
  22361. "adc r4, r4, r8\n\t"
  22362. /* Multiply m[j] and mu - Done */
  22363. "adds r5, r5, %[a]\n\t"
  22364. "adc r4, r4, #0\n\t"
  22365. "str r5, [r10], #4\n\t"
  22366. "cmp r10, r14\n\t"
  22367. #ifdef __GNUC__
  22368. "blt 2b\n\t"
  22369. #else
  22370. "blt.n 2b\n\t"
  22371. #endif /* __GNUC__ */
  22372. /* a[i+10] += m[10] * mu */
  22373. "ldr %[a], [r10]\n\t"
  22374. "mov r5, #0\n\t"
  22375. /* Multiply m[j] and mu - Start */
  22376. "ldr r8, [%[m]], #4\n\t"
  22377. "umull r6, r8, %[mp], r8\n\t"
  22378. "adds %[a], %[a], r6\n\t"
  22379. "adc r5, r5, r8\n\t"
  22380. /* Multiply m[j] and mu - Done */
  22381. "adds r4, r4, %[a]\n\t"
  22382. "adc r5, r5, #0\n\t"
  22383. "str r4, [r10], #4\n\t"
  22384. /* a[i+11] += m[11] * mu */
  22385. "mov r4, %[ca]\n\t"
  22386. "mov %[ca], #0\n\t"
  22387. /* Multiply m[11] and mu - Start */
  22388. "ldr r8, [%[m]]\n\t"
  22389. "umull r6, r8, %[mp], r8\n\t"
  22390. "adds r5, r5, r6\n\t"
  22391. "adcs r4, r4, r8\n\t"
  22392. "adc %[ca], %[ca], #0\n\t"
  22393. /* Multiply m[11] and mu - Done */
  22394. "ldr r6, [r10]\n\t"
  22395. "ldr r8, [r10, #4]\n\t"
  22396. "adds r6, r6, r5\n\t"
  22397. "adcs r8, r8, r4\n\t"
  22398. "adc %[ca], %[ca], #0\n\t"
  22399. "str r6, [r10]\n\t"
  22400. "str r8, [r10, #4]\n\t"
  22401. /* Next word in a */
  22402. "sub r10, r10, #40\n\t"
  22403. "cmp r10, r11\n\t"
  22404. #ifdef __GNUC__
  22405. "blt 1b\n\t"
  22406. #else
  22407. "blt.n 1b\n\t"
  22408. #endif /* __GNUC__ */
  22409. "mov %[a], r10\n\t"
  22410. "mov %[m], r12\n\t"
  22411. : [ca] "+r" (ca), [a] "+r" (a)
  22412. : [m] "r" (m), [mp] "r" (mp)
  22413. : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
  22414. );
  22415. sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
  22416. }
  22417. /* Multiply two Montogmery form numbers mod the modulus (prime).
  22418. * (r = a * b mod m)
  22419. *
  22420. * r Result of multiplication.
  22421. * a First number to multiply in Montogmery form.
  22422. * b Second number to multiply in Montogmery form.
  22423. * m Modulus (prime).
  22424. * mp Montogmery mulitplier.
  22425. */
  22426. static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
  22427. const sp_digit* m, sp_digit mp)
  22428. {
  22429. sp_384_mul_12(r, a, b);
  22430. sp_384_mont_reduce_12(r, m, mp);
  22431. }
  22432. /* Square a and put result in r. (r = a * a)
  22433. *
  22434. * r A single precision integer.
  22435. * a A single precision integer.
  22436. */
  22437. SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
  22438. {
  22439. __asm__ __volatile__ (
  22440. "mov r3, #0\n\t"
  22441. "mov r4, #0\n\t"
  22442. "mov r5, #0\n\t"
  22443. "mov r9, r3\n\t"
  22444. "mov r12, %[r]\n\t"
  22445. "mov r6, #96\n\t"
  22446. "neg r6, r6\n\t"
  22447. "add sp, sp, r6\n\t"
  22448. "mov r11, sp\n\t"
  22449. "mov r10, %[a]\n\t"
  22450. "\n1:\n\t"
  22451. "mov %[r], #0\n\t"
  22452. "mov r6, #44\n\t"
  22453. "mov %[a], r9\n\t"
  22454. "subs %[a], %[a], r6\n\t"
  22455. "sbc r6, r6, r6\n\t"
  22456. "mvn r6, r6\n\t"
  22457. "and %[a], %[a], r6\n\t"
  22458. "mov r2, r9\n\t"
  22459. "sub r2, r2, %[a]\n\t"
  22460. "add %[a], %[a], r10\n\t"
  22461. "add r2, r2, r10\n\t"
  22462. "\n2:\n\t"
  22463. "cmp r2, %[a]\n\t"
  22464. #ifdef __GNUC__
  22465. "beq 4f\n\t"
  22466. #else
  22467. "beq.n 4f\n\t"
  22468. #endif /* __GNUC__ */
  22469. /* Multiply * 2: Start */
  22470. "ldr r6, [%[a]]\n\t"
  22471. "ldr r8, [r2]\n\t"
  22472. "umull r6, r8, r6, r8\n\t"
  22473. "adds r3, r3, r6\n\t"
  22474. "adcs r4, r4, r8\n\t"
  22475. "adc r5, r5, %[r]\n\t"
  22476. "adds r3, r3, r6\n\t"
  22477. "adcs r4, r4, r8\n\t"
  22478. "adc r5, r5, %[r]\n\t"
  22479. /* Multiply * 2: Done */
  22480. #ifdef __GNUC__
  22481. "bal 5f\n\t"
  22482. #else
  22483. "bal.n 5f\n\t"
  22484. #endif /* __GNUC__ */
  22485. "\n4:\n\t"
  22486. /* Square: Start */
  22487. "ldr r6, [%[a]]\n\t"
  22488. "umull r6, r8, r6, r6\n\t"
  22489. "adds r3, r3, r6\n\t"
  22490. "adcs r4, r4, r8\n\t"
  22491. "adc r5, r5, %[r]\n\t"
  22492. /* Square: Done */
  22493. "\n5:\n\t"
  22494. "add %[a], %[a], #4\n\t"
  22495. "sub r2, r2, #4\n\t"
  22496. "mov r6, #48\n\t"
  22497. "add r6, r6, r10\n\t"
  22498. "cmp %[a], r6\n\t"
  22499. #ifdef __GNUC__
  22500. "beq 3f\n\t"
  22501. #else
  22502. "beq.n 3f\n\t"
  22503. #endif /* __GNUC__ */
  22504. "cmp %[a], r2\n\t"
  22505. #ifdef __GNUC__
  22506. "bgt 3f\n\t"
  22507. #else
  22508. "bgt.n 3f\n\t"
  22509. #endif /* __GNUC__ */
  22510. "mov r8, r9\n\t"
  22511. "add r8, r8, r10\n\t"
  22512. "cmp %[a], r8\n\t"
  22513. #ifdef __GNUC__
  22514. "ble 2b\n\t"
  22515. #else
  22516. "ble.n 2b\n\t"
  22517. #endif /* __GNUC__ */
  22518. "\n3:\n\t"
  22519. "mov %[r], r11\n\t"
  22520. "mov r8, r9\n\t"
  22521. "str r3, [%[r], r8]\n\t"
  22522. "mov r3, r4\n\t"
  22523. "mov r4, r5\n\t"
  22524. "mov r5, #0\n\t"
  22525. "add r8, r8, #4\n\t"
  22526. "mov r9, r8\n\t"
  22527. "mov r6, #88\n\t"
  22528. "cmp r8, r6\n\t"
  22529. #ifdef __GNUC__
  22530. "ble 1b\n\t"
  22531. #else
  22532. "ble.n 1b\n\t"
  22533. #endif /* __GNUC__ */
  22534. "mov %[a], r10\n\t"
  22535. "str r3, [%[r], r8]\n\t"
  22536. "mov %[r], r12\n\t"
  22537. "mov %[a], r11\n\t"
  22538. "mov r3, #92\n\t"
  22539. "\n4:\n\t"
  22540. "ldr r6, [%[a], r3]\n\t"
  22541. "str r6, [%[r], r3]\n\t"
  22542. "subs r3, r3, #4\n\t"
  22543. #ifdef __GNUC__
  22544. "bge 4b\n\t"
  22545. #else
  22546. "bge.n 4b\n\t"
  22547. #endif /* __GNUC__ */
  22548. "mov r6, #96\n\t"
  22549. "add sp, sp, r6\n\t"
  22550. :
  22551. : [r] "r" (r), [a] "r" (a)
  22552. : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
  22553. );
  22554. }
  22555. /* Square the Montgomery form number. (r = a * a mod m)
  22556. *
  22557. * r Result of squaring.
  22558. * a Number to square in Montogmery form.
  22559. * m Modulus (prime).
  22560. * mp Montogmery mulitplier.
  22561. */
  22562. static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
  22563. sp_digit mp)
  22564. {
  22565. sp_384_sqr_12(r, a);
  22566. sp_384_mont_reduce_12(r, m, mp);
  22567. }
  22568. #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
  22569. /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
  22570. *
  22571. * r Result of squaring.
  22572. * a Number to square in Montogmery form.
  22573. * n Number of times to square.
  22574. * m Modulus (prime).
  22575. * mp Montogmery mulitplier.
  22576. */
  22577. static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
  22578. const sp_digit* m, sp_digit mp)
  22579. {
  22580. sp_384_mont_sqr_12(r, a, m, mp);
  22581. for (; n > 1; n--) {
  22582. sp_384_mont_sqr_12(r, r, m, mp);
  22583. }
  22584. }
  22585. #endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
  22586. #ifdef WOLFSSL_SP_SMALL
  /* The P384 prime modulus minus 2, least significant word first.
   * Used as the exponent when inverting by exponentiation. */
  static const uint32_t p384_mod_minus_2[12] = {
      0xfffffffdU, 0x00000000U, 0x00000000U, 0xffffffffU,
      0xfffffffeU, 0xffffffffU, 0xffffffffU, 0xffffffffU,
      0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU
  };
  22592. #endif /* !WOLFSSL_SP_SMALL */
  22593. /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
  22594. * P384 curve. (r = 1 / a mod m)
  22595. *
  22596. * r Inverse result.
  22597. * a Number to invert.
  22598. * td Temporary data.
  22599. */
  22600. static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
  22601. {
  22602. #ifdef WOLFSSL_SP_SMALL
  22603. sp_digit* t = td;
  22604. int i;
  22605. XMEMCPY(t, a, sizeof(sp_digit) * 12);
  22606. for (i=382; i>=0; i--) {
  22607. sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
  22608. if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
  22609. sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
  22610. }
  22611. XMEMCPY(r, t, sizeof(sp_digit) * 12);
  22612. #else
  22613. sp_digit* t1 = td;
  22614. sp_digit* t2 = td + 2 * 12;
  22615. sp_digit* t3 = td + 4 * 12;
  22616. sp_digit* t4 = td + 6 * 12;
  22617. sp_digit* t5 = td + 8 * 12;
  22618. /* 0x2 */
  22619. sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
  22620. /* 0x3 */
  22621. sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
  22622. /* 0xc */
  22623. sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
  22624. /* 0xf */
  22625. sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
  22626. /* 0x1e */
  22627. sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
  22628. /* 0x1f */
  22629. sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
  22630. /* 0x3e0 */
  22631. sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
  22632. /* 0x3ff */
  22633. sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
  22634. /* 0x7fe0 */
  22635. sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
  22636. /* 0x7fff */
  22637. sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
  22638. /* 0x3fff8000 */
  22639. sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
  22640. /* 0x3fffffff */
  22641. sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
  22642. /* 0xfffffffc */
  22643. sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
  22644. /* 0xfffffffd */
  22645. sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
  22646. /* 0xffffffff */
  22647. sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
  22648. /* 0xfffffffc0000000 */
  22649. sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
  22650. /* 0xfffffffffffffff */
  22651. sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
  22652. /* 0xfffffffffffffff000000000000000 */
  22653. sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
  22654. /* 0xffffffffffffffffffffffffffffff */
  22655. sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
  22656. /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
  22657. sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
  22658. /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
  22659. sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
  22660. /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
  22661. sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
  22662. /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
  22663. sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
  22664. /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
  22665. sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
  22666. /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
  22667. sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
  22668. /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
  22669. sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
  22670. /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
  22671. sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
  22672. #endif /* WOLFSSL_SP_SMALL */
  22673. }
  22674. /* Compare a with b in constant time.
  22675. *
  22676. * a A single precision integer.
  22677. * b A single precision integer.
  22678. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  22679. * respectively.
  22680. */
  22681. SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
  22682. {
  22683. sp_digit r = 0;
  22684. __asm__ __volatile__ (
  22685. "mov r3, #0\n\t"
  22686. "mvn r3, r3\n\t"
  22687. "mov r6, #44\n\t"
  22688. "\n1:\n\t"
  22689. "ldr r8, [%[a], r6]\n\t"
  22690. "ldr r5, [%[b], r6]\n\t"
  22691. "and r8, r8, r3\n\t"
  22692. "and r5, r5, r3\n\t"
  22693. "mov r4, r8\n\t"
  22694. "subs r8, r8, r5\n\t"
  22695. "sbc r8, r8, r8\n\t"
  22696. "add %[r], %[r], r8\n\t"
  22697. "mvn r8, r8\n\t"
  22698. "and r3, r3, r8\n\t"
  22699. "subs r5, r5, r4\n\t"
  22700. "sbc r8, r8, r8\n\t"
  22701. "sub %[r], %[r], r8\n\t"
  22702. "mvn r8, r8\n\t"
  22703. "and r3, r3, r8\n\t"
  22704. "sub r6, r6, #4\n\t"
  22705. "cmp r6, #0\n\t"
  22706. #ifdef __GNUC__
  22707. "bge 1b\n\t"
  22708. #else
  22709. "bge.n 1b\n\t"
  22710. #endif /* __GNUC__ */
  22711. : [r] "+r" (r)
  22712. : [a] "r" (a), [b] "r" (b)
  22713. : "r3", "r4", "r5", "r6", "r8"
  22714. );
  22715. return r;
  22716. }
  /* Normalize the values in each word to 32 bits.
   * Expands to nothing in this implementation.
   *
   * a  Array of sp_digit to normalize.
   */
  #define sp_384_norm_12(a)
  22722. /* Map the Montgomery form projective coordinate point to an affine point.
  22723. *
  22724. * r Resulting affine coordinate point.
  22725. * p Montgomery form projective coordinate point.
  22726. * t Temporary ordinate data.
  22727. */
  22728. static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
  22729. {
  22730. sp_digit* t1 = t;
  22731. sp_digit* t2 = t + 2*12;
  22732. int32_t n;
  22733. sp_384_mont_inv_12(t1, p->z, t + 2*12);
  22734. sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
  22735. sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
  22736. /* x /= z^2 */
  22737. sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
  22738. XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
  22739. sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
  22740. /* Reduce x to less than modulus */
  22741. n = sp_384_cmp_12(r->x, p384_mod);
  22742. sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
  22743. (sp_digit)1 : (sp_digit)0));
  22744. sp_384_norm_12(r->x);
  22745. /* y /= z^3 */
  22746. sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
  22747. XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
  22748. sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
  22749. /* Reduce y to less than modulus */
  22750. n = sp_384_cmp_12(r->y, p384_mod);
  22751. sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
  22752. (sp_digit)1 : (sp_digit)0));
  22753. sp_384_norm_12(r->y);
  22754. XMEMSET(r->z, 0, sizeof(r->z));
  22755. r->z[0] = 1;
  22756. }
  22757. #ifdef WOLFSSL_SP_SMALL
  22758. /* Add b to a into r. (r = a + b)
  22759. *
  22760. * r A single precision integer.
  22761. * a A single precision integer.
  22762. * b A single precision integer.
  22763. */
  22764. SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
  22765. const sp_digit* b)
  22766. {
  22767. sp_digit c = 0;
  22768. __asm__ __volatile__ (
  22769. "mov r6, %[a]\n\t"
  22770. "mov r8, #0\n\t"
  22771. "add r6, r6, #48\n\t"
  22772. "sub r8, r8, #1\n\t"
  22773. "\n1:\n\t"
  22774. "adds %[c], %[c], r8\n\t"
  22775. "ldr r4, [%[a]]\n\t"
  22776. "ldr r5, [%[b]]\n\t"
  22777. "adcs r4, r4, r5\n\t"
  22778. "str r4, [%[r]]\n\t"
  22779. "mov %[c], #0\n\t"
  22780. "adc %[c], %[c], %[c]\n\t"
  22781. "add %[a], %[a], #4\n\t"
  22782. "add %[b], %[b], #4\n\t"
  22783. "add %[r], %[r], #4\n\t"
  22784. "cmp %[a], r6\n\t"
  22785. #ifdef __GNUC__
  22786. "bne 1b\n\t"
  22787. #else
  22788. "bne.n 1b\n\t"
  22789. #endif /* __GNUC__ */
  22790. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  22791. :
  22792. : "memory", "r4", "r5", "r6", "r8"
  22793. );
  22794. return c;
  22795. }
  22796. #else
  22797. /* Add b to a into r. (r = a + b)
  22798. *
  22799. * r A single precision integer.
  22800. * a A single precision integer.
  22801. * b A single precision integer.
  22802. */
  22803. SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
  22804. const sp_digit* b)
  22805. {
  22806. sp_digit c = 0;
  22807. __asm__ __volatile__ (
  22808. "ldm %[a]!, {r4, r5}\n\t"
  22809. "ldm %[b]!, {r6, r8}\n\t"
  22810. "adds r4, r4, r6\n\t"
  22811. "adcs r5, r5, r8\n\t"
  22812. "stm %[r]!, {r4, r5}\n\t"
  22813. "ldm %[a]!, {r4, r5}\n\t"
  22814. "ldm %[b]!, {r6, r8}\n\t"
  22815. "adcs r4, r4, r6\n\t"
  22816. "adcs r5, r5, r8\n\t"
  22817. "stm %[r]!, {r4, r5}\n\t"
  22818. "ldm %[a]!, {r4, r5}\n\t"
  22819. "ldm %[b]!, {r6, r8}\n\t"
  22820. "adcs r4, r4, r6\n\t"
  22821. "adcs r5, r5, r8\n\t"
  22822. "stm %[r]!, {r4, r5}\n\t"
  22823. "ldm %[a]!, {r4, r5}\n\t"
  22824. "ldm %[b]!, {r6, r8}\n\t"
  22825. "adcs r4, r4, r6\n\t"
  22826. "adcs r5, r5, r8\n\t"
  22827. "stm %[r]!, {r4, r5}\n\t"
  22828. "ldm %[a]!, {r4, r5}\n\t"
  22829. "ldm %[b]!, {r6, r8}\n\t"
  22830. "adcs r4, r4, r6\n\t"
  22831. "adcs r5, r5, r8\n\t"
  22832. "stm %[r]!, {r4, r5}\n\t"
  22833. "ldm %[a]!, {r4, r5}\n\t"
  22834. "ldm %[b]!, {r6, r8}\n\t"
  22835. "adcs r4, r4, r6\n\t"
  22836. "adcs r5, r5, r8\n\t"
  22837. "stm %[r]!, {r4, r5}\n\t"
  22838. "mov %[c], #0\n\t"
  22839. "adc %[c], %[c], %[c]\n\t"
  22840. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  22841. :
  22842. : "memory", "r4", "r5", "r6", "r8"
  22843. );
  22844. return c;
  22845. }
  22846. #endif /* WOLFSSL_SP_SMALL */
  22847. /* Add two Montgomery form numbers (r = a + b % m).
  22848. *
  22849. * r Result of addition.
  22850. * a First number to add in Montogmery form.
  22851. * b Second number to add in Montogmery form.
  22852. * m Modulus (prime).
  22853. */
  22854. SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
  22855. const sp_digit* m)
  22856. {
  22857. sp_digit o;
  22858. o = sp_384_add_12(r, a, b);
  22859. sp_384_cond_sub_12(r, r, m, 0 - o);
  22860. }
  22861. /* Double a Montgomery form number (r = a + a % m).
  22862. *
  22863. * r Result of doubling.
  22864. * a Number to double in Montogmery form.
  22865. * m Modulus (prime).
  22866. */
  22867. SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  22868. {
  22869. sp_digit o;
  22870. o = sp_384_add_12(r, a, a);
  22871. sp_384_cond_sub_12(r, r, m, 0 - o);
  22872. }
  22873. /* Triple a Montgomery form number (r = a + a + a % m).
  22874. *
  22875. * r Result of Tripling.
  22876. * a Number to triple in Montogmery form.
  22877. * m Modulus (prime).
  22878. */
  22879. SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  22880. {
  22881. sp_digit o;
  22882. o = sp_384_add_12(r, a, a);
  22883. sp_384_cond_sub_12(r, r, m, 0 - o);
  22884. o = sp_384_add_12(r, r, a);
  22885. sp_384_cond_sub_12(r, r, m, 0 - o);
  22886. }
  22887. #ifdef WOLFSSL_SP_SMALL
  22888. /* Subtract b from a and store the difference in r. (r = a - b)
  22889. * Small code size variant: one 32-bit word is processed per loop pass.
  22890. * r Result of the subtraction (12 words are written).
  22891. * a Number to subtract from (12 words are read).
  22892. * b Number to subtract (12 words are read).
  22893. * returns 0 when no borrow occurred and all bits set (-1) on borrow. */
  22894. SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
  22895. const sp_digit* b)
  22896. {
  22897. sp_digit c = 0; /* Borrow state carried between loop passes: 0 or -1. */
  22898. __asm__ __volatile__ (
  22899. "mov r6, %[a]\n\t"
  22900. "add r6, r6, #48\n\t" /* r6 = a + 48 bytes: end marker for the loop. */
  22901. "\n1:\n\t"
  22902. "mov r5, #0\n\t"
  22903. "subs r5, r5, %[c]\n\t" /* 0 - c: puts the saved borrow back into the carry flag. */
  22904. "ldr r4, [%[a]]\n\t"
  22905. "ldr r5, [%[b]]\n\t"
  22906. "sbcs r4, r4, r5\n\t" /* Word of a minus word of b minus incoming borrow. */
  22907. "str r4, [%[r]]\n\t"
  22908. "sbc %[c], %[c], %[c]\n\t" /* c = 0 without borrow, -1 with borrow. */
  22909. "add %[a], %[a], #4\n\t"
  22910. "add %[b], %[b], #4\n\t"
  22911. "add %[r], %[r], #4\n\t"
  22912. "cmp %[a], r6\n\t" /* Repeat until a reaches the end marker. */
  22913. #ifdef __GNUC__
  22914. "bne 1b\n\t"
  22915. #else
  22916. "bne.n 1b\n\t"
  22917. #endif /* __GNUC__ */
  22918. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  22919. :
  22920. : "memory", "r4", "r5", "r6"
  22921. );
  22922. return c;
  22923. }
  22924. #else
  22925. /* Subtract b from a and store the difference in r. (r = a - b)
  22926. * Unrolled variant: two 32-bit words are processed per group of instructions.
  22927. * r Result of the subtraction (12 words are written).
  22928. * a Number to subtract from (12 words are read).
  22929. * b Number to subtract (12 words are read).
  22930. * returns 0 when no borrow occurred and all bits set (-1) on borrow. */
  22931. SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
  22932. const sp_digit* b)
  22933. {
  22934. sp_digit c = 0;
  22935. __asm__ __volatile__ (
  22936. "ldr r4, [%[a], #0]\n\t"
  22937. "ldr r5, [%[a], #4]\n\t"
  22938. "ldr r6, [%[b], #0]\n\t"
  22939. "ldr r8, [%[b], #4]\n\t"
  22940. "subs r4, r4, r6\n\t" /* Lowest word: plain subtract, no incoming borrow. */
  22941. "sbcs r5, r5, r8\n\t" /* Every later word subtracts with the running borrow. */
  22942. "str r4, [%[r], #0]\n\t"
  22943. "str r5, [%[r], #4]\n\t"
  22944. "ldr r4, [%[a], #8]\n\t"
  22945. "ldr r5, [%[a], #12]\n\t"
  22946. "ldr r6, [%[b], #8]\n\t"
  22947. "ldr r8, [%[b], #12]\n\t"
  22948. "sbcs r4, r4, r6\n\t"
  22949. "sbcs r5, r5, r8\n\t"
  22950. "str r4, [%[r], #8]\n\t"
  22951. "str r5, [%[r], #12]\n\t"
  22952. "ldr r4, [%[a], #16]\n\t"
  22953. "ldr r5, [%[a], #20]\n\t"
  22954. "ldr r6, [%[b], #16]\n\t"
  22955. "ldr r8, [%[b], #20]\n\t"
  22956. "sbcs r4, r4, r6\n\t"
  22957. "sbcs r5, r5, r8\n\t"
  22958. "str r4, [%[r], #16]\n\t"
  22959. "str r5, [%[r], #20]\n\t"
  22960. "ldr r4, [%[a], #24]\n\t"
  22961. "ldr r5, [%[a], #28]\n\t"
  22962. "ldr r6, [%[b], #24]\n\t"
  22963. "ldr r8, [%[b], #28]\n\t"
  22964. "sbcs r4, r4, r6\n\t"
  22965. "sbcs r5, r5, r8\n\t"
  22966. "str r4, [%[r], #24]\n\t"
  22967. "str r5, [%[r], #28]\n\t"
  22968. "ldr r4, [%[a], #32]\n\t"
  22969. "ldr r5, [%[a], #36]\n\t"
  22970. "ldr r6, [%[b], #32]\n\t"
  22971. "ldr r8, [%[b], #36]\n\t"
  22972. "sbcs r4, r4, r6\n\t"
  22973. "sbcs r5, r5, r8\n\t"
  22974. "str r4, [%[r], #32]\n\t"
  22975. "str r5, [%[r], #36]\n\t"
  22976. "ldr r4, [%[a], #40]\n\t"
  22977. "ldr r5, [%[a], #44]\n\t"
  22978. "ldr r6, [%[b], #40]\n\t"
  22979. "ldr r8, [%[b], #44]\n\t"
  22980. "sbcs r4, r4, r6\n\t"
  22981. "sbcs r5, r5, r8\n\t"
  22982. "str r4, [%[r], #40]\n\t"
  22983. "str r5, [%[r], #44]\n\t"
  22984. "sbc %[c], %[c], %[c]\n\t" /* c = 0 without a final borrow, -1 with one. */
  22985. : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
  22986. :
  22987. : "memory", "r4", "r5", "r6", "r8"
  22988. );
  22989. return c;
  22990. }
  22991. #endif /* WOLFSSL_SP_SMALL */
  22992. /* Conditionally add b to a, selecting with the mask m. (r = a + (b & m))
  22993. * m is -1 to add and 0 when not; every word of b is ANDed with m first.
  22994. *
  22995. * r Result of the conditional addition (12 words are written).
  22996. * a Number to add to (12 words are read).
  22997. * b Number that is added when the mask is set (12 words are read).
  22998. * m Mask value to apply.
  22999. * returns the carry out of the top word: 0 or 1. */
  23000. SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
  23001. sp_digit m)
  23002. {
  23003. sp_digit c = 0; /* Carry saved between loop passes: 0 or 1. */
  23004. __asm__ __volatile__ (
  23005. "mov r5, #48\n\t"
  23006. "mov r9, r5\n\t" /* r9 = 48: byte length of the 12 words. */
  23007. "mov r8, #0\n\t" /* r8 = byte offset of the current word. */
  23008. "\n1:\n\t"
  23009. "ldr r6, [%[b], r8]\n\t"
  23010. "and r6, r6, %[m]\n\t" /* Masked word of b: unchanged or zero. */
  23011. "adds r5, %[c], #-1\n\t" /* c + 0xffffffff: sets the carry flag exactly when c is 1. */
  23012. "ldr r5, [%[a], r8]\n\t"
  23013. "adcs r5, r5, r6\n\t" /* Word of a plus masked word of b plus incoming carry. */
  23014. "mov %[c], #0\n\t"
  23015. "adcs %[c], %[c], %[c]\n\t" /* c = carry out of this word. */
  23016. "str r5, [%[r], r8]\n\t"
  23017. "add r8, r8, #4\n\t"
  23018. "cmp r8, r9\n\t" /* Repeat while the offset is below 48. */
  23019. #ifdef __GNUC__
  23020. "blt 1b\n\t"
  23021. #else
  23022. "blt.n 1b\n\t"
  23023. #endif /* __GNUC__ */
  23024. : [c] "+r" (c)
  23025. : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
  23026. : "memory", "r5", "r6", "r8", "r9"
  23027. );
  23028. return c;
  23029. }
  23030. /* Subtract two Montgomery form numbers (r = a - b % m).
  23031. *
  23032. * r Result of subtration.
  23033. * a Number to subtract from in Montogmery form.
  23034. * b Number to subtract with in Montogmery form.
  23035. * m Modulus (prime).
  23036. */
  23037. SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
  23038. const sp_digit* m)
  23039. {
  23040. sp_digit o;
  23041. o = sp_384_sub_12(r, a, b);
  23042. sp_384_cond_add_12(r, r, m, o);
  23043. }
  /* Shift the 12 word number a right by one bit and store it in r.
   * Bit 0 of each higher word moves into bit 31 of the word below it; bit 31
   * of the top word (r[11]) becomes 0. r2, r3 and r4 are used in rotation so
   * that each word is loaded only once.
   *
   * r Result of the shift (12 words are written).
   * a Number to shift (12 words are read).
   */
  23044. static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
  23045. {
  23046. __asm__ __volatile__ (
  23047. "ldr r2, [%[a]]\n\t"
  23048. "ldr r3, [%[a], #4]\n\t"
  23049. "lsr r2, r2, #1\n\t" /* Word 0 shifted down by one bit. */
  23050. "orr r2, r2, r3, lsl #31\n\t" /* Bit 0 of word 1 becomes bit 31 of word 0. */
  23051. "lsr r3, r3, #1\n\t"
  23052. "ldr r4, [%[a], #8]\n\t"
  23053. "str r2, [%[r], #0]\n\t"
  23054. "orr r3, r3, r4, lsl #31\n\t"
  23055. "lsr r4, r4, #1\n\t"
  23056. "ldr r2, [%[a], #12]\n\t"
  23057. "str r3, [%[r], #4]\n\t"
  23058. "orr r4, r4, r2, lsl #31\n\t"
  23059. "lsr r2, r2, #1\n\t"
  23060. "ldr r3, [%[a], #16]\n\t"
  23061. "str r4, [%[r], #8]\n\t"
  23062. "orr r2, r2, r3, lsl #31\n\t"
  23063. "lsr r3, r3, #1\n\t"
  23064. "ldr r4, [%[a], #20]\n\t"
  23065. "str r2, [%[r], #12]\n\t"
  23066. "orr r3, r3, r4, lsl #31\n\t"
  23067. "lsr r4, r4, #1\n\t"
  23068. "ldr r2, [%[a], #24]\n\t"
  23069. "str r3, [%[r], #16]\n\t"
  23070. "orr r4, r4, r2, lsl #31\n\t"
  23071. "lsr r2, r2, #1\n\t"
  23072. "ldr r3, [%[a], #28]\n\t"
  23073. "str r4, [%[r], #20]\n\t"
  23074. "orr r2, r2, r3, lsl #31\n\t"
  23075. "lsr r3, r3, #1\n\t"
  23076. "ldr r4, [%[a], #32]\n\t"
  23077. "str r2, [%[r], #24]\n\t"
  23078. "orr r3, r3, r4, lsl #31\n\t"
  23079. "lsr r4, r4, #1\n\t"
  23080. "ldr r2, [%[a], #36]\n\t"
  23081. "str r3, [%[r], #28]\n\t"
  23082. "orr r4, r4, r2, lsl #31\n\t"
  23083. "lsr r2, r2, #1\n\t"
  23084. "ldr r3, [%[a], #40]\n\t"
  23085. "str r4, [%[r], #32]\n\t"
  23086. "orr r2, r2, r3, lsl #31\n\t"
  23087. "lsr r3, r3, #1\n\t"
  23088. "ldr r4, [%[a], #44]\n\t"
  23089. "str r2, [%[r], #36]\n\t"
  23090. "orr r3, r3, r4, lsl #31\n\t"
  23091. "lsr r4, r4, #1\n\t" /* Top word: nothing above it, so bit 31 is cleared. */
  23092. "str r3, [%[r], #40]\n\t"
  23093. "str r4, [%[r], #44]\n\t"
  23094. :
  23095. : [r] "r" (r), [a] "r" (a)
  23096. : "memory", "r2", "r3", "r4"
  23097. );
  23098. }
  23099. /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  23100. *
  23101. * r Result of division by 2.
  23102. * a Number to divide.
  23103. * m Modulus (prime).
  23104. */
  23105. SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  23106. {
  23107. sp_digit o;
  23108. o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
  23109. sp_384_rshift1_12(r, r);
  23110. r[11] |= o << 31;
  23111. }
  /* Double the Montgomery form projective point p.
   *
   * r  Result of doubling point.
   * p  Point to double.
   * t  Temporary ordinate data.
   */
  #ifdef WOLFSSL_SP_NONBLOCK
  /* Progress record of a non-blocking point doubling. */
  typedef struct sp_384_proj_point_dbl_12_ctx {
      int state;      /* Step that the next call executes. */
      sp_digit* t1;   /* First temporary: start of t. */
      sp_digit* t2;   /* Second temporary: t + 2*12. */
      sp_digit* x;    /* X ordinate of the result point. */
      sp_digit* y;    /* Y ordinate of the result point. */
      sp_digit* z;    /* Z ordinate of the result point. */
  } sp_384_proj_point_dbl_12_ctx;

  /* Non-blocking point doubling: each call performs one step.
   *
   * sp_ctx  Context whose data area holds the progress record.
   * r       Result of doubling point.
   * p       Point to double.
   * t       Temporary ordinate data.
   * returns FP_WOULDBLOCK while steps remain and MP_OKAY when finished.
   */
  static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t)
  {
      sp_384_proj_point_dbl_12_ctx* ctx = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data;
      int err = FP_WOULDBLOCK;

      /* Compile-time size check: the array size turns negative when the
       * progress record is not smaller than the generic context. */
      typedef char ctx_size_test[sizeof(sp_384_proj_point_dbl_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
      (void)sizeof(ctx_size_test);

      switch (ctx->state) {
          case 0:
              /* Result inherits the infinity flag of the input. */
              if (r != p) {
                  r->infinity = p->infinity;
              }
              /* Record where temporaries and result ordinates live. */
              ctx->t1 = t;
              ctx->t2 = t + 2*12;
              ctx->x = r->x;
              ctx->y = r->y;
              ctx->z = r->z;
              ctx->state = 1;
              break;
          case 1:
              /* T1 = Z^2 */
              sp_384_mont_sqr_12(ctx->t1, p->z, p384_mod, p384_mp_mod);
              ctx->state = 2;
              break;
          case 2:
              /* Z = Y * Z */
              sp_384_mont_mul_12(ctx->z, p->y, p->z, p384_mod, p384_mp_mod);
              ctx->state = 3;
              break;
          case 3:
              /* Z = 2 * Z */
              sp_384_mont_dbl_12(ctx->z, ctx->z, p384_mod);
              ctx->state = 4;
              break;
          case 4:
              /* T2 = X - T1 */
              sp_384_mont_sub_12(ctx->t2, p->x, ctx->t1, p384_mod);
              ctx->state = 5;
              break;
          case 5:
              /* T1 = X + T1 */
              sp_384_mont_add_12(ctx->t1, p->x, ctx->t1, p384_mod);
              ctx->state = 6;
              break;
          case 6:
              /* T2 = T1 * T2 */
              sp_384_mont_mul_12(ctx->t2, ctx->t1, ctx->t2, p384_mod, p384_mp_mod);
              ctx->state = 7;
              break;
          case 7:
              /* T1 = 3 * T2 */
              sp_384_mont_tpl_12(ctx->t1, ctx->t2, p384_mod);
              ctx->state = 8;
              break;
          case 8:
              /* Y = 2 * Y */
              sp_384_mont_dbl_12(ctx->y, p->y, p384_mod);
              ctx->state = 9;
              break;
          case 9:
              /* Y = Y^2 */
              sp_384_mont_sqr_12(ctx->y, ctx->y, p384_mod, p384_mp_mod);
              ctx->state = 10;
              break;
          case 10:
              /* T2 = Y^2 */
              sp_384_mont_sqr_12(ctx->t2, ctx->y, p384_mod, p384_mp_mod);
              ctx->state = 11;
              break;
          case 11:
              /* T2 = T2 / 2 */
              sp_384_div2_12(ctx->t2, ctx->t2, p384_mod);
              ctx->state = 12;
              break;
          case 12:
              /* Y = Y * X */
              sp_384_mont_mul_12(ctx->y, ctx->y, p->x, p384_mod, p384_mp_mod);
              ctx->state = 13;
              break;
          case 13:
              /* X = T1^2 */
              sp_384_mont_sqr_12(ctx->x, ctx->t1, p384_mod, p384_mp_mod);
              ctx->state = 14;
              break;
          case 14:
              /* X = X - Y (first of two subtractions) */
              sp_384_mont_sub_12(ctx->x, ctx->x, ctx->y, p384_mod);
              ctx->state = 15;
              break;
          case 15:
              /* X = X - Y (second of two subtractions) */
              sp_384_mont_sub_12(ctx->x, ctx->x, ctx->y, p384_mod);
              ctx->state = 16;
              break;
          case 16:
              /* Y = Y - X */
              sp_384_mont_sub_12(ctx->y, ctx->y, ctx->x, p384_mod);
              ctx->state = 17;
              break;
          case 17:
              /* Y = Y * T1 */
              sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t1, p384_mod, p384_mp_mod);
              ctx->state = 18;
              break;
          case 18:
              /* Y = Y - T2 */
              sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t2, p384_mod);
              ctx->state = 19;
              /* fall-through */
          case 19:
              /* All steps done. */
              err = MP_OKAY;
              break;
      }

      /* Success is only reported from the final state. */
      if ((ctx->state != 19) && (err == MP_OKAY)) {
          err = FP_WOULDBLOCK;
      }
      return err;
  }
  #endif /* WOLFSSL_SP_NONBLOCK */
  23246. static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
  23247. {
  23248. sp_digit* t1 = t;
  23249. sp_digit* t2 = t + 2*12;
  23250. sp_digit* x;
  23251. sp_digit* y;
  23252. sp_digit* z;
  23253. x = r->x;
  23254. y = r->y;
  23255. z = r->z;
  23256. /* Put infinity into result. */
  23257. if (r != p) {
  23258. r->infinity = p->infinity;
  23259. }
  23260. /* T1 = Z * Z */
  23261. sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
  23262. /* Z = Y * Z */
  23263. sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
  23264. /* Z = 2Z */
  23265. sp_384_mont_dbl_12(z, z, p384_mod);
  23266. /* T2 = X - T1 */
  23267. sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
  23268. /* T1 = X + T1 */
  23269. sp_384_mont_add_12(t1, p->x, t1, p384_mod);
  23270. /* T2 = T1 * T2 */
  23271. sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
  23272. /* T1 = 3T2 */
  23273. sp_384_mont_tpl_12(t1, t2, p384_mod);
  23274. /* Y = 2Y */
  23275. sp_384_mont_dbl_12(y, p->y, p384_mod);
  23276. /* Y = Y * Y */
  23277. sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
  23278. /* T2 = Y * Y */
  23279. sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
  23280. /* T2 = T2/2 */
  23281. sp_384_div2_12(t2, t2, p384_mod);
  23282. /* Y = Y * X */
  23283. sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
  23284. /* X = T1 * T1 */
  23285. sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
  23286. /* X = X - Y */
  23287. sp_384_mont_sub_12(x, x, y, p384_mod);
  23288. /* X = X - Y */
  23289. sp_384_mont_sub_12(x, x, y, p384_mod);
  23290. /* Y = Y - X */
  23291. sp_384_mont_sub_12(y, y, x, p384_mod);
  23292. /* Y = Y * T1 */
  23293. sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
  23294. /* Y = Y - T2 */
  23295. sp_384_mont_sub_12(y, y, t2, p384_mod);
  23296. }
  23297. /* Compare two numbers to determine if they are equal.
  23298. * Constant time implementation.
  23299. *
  23300. * a First number to compare.
  23301. * b Second number to compare.
  23302. * returns 1 when equal and 0 otherwise.
  23303. */
  23304. static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
  23305. {
  23306. return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
  23307. (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
  23308. (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
  23309. }
  /* Add two Montgomery form projective points.
   *
   * r  Result of addition.
   * p  First point to add.
   * q  Second point to add.
   * t  Temporary ordinate data.
   */
  #ifdef WOLFSSL_SP_NONBLOCK
  /* Progress record of a non-blocking point addition. */
  typedef struct sp_384_proj_point_add_12_ctx {
      int state;                              /* Step that the next call executes. */
      sp_384_proj_point_dbl_12_ctx dbl_ctx;   /* Nested record used when doubling. */
      const sp_point_384* ap[2];              /* Input points: p and q. */
      sp_point_384* rp[2];                    /* Output candidates: r and a dummy in t. */
      sp_digit* t1;                           /* Temporaries at t + {0,2,4,6,8}*12. */
      sp_digit* t2;
      sp_digit* t3;
      sp_digit* t4;
      sp_digit* t5;
      sp_digit* x;                            /* Ordinates of the selected output. */
      sp_digit* y;
      sp_digit* z;
  } sp_384_proj_point_add_12_ctx;

  /* Non-blocking point addition: each call performs one step.
   *
   * sp_ctx  Context whose data area holds the progress record.
   * r       Result of addition.
   * p       First point to add.
   * q       Second point to add.
   * t       Temporary ordinate data.
   * returns FP_WOULDBLOCK while steps remain and MP_OKAY when finished.
   */
  static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r,
      const sp_point_384* p, const sp_point_384* q, sp_digit* t)
  {
      int err = FP_WOULDBLOCK;
      sp_384_proj_point_add_12_ctx* ctx = (sp_384_proj_point_add_12_ctx*)sp_ctx->data;
      /* Compile-time size check: the array size turns negative when the
       * progress record is not smaller than the generic context. Declared
       * ahead of the first statement so that compilers which reject
       * declarations after statements accept the function. */
      typedef char ctx_size_test[sizeof(sp_384_proj_point_add_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];

      (void)sizeof(ctx_size_test);

      /* Ensure only the first point is the same as the result. */
      if (q == r) {
          const sp_point_384* a = p;
          p = q;
          q = a;
      }

      switch (ctx->state) {
          case 0: /* INIT */
              ctx->t1 = t;
              ctx->t2 = t + 2*12;
              ctx->t3 = t + 4*12;
              ctx->t4 = t + 6*12;
              ctx->t5 = t + 8*12;

              ctx->state = 1;
              break;
          case 1:
              /* Check double: t1 = modulus - q->y, then compare ordinates. */
              (void)sp_384_sub_12(ctx->t1, p384_mod, q->y);
              sp_384_norm_12(ctx->t1);
              if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
                  (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0)
              {
                  /* Start the nested doubling from its first step. */
                  XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
                  ctx->state = 2;
              }
              else {
                  ctx->state = 3;
              }
              break;
          case 2:
              /* Run one step of the nested doubling per call. */
              err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t);
              if (err == MP_OKAY)
                  ctx->state = 27; /* done */
              break;
          case 3:
          {
              int i;
              ctx->rp[0] = r;

              /*lint allow cast to different type of pointer*/
              ctx->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
              XMEMSET(ctx->rp[1], 0, sizeof(sp_point_384));
              /* Work on r unless either input is infinity; then the
               * arithmetic is directed at the dummy point in t. */
              ctx->x = ctx->rp[p->infinity | q->infinity]->x;
              ctx->y = ctx->rp[p->infinity | q->infinity]->y;
              ctx->z = ctx->rp[p->infinity | q->infinity]->z;

              ctx->ap[0] = p;
              ctx->ap[1] = q;
              /* r starts as p, or as q when p is infinity. */
              for (i=0; i<12; i++) {
                  r->x[i] = ctx->ap[p->infinity]->x[i];
              }
              for (i=0; i<12; i++) {
                  r->y[i] = ctx->ap[p->infinity]->y[i];
              }
              for (i=0; i<12; i++) {
                  r->z[i] = ctx->ap[p->infinity]->z[i];
              }
              r->infinity = ctx->ap[p->infinity]->infinity;

              ctx->state = 4;
              break;
          }
          case 4:
              /* U1 = X1*Z2^2 */
              sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod);
              ctx->state = 5;
              break;
          case 5:
              sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod);
              ctx->state = 6;
              break;
          case 6:
              sp_384_mont_mul_12(ctx->t1, ctx->t1, ctx->x, p384_mod, p384_mp_mod);
              ctx->state = 7;
              break;
          case 7:
              /* U2 = X2*Z1^2 */
              sp_384_mont_sqr_12(ctx->t2, ctx->z, p384_mod, p384_mp_mod);
              ctx->state = 8;
              break;
          case 8:
              sp_384_mont_mul_12(ctx->t4, ctx->t2, ctx->z, p384_mod, p384_mp_mod);
              ctx->state = 9;
              break;
          case 9:
              sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod);
              ctx->state = 10;
              break;
          case 10:
              /* S1 = Y1*Z2^3 */
              sp_384_mont_mul_12(ctx->t3, ctx->t3, ctx->y, p384_mod, p384_mp_mod);
              ctx->state = 11;
              break;
          case 11:
              /* S2 = Y2*Z1^3 */
              sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod);
              ctx->state = 12;
              break;
          case 12:
              /* H = U2 - U1 */
              sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod);
              ctx->state = 13;
              break;
          case 13:
              /* R = S2 - S1 */
              sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod);
              ctx->state = 14;
              break;
          case 14:
              /* Z3 = H*Z1*Z2 */
              sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod);
              ctx->state = 15;
              break;
          case 15:
              sp_384_mont_mul_12(ctx->z, ctx->z, ctx->t2, p384_mod, p384_mp_mod);
              ctx->state = 16;
              break;
          case 16:
              /* X3 = R^2 - H^3 - 2*U1*H^2 */
              sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod);
              ctx->state = 17;
              break;
          case 17:
              sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod);
              ctx->state = 18;
              break;
          case 18:
              sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod);
              ctx->state = 19;
              break;
          case 19:
              sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod);
              ctx->state = 20;
              break;
          case 20:
              sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod);
              ctx->state = 21;
              break;
          case 21:
              sp_384_mont_dbl_12(ctx->t1, ctx->y, p384_mod);
              ctx->state = 22;
              break;
          case 22:
              sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t1, p384_mod);
              ctx->state = 23;
              break;
          case 23:
              /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
              sp_384_mont_sub_12(ctx->y, ctx->y, ctx->x, p384_mod);
              ctx->state = 24;
              break;
          case 24:
              sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod);
              ctx->state = 25;
              break;
          case 25:
              sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod);
              ctx->state = 26;
              break;
          case 26:
              sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod);
              ctx->state = 27;
              /* fall-through */
          case 27:
              err = MP_OKAY;
              break;
      }

      /* Success is only reported from the final state. */
      if (err == MP_OKAY && ctx->state != 27) {
          err = FP_WOULDBLOCK;
      }
      return err;
  }
  #endif /* WOLFSSL_SP_NONBLOCK */
  23509. static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
  23510. sp_digit* t)
  23511. {
  23512. const sp_point_384* ap[2];
  23513. sp_point_384* rp[2];
  23514. sp_digit* t1 = t;
  23515. sp_digit* t2 = t + 2*12;
  23516. sp_digit* t3 = t + 4*12;
  23517. sp_digit* t4 = t + 6*12;
  23518. sp_digit* t5 = t + 8*12;
  23519. sp_digit* x;
  23520. sp_digit* y;
  23521. sp_digit* z;
  23522. int i;
  23523. /* Ensure only the first point is the same as the result. */
  23524. if (q == r) {
  23525. const sp_point_384* a = p;
  23526. p = q;
  23527. q = a;
  23528. }
  23529. /* Check double */
  23530. (void)sp_384_sub_12(t1, p384_mod, q->y);
  23531. sp_384_norm_12(t1);
  23532. if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
  23533. (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
  23534. sp_384_proj_point_dbl_12(r, p, t);
  23535. }
  23536. else {
  23537. rp[0] = r;
  23538. /*lint allow cast to different type of pointer*/
  23539. rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
  23540. XMEMSET(rp[1], 0, sizeof(sp_point_384));
  23541. x = rp[p->infinity | q->infinity]->x;
  23542. y = rp[p->infinity | q->infinity]->y;
  23543. z = rp[p->infinity | q->infinity]->z;
  23544. ap[0] = p;
  23545. ap[1] = q;
  23546. for (i=0; i<12; i++) {
  23547. r->x[i] = ap[p->infinity]->x[i];
  23548. }
  23549. for (i=0; i<12; i++) {
  23550. r->y[i] = ap[p->infinity]->y[i];
  23551. }
  23552. for (i=0; i<12; i++) {
  23553. r->z[i] = ap[p->infinity]->z[i];
  23554. }
  23555. r->infinity = ap[p->infinity]->infinity;
  23556. /* U1 = X1*Z2^2 */
  23557. sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
  23558. sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
  23559. sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
  23560. /* U2 = X2*Z1^2 */
  23561. sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
  23562. sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
  23563. sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
  23564. /* S1 = Y1*Z2^3 */
  23565. sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
  23566. /* S2 = Y2*Z1^3 */
  23567. sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
  23568. /* H = U2 - U1 */
  23569. sp_384_mont_sub_12(t2, t2, t1, p384_mod);
  23570. /* R = S2 - S1 */
  23571. sp_384_mont_sub_12(t4, t4, t3, p384_mod);
  23572. /* Z3 = H*Z1*Z2 */
  23573. sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
  23574. sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
  23575. /* X3 = R^2 - H^3 - 2*U1*H^2 */
  23576. sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
  23577. sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
  23578. sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
  23579. sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
  23580. sp_384_mont_sub_12(x, x, t5, p384_mod);
  23581. sp_384_mont_dbl_12(t1, y, p384_mod);
  23582. sp_384_mont_sub_12(x, x, t1, p384_mod);
  23583. /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
  23584. sp_384_mont_sub_12(y, y, x, p384_mod);
  23585. sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
  23586. sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
  23587. sp_384_mont_sub_12(y, y, t5, p384_mod);
  23588. }
  23589. }
  23590. #ifndef WC_NO_CACHE_RESISTANT
  23591. /* Touch each possible point that could be being copied.
  23592. *
  23593. * r Point to copy into.
  23594. * table Table - start of the entires to access
  23595. * idx Index of entry to retrieve.
  23596. */
  23597. static void sp_384_get_point_16_12(sp_point_384* r, const sp_point_384* table,
  23598. int idx)
  23599. {
  23600. int i;
  23601. sp_digit mask;
  23602. r->x[0] = 0;
  23603. r->x[1] = 0;
  23604. r->x[2] = 0;
  23605. r->x[3] = 0;
  23606. r->x[4] = 0;
  23607. r->x[5] = 0;
  23608. r->x[6] = 0;
  23609. r->x[7] = 0;
  23610. r->x[8] = 0;
  23611. r->x[9] = 0;
  23612. r->x[10] = 0;
  23613. r->x[11] = 0;
  23614. r->y[0] = 0;
  23615. r->y[1] = 0;
  23616. r->y[2] = 0;
  23617. r->y[3] = 0;
  23618. r->y[4] = 0;
  23619. r->y[5] = 0;
  23620. r->y[6] = 0;
  23621. r->y[7] = 0;
  23622. r->y[8] = 0;
  23623. r->y[9] = 0;
  23624. r->y[10] = 0;
  23625. r->y[11] = 0;
  23626. r->z[0] = 0;
  23627. r->z[1] = 0;
  23628. r->z[2] = 0;
  23629. r->z[3] = 0;
  23630. r->z[4] = 0;
  23631. r->z[5] = 0;
  23632. r->z[6] = 0;
  23633. r->z[7] = 0;
  23634. r->z[8] = 0;
  23635. r->z[9] = 0;
  23636. r->z[10] = 0;
  23637. r->z[11] = 0;
  23638. for (i = 1; i < 16; i++) {
  23639. mask = 0 - (i == idx);
  23640. r->x[0] |= mask & table[i].x[0];
  23641. r->x[1] |= mask & table[i].x[1];
  23642. r->x[2] |= mask & table[i].x[2];
  23643. r->x[3] |= mask & table[i].x[3];
  23644. r->x[4] |= mask & table[i].x[4];
  23645. r->x[5] |= mask & table[i].x[5];
  23646. r->x[6] |= mask & table[i].x[6];
  23647. r->x[7] |= mask & table[i].x[7];
  23648. r->x[8] |= mask & table[i].x[8];
  23649. r->x[9] |= mask & table[i].x[9];
  23650. r->x[10] |= mask & table[i].x[10];
  23651. r->x[11] |= mask & table[i].x[11];
  23652. r->y[0] |= mask & table[i].y[0];
  23653. r->y[1] |= mask & table[i].y[1];
  23654. r->y[2] |= mask & table[i].y[2];
  23655. r->y[3] |= mask & table[i].y[3];
  23656. r->y[4] |= mask & table[i].y[4];
  23657. r->y[5] |= mask & table[i].y[5];
  23658. r->y[6] |= mask & table[i].y[6];
  23659. r->y[7] |= mask & table[i].y[7];
  23660. r->y[8] |= mask & table[i].y[8];
  23661. r->y[9] |= mask & table[i].y[9];
  23662. r->y[10] |= mask & table[i].y[10];
  23663. r->y[11] |= mask & table[i].y[11];
  23664. r->z[0] |= mask & table[i].z[0];
  23665. r->z[1] |= mask & table[i].z[1];
  23666. r->z[2] |= mask & table[i].z[2];
  23667. r->z[3] |= mask & table[i].z[3];
  23668. r->z[4] |= mask & table[i].z[4];
  23669. r->z[5] |= mask & table[i].z[5];
  23670. r->z[6] |= mask & table[i].z[6];
  23671. r->z[7] |= mask & table[i].z[7];
  23672. r->z[8] |= mask & table[i].z[8];
  23673. r->z[9] |= mask & table[i].z[9];
  23674. r->z[10] |= mask & table[i].z[10];
  23675. r->z[11] |= mask & table[i].z[11];
  23676. }
  23677. }
  23678. #endif /* !WC_NO_CACHE_RESISTANT */
  23679. /* Multiply the point by the scalar and return the result.
  23680. * If map is true then convert result to affine coordinates.
  23681. *
  23682. * Simple, smaller code size and memory size, of windowing.
  23683. * Calculate uindow of 4 bits.
  23684. * Only add points from table.
  23685. *
  23686. * r Resulting point.
  23687. * g Point to multiply.
  23688. * k Scalar to multiply by.
  23689. * map Indicates whether to convert result to affine.
  23690. * ct Constant time required.
  23691. * heap Heap to use for allocation.
  23692. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  23693. */
  23694. static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
  23695. int map, int ct, void* heap)
  23696. {
  23697. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  23698. sp_point_384 td[16];
  23699. sp_point_384 rtd;
  23700. sp_digit tmpd[2 * 12 * 6];
  23701. #ifndef WC_NO_CACHE_RESISTANT
  23702. sp_point_384 pd;
  23703. #endif
  23704. #endif
  23705. sp_point_384* t;
  23706. sp_point_384* rt;
  23707. #ifndef WC_NO_CACHE_RESISTANT
  23708. sp_point_384* p;
  23709. #endif
  23710. sp_digit* tmp;
  23711. sp_digit n;
  23712. int i;
  23713. int c, y;
  23714. int err;
  23715. /* Constant time used for cache attack resistance implementation. */
  23716. (void)ct;
  23717. (void)heap;
  23718. err = sp_384_point_new_12(heap, rtd, rt);
  23719. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  23720. #ifndef WC_NO_CACHE_RESISTANT
  23721. t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 17, heap, DYNAMIC_TYPE_ECC);
  23722. #else
  23723. t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
  23724. #endif
  23725. if (t == NULL)
  23726. err = MEMORY_E;
  23727. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
  23728. DYNAMIC_TYPE_ECC);
  23729. if (tmp == NULL)
  23730. err = MEMORY_E;
  23731. #else
  23732. t = td;
  23733. tmp = tmpd;
  23734. #endif
  23735. if (err == MP_OKAY) {
  23736. #ifndef WC_NO_CACHE_RESISTANT
  23737. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  23738. p = t + 16;
  23739. #else
  23740. p = &pd;
  23741. #endif
  23742. #endif
  23743. /* t[0] = {0, 0, 1} * norm */
  23744. XMEMSET(&t[0], 0, sizeof(t[0]));
  23745. t[0].infinity = 1;
  23746. /* t[1] = {g->x, g->y, g->z} * norm */
  23747. (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
  23748. (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
  23749. (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
  23750. t[1].infinity = 0;
  23751. sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
  23752. t[ 2].infinity = 0;
  23753. sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
  23754. t[ 3].infinity = 0;
  23755. sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
  23756. t[ 4].infinity = 0;
  23757. sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
  23758. t[ 5].infinity = 0;
  23759. sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
  23760. t[ 6].infinity = 0;
  23761. sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
  23762. t[ 7].infinity = 0;
  23763. sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
  23764. t[ 8].infinity = 0;
  23765. sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
  23766. t[ 9].infinity = 0;
  23767. sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
  23768. t[10].infinity = 0;
  23769. sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
  23770. t[11].infinity = 0;
  23771. sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
  23772. t[12].infinity = 0;
  23773. sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
  23774. t[13].infinity = 0;
  23775. sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
  23776. t[14].infinity = 0;
  23777. sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
  23778. t[15].infinity = 0;
  23779. i = 10;
  23780. n = k[i+1] << 0;
  23781. c = 28;
  23782. y = n >> 28;
  23783. #ifndef WC_NO_CACHE_RESISTANT
  23784. if (ct) {
  23785. sp_384_get_point_16_12(rt, t, y);
  23786. rt->infinity = !y;
  23787. }
  23788. else
  23789. #endif
  23790. {
  23791. XMEMCPY(rt, &t[y], sizeof(sp_point_384));
  23792. }
  23793. n <<= 4;
  23794. for (; i>=0 || c>=4; ) {
  23795. if (c < 4) {
  23796. n |= k[i--];
  23797. c += 32;
  23798. }
  23799. y = (n >> 28) & 0xf;
  23800. n <<= 4;
  23801. c -= 4;
  23802. sp_384_proj_point_dbl_12(rt, rt, tmp);
  23803. sp_384_proj_point_dbl_12(rt, rt, tmp);
  23804. sp_384_proj_point_dbl_12(rt, rt, tmp);
  23805. sp_384_proj_point_dbl_12(rt, rt, tmp);
  23806. #ifndef WC_NO_CACHE_RESISTANT
  23807. if (ct) {
  23808. sp_384_get_point_16_12(p, t, y);
  23809. p->infinity = !y;
  23810. sp_384_proj_point_add_12(rt, rt, p, tmp);
  23811. }
  23812. else
  23813. #endif
  23814. {
  23815. sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
  23816. }
  23817. }
  23818. if (map != 0) {
  23819. sp_384_map_12(r, rt, tmp);
  23820. }
  23821. else {
  23822. XMEMCPY(r, rt, sizeof(sp_point_384));
  23823. }
  23824. }
  23825. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  23826. if (tmp != NULL) {
  23827. XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
  23828. XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
  23829. }
  23830. if (t != NULL) {
  23831. XMEMSET(t, 0, sizeof(sp_point_384) * 16);
  23832. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  23833. }
  23834. #else
  23835. ForceZero(tmpd, sizeof(tmpd));
  23836. ForceZero(td, sizeof(td));
  23837. #endif
  23838. sp_384_point_free_12(rt, 1, heap);
  23839. return err;
  23840. }
  23841. /* A table entry for pre-computed points: x and y ordinates only. */
  23842. typedef struct sp_table_entry_384 {
  23843. sp_digit x[12]; /* X ordinate (12 digits). */
  23844. sp_digit y[12]; /* Y ordinate (12 digits). */
  23845. } sp_table_entry_384;
  23846. #ifdef FP_ECC
  23847. /* Double the Montgomery form projective point p a number of times.
  23848. *
  23849. * r Result of repeated doubling of point.
  23850. * p Point to double.
  23851. * n Number of times to double
  23852. * t Temporary ordinate data.
  23853. */
  23854. static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
  23855. {
  23856. sp_digit* w = t;
  23857. sp_digit* a = t + 2*12;
  23858. sp_digit* b = t + 4*12;
  23859. sp_digit* t1 = t + 6*12;
  23860. sp_digit* t2 = t + 8*12;
  23861. sp_digit* x;
  23862. sp_digit* y;
  23863. sp_digit* z;
  23864. x = p->x;
  23865. y = p->y;
  23866. z = p->z;
  23867. /* Y = 2*Y */
  23868. sp_384_mont_dbl_12(y, y, p384_mod);
  23869. /* W = Z^4 */
  23870. sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
  23871. sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
  23872. #ifndef WOLFSSL_SP_SMALL
  23873. while (--n > 0)
  23874. #else
  23875. while (--n >= 0)
  23876. #endif
  23877. {
  23878. /* A = 3*(X^2 - W) */
  23879. sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
  23880. sp_384_mont_sub_12(t1, t1, w, p384_mod);
  23881. sp_384_mont_tpl_12(a, t1, p384_mod);
  23882. /* B = X*Y^2 */
  23883. sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
  23884. sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
  23885. /* X = A^2 - 2B */
  23886. sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
  23887. sp_384_mont_dbl_12(t2, b, p384_mod);
  23888. sp_384_mont_sub_12(x, x, t2, p384_mod);
  23889. /* Z = Z*Y */
  23890. sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
  23891. /* t2 = Y^4 */
  23892. sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
  23893. #ifdef WOLFSSL_SP_SMALL
  23894. if (n != 0)
  23895. #endif
  23896. {
  23897. /* W = W*Y^4 */
  23898. sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
  23899. }
  23900. /* y = 2*A*(B - X) - Y^4 */
  23901. sp_384_mont_sub_12(y, b, x, p384_mod);
  23902. sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
  23903. sp_384_mont_dbl_12(y, y, p384_mod);
  23904. sp_384_mont_sub_12(y, y, t1, p384_mod);
  23905. }
  23906. #ifndef WOLFSSL_SP_SMALL
  23907. /* A = 3*(X^2 - W) */
  23908. sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
  23909. sp_384_mont_sub_12(t1, t1, w, p384_mod);
  23910. sp_384_mont_tpl_12(a, t1, p384_mod);
  23911. /* B = X*Y^2 */
  23912. sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
  23913. sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
  23914. /* X = A^2 - 2B */
  23915. sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
  23916. sp_384_mont_dbl_12(t2, b, p384_mod);
  23917. sp_384_mont_sub_12(x, x, t2, p384_mod);
  23918. /* Z = Z*Y */
  23919. sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
  23920. /* t2 = Y^4 */
  23921. sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
  23922. /* y = 2*A*(B - X) - Y^4 */
  23923. sp_384_mont_sub_12(y, b, x, p384_mod);
  23924. sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
  23925. sp_384_mont_dbl_12(y, y, p384_mod);
  23926. sp_384_mont_sub_12(y, y, t1, p384_mod);
  23927. #endif
  23928. /* Y = Y/2 */
  23929. sp_384_div2_12(y, y, p384_mod);
  23930. }
  23931. /* Convert the projective point to affine.
  23932. * Ordinates are in Montgomery form.
  23933. *
  23934. * a Point to convert.
  23935. * t Temporary data.
  23936. */
  23937. static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
  23938. {
  23939. sp_digit* t1 = t;
  23940. sp_digit* t2 = t + 2 * 12;
  23941. sp_digit* tmp = t + 4 * 12;
  23942. sp_384_mont_inv_12(t1, a->z, tmp);
  23943. sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
  23944. sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
  23945. sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
  23946. sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
  23947. XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
  23948. }
  23949. #endif /* FP_ECC */
  23950. /* Add two Montgomery form projective points. The second point has a q value of
  23951. * one.
  23952. * Only the first point can be the same pointer as the result point.
  23953. *
  23954. * r Result of addition.
  23955. * p First point to add.
  23956. * q Second point to add.
  23957. * t Temporary ordinate data.
  23958. */
  23959. static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
  23960. const sp_point_384* q, sp_digit* t)
  23961. {
  23962. const sp_point_384* ap[2];
  23963. sp_point_384* rp[2];
  23964. sp_digit* t1 = t;
  23965. sp_digit* t2 = t + 2*12;
  23966. sp_digit* t3 = t + 4*12;
  23967. sp_digit* t4 = t + 6*12;
  23968. sp_digit* t5 = t + 8*12;
  23969. sp_digit* x;
  23970. sp_digit* y;
  23971. sp_digit* z;
  23972. int i;
  23973. /* Check double */
  23974. (void)sp_384_sub_12(t1, p384_mod, q->y);
  23975. sp_384_norm_12(t1);
  23976. if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
  23977. (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
  23978. sp_384_proj_point_dbl_12(r, p, t);
  23979. }
  23980. else {
  23981. rp[0] = r;
  23982. /*lint allow cast to different type of pointer*/
  23983. rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
  23984. XMEMSET(rp[1], 0, sizeof(sp_point_384));
  23985. x = rp[p->infinity | q->infinity]->x;
  23986. y = rp[p->infinity | q->infinity]->y;
  23987. z = rp[p->infinity | q->infinity]->z;
  23988. ap[0] = p;
  23989. ap[1] = q;
  23990. for (i=0; i<12; i++) {
  23991. r->x[i] = ap[p->infinity]->x[i];
  23992. }
  23993. for (i=0; i<12; i++) {
  23994. r->y[i] = ap[p->infinity]->y[i];
  23995. }
  23996. for (i=0; i<12; i++) {
  23997. r->z[i] = ap[p->infinity]->z[i];
  23998. }
  23999. r->infinity = ap[p->infinity]->infinity;
  24000. /* U2 = X2*Z1^2 */
  24001. sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
  24002. sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
  24003. sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
  24004. /* S2 = Y2*Z1^3 */
  24005. sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
  24006. /* H = U2 - X1 */
  24007. sp_384_mont_sub_12(t2, t2, x, p384_mod);
  24008. /* R = S2 - Y1 */
  24009. sp_384_mont_sub_12(t4, t4, y, p384_mod);
  24010. /* Z3 = H*Z1 */
  24011. sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
  24012. /* X3 = R^2 - H^3 - 2*X1*H^2 */
  24013. sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
  24014. sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
  24015. sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
  24016. sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
  24017. sp_384_mont_sub_12(x, t1, t5, p384_mod);
  24018. sp_384_mont_dbl_12(t1, t3, p384_mod);
  24019. sp_384_mont_sub_12(x, x, t1, p384_mod);
  24020. /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
  24021. sp_384_mont_sub_12(t3, t3, x, p384_mod);
  24022. sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
  24023. sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
  24024. sp_384_mont_sub_12(y, t3, t5, p384_mod);
  24025. }
  24026. }
  24027. #ifdef WOLFSSL_SP_SMALL
  24028. #ifdef FP_ECC
  24029. /* Generate the pre-computed table of points for the base point.
  24030. *
  24031. * a The base point.
  24032. * table Place to store generated point data.
  24033. * tmp Temporary data.
  24034. * heap Heap to use for allocation.
  24035. */
  24036. static int sp_384_gen_stripe_table_12(const sp_point_384* a,
  24037. sp_table_entry_384* table, sp_digit* tmp, void* heap)
  24038. {
  24039. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  24040. sp_point_384 td, s1d, s2d;
  24041. #endif
  24042. sp_point_384* t;
  24043. sp_point_384* s1 = NULL;
  24044. sp_point_384* s2 = NULL;
  24045. int i, j;
  24046. int err;
  24047. (void)heap;
  24048. err = sp_384_point_new_12(heap, td, t);
  24049. if (err == MP_OKAY) {
  24050. err = sp_384_point_new_12(heap, s1d, s1);
  24051. }
  24052. if (err == MP_OKAY) {
  24053. err = sp_384_point_new_12(heap, s2d, s2);
  24054. }
  24055. if (err == MP_OKAY) {
  24056. err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
  24057. }
  24058. if (err == MP_OKAY) {
  24059. err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
  24060. }
  24061. if (err == MP_OKAY) {
  24062. err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
  24063. }
  24064. if (err == MP_OKAY) {
  24065. t->infinity = 0;
  24066. sp_384_proj_to_affine_12(t, tmp);
  24067. XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
  24068. s1->infinity = 0;
  24069. XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
  24070. s2->infinity = 0;
  24071. /* table[0] = {0, 0, infinity} */
  24072. XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
  24073. /* table[1] = Affine version of 'a' in Montgomery form */
  24074. XMEMCPY(table[1].x, t->x, sizeof(table->x));
  24075. XMEMCPY(table[1].y, t->y, sizeof(table->y));
  24076. for (i=1; i<4; i++) {
  24077. sp_384_proj_point_dbl_n_12(t, 96, tmp);
  24078. sp_384_proj_to_affine_12(t, tmp);
  24079. XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
  24080. XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
  24081. }
  24082. for (i=1; i<4; i++) {
  24083. XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
  24084. XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
  24085. for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
  24086. XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
  24087. XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
  24088. sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
  24089. sp_384_proj_to_affine_12(t, tmp);
  24090. XMEMCPY(table[j].x, t->x, sizeof(table->x));
  24091. XMEMCPY(table[j].y, t->y, sizeof(table->y));
  24092. }
  24093. }
  24094. }
  24095. sp_384_point_free_12(s2, 0, heap);
  24096. sp_384_point_free_12(s1, 0, heap);
  24097. sp_384_point_free_12( t, 0, heap);
  24098. return err;
  24099. }
  24100. #endif /* FP_ECC */
  24101. #ifndef WC_NO_CACHE_RESISTANT
  24102. /* Touch each possible entry that could be being copied.
  24103. *
  24104. * r Point to copy into.
  24105. * table Table - start of the entires to access
  24106. * idx Index of entry to retrieve.
  24107. */
  24108. static void sp_384_get_entry_16_12(sp_point_384* r,
  24109. const sp_table_entry_384* table, int idx)
  24110. {
  24111. int i;
  24112. sp_digit mask;
  24113. r->x[0] = 0;
  24114. r->x[1] = 0;
  24115. r->x[2] = 0;
  24116. r->x[3] = 0;
  24117. r->x[4] = 0;
  24118. r->x[5] = 0;
  24119. r->x[6] = 0;
  24120. r->x[7] = 0;
  24121. r->x[8] = 0;
  24122. r->x[9] = 0;
  24123. r->x[10] = 0;
  24124. r->x[11] = 0;
  24125. r->y[0] = 0;
  24126. r->y[1] = 0;
  24127. r->y[2] = 0;
  24128. r->y[3] = 0;
  24129. r->y[4] = 0;
  24130. r->y[5] = 0;
  24131. r->y[6] = 0;
  24132. r->y[7] = 0;
  24133. r->y[8] = 0;
  24134. r->y[9] = 0;
  24135. r->y[10] = 0;
  24136. r->y[11] = 0;
  24137. for (i = 1; i < 16; i++) {
  24138. mask = 0 - (i == idx);
  24139. r->x[0] |= mask & table[i].x[0];
  24140. r->x[1] |= mask & table[i].x[1];
  24141. r->x[2] |= mask & table[i].x[2];
  24142. r->x[3] |= mask & table[i].x[3];
  24143. r->x[4] |= mask & table[i].x[4];
  24144. r->x[5] |= mask & table[i].x[5];
  24145. r->x[6] |= mask & table[i].x[6];
  24146. r->x[7] |= mask & table[i].x[7];
  24147. r->x[8] |= mask & table[i].x[8];
  24148. r->x[9] |= mask & table[i].x[9];
  24149. r->x[10] |= mask & table[i].x[10];
  24150. r->x[11] |= mask & table[i].x[11];
  24151. r->y[0] |= mask & table[i].y[0];
  24152. r->y[1] |= mask & table[i].y[1];
  24153. r->y[2] |= mask & table[i].y[2];
  24154. r->y[3] |= mask & table[i].y[3];
  24155. r->y[4] |= mask & table[i].y[4];
  24156. r->y[5] |= mask & table[i].y[5];
  24157. r->y[6] |= mask & table[i].y[6];
  24158. r->y[7] |= mask & table[i].y[7];
  24159. r->y[8] |= mask & table[i].y[8];
  24160. r->y[9] |= mask & table[i].y[9];
  24161. r->y[10] |= mask & table[i].y[10];
  24162. r->y[11] |= mask & table[i].y[11];
  24163. }
  24164. }
  24165. #endif /* !WC_NO_CACHE_RESISTANT */
  24166. /* Multiply the point by the scalar and return the result.
  24167. * If map is true then convert result to affine coordinates.
  24168. *
  24169. * Implementation uses striping of bits.
  24170. * Choose bits 4 bits apart.
  24171. *
  24172. * r Resulting point.
  24173. * k Scalar to multiply by.
  24174. * table Pre-computed table.
  24175. * map Indicates whether to convert result to affine.
  24176. * ct Constant time required.
  24177. * heap Heap to use for allocation.
  24178. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24179. */
  24180. static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
  24181. const sp_table_entry_384* table, const sp_digit* k, int map,
  24182. int ct, void* heap)
  24183. {
  24184. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  24185. sp_point_384 rtd;
  24186. sp_point_384 pd;
  24187. sp_digit td[2 * 12 * 6];
  24188. #endif
  24189. sp_point_384* rt;
  24190. sp_point_384* p = NULL;
  24191. sp_digit* t;
  24192. int i, j;
  24193. int y, x;
  24194. int err;
  24195. (void)g;
  24196. /* Constant time used for cache attack resistance implementation. */
  24197. (void)ct;
  24198. (void)heap;
  24199. err = sp_384_point_new_12(heap, rtd, rt);
  24200. if (err == MP_OKAY) {
  24201. err = sp_384_point_new_12(heap, pd, p);
  24202. }
  24203. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24204. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
  24205. DYNAMIC_TYPE_ECC);
  24206. if (t == NULL) {
  24207. err = MEMORY_E;
  24208. }
  24209. #else
  24210. t = td;
  24211. #endif
  24212. if (err == MP_OKAY) {
  24213. XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
  24214. XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
  24215. y = 0;
  24216. for (j=0,x=95; j<4; j++,x+=96) {
  24217. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  24218. }
  24219. #ifndef WC_NO_CACHE_RESISTANT
  24220. if (ct) {
  24221. sp_384_get_entry_16_12(rt, table, y);
  24222. } else
  24223. #endif
  24224. {
  24225. XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
  24226. XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
  24227. }
  24228. rt->infinity = !y;
  24229. for (i=94; i>=0; i--) {
  24230. y = 0;
  24231. for (j=0,x=i; j<4; j++,x+=96) {
  24232. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  24233. }
  24234. sp_384_proj_point_dbl_12(rt, rt, t);
  24235. #ifndef WC_NO_CACHE_RESISTANT
  24236. if (ct) {
  24237. sp_384_get_entry_16_12(p, table, y);
  24238. }
  24239. else
  24240. #endif
  24241. {
  24242. XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
  24243. XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
  24244. }
  24245. p->infinity = !y;
  24246. sp_384_proj_point_add_qz1_12(rt, rt, p, t);
  24247. }
  24248. if (map != 0) {
  24249. sp_384_map_12(r, rt, t);
  24250. }
  24251. else {
  24252. XMEMCPY(r, rt, sizeof(sp_point_384));
  24253. }
  24254. }
  24255. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24256. if (t != NULL) {
  24257. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  24258. }
  24259. #endif
  24260. sp_384_point_free_12(p, 0, heap);
  24261. sp_384_point_free_12(rt, 0, heap);
  24262. return err;
  24263. }
  24264. #ifdef FP_ECC
  24265. #ifndef FP_ENTRIES
  24266. #define FP_ENTRIES 16
  24267. #endif
  /* One slot of the pre-computed table cache, keyed by a point's x and y. */
  24268. typedef struct sp_cache_384_t {
  24269. sp_digit x[12]; /* X ordinate of the cached point (copied from g->x). */
  24270. sp_digit y[12]; /* Y ordinate of the cached point (copied from g->y). */
  24271. sp_table_entry_384 table[16]; /* Stripe table; generated when cnt reaches 2. */
  24272. uint32_t cnt; /* Lookup count: 1 on insertion, incremented on each match. */
  24273. int set; /* Non-zero when this slot holds a point. */
  24274. } sp_cache_384_t;
  24275. static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES]; /* The cache slots. */
  24276. static THREAD_LS_T int sp_cache_384_last = -1; /* Slot returned by the last lookup; -1 before any. */
  24277. static THREAD_LS_T int sp_cache_384_inited = 0; /* Set to 1 once every slot's set flag is cleared. */
  24278. #ifndef HAVE_THREAD_LS
  24279. static volatile int initCacheMutex_384 = 0; /* Set to 1 once sp_cache_384_lock is initialised. */
  24280. static wolfSSL_Mutex sp_cache_384_lock; /* Held around cache lookup and table generation. */
  24281. #endif
  24282. static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
  24283. {
  24284. int i, j;
  24285. uint32_t least;
  24286. if (sp_cache_384_inited == 0) {
  24287. for (i=0; i<FP_ENTRIES; i++) {
  24288. sp_cache_384[i].set = 0;
  24289. }
  24290. sp_cache_384_inited = 1;
  24291. }
  24292. /* Compare point with those in cache. */
  24293. for (i=0; i<FP_ENTRIES; i++) {
  24294. if (!sp_cache_384[i].set)
  24295. continue;
  24296. if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
  24297. sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
  24298. sp_cache_384[i].cnt++;
  24299. break;
  24300. }
  24301. }
  24302. /* No match. */
  24303. if (i == FP_ENTRIES) {
  24304. /* Find empty entry. */
  24305. i = (sp_cache_384_last + 1) % FP_ENTRIES;
  24306. for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
  24307. if (!sp_cache_384[i].set) {
  24308. break;
  24309. }
  24310. }
  24311. /* Evict least used. */
  24312. if (i == sp_cache_384_last) {
  24313. least = sp_cache_384[0].cnt;
  24314. for (j=1; j<FP_ENTRIES; j++) {
  24315. if (sp_cache_384[j].cnt < least) {
  24316. i = j;
  24317. least = sp_cache_384[i].cnt;
  24318. }
  24319. }
  24320. }
  24321. XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
  24322. XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
  24323. sp_cache_384[i].set = 1;
  24324. sp_cache_384[i].cnt = 1;
  24325. }
  24326. *cache = &sp_cache_384[i];
  24327. sp_cache_384_last = i;
  24328. }
  24329. #endif /* FP_ECC */
  24330. /* Multiply the base point of P384 by the scalar and return the result.
  24331. * If map is true then convert result to affine coordinates.
  24332. *
  24333. * r Resulting point.
  24334. * g Point to multiply.
  24335. * k Scalar to multiply by.
  24336. * map Indicates whether to convert result to affine.
  24337. * ct Constant time required.
  24338. * heap Heap to use for allocation.
  24339. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24340. */
  24341. static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
  24342. int map, int ct, void* heap)
  24343. {
  24344. #ifndef FP_ECC
  24345. return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
  24346. #else
  24347. sp_digit tmp[2 * 12 * 7];
  24348. sp_cache_384_t* cache;
  24349. int err = MP_OKAY;
  24350. #ifndef HAVE_THREAD_LS
  24351. if (initCacheMutex_384 == 0) {
  24352. wc_InitMutex(&sp_cache_384_lock);
  24353. initCacheMutex_384 = 1;
  24354. }
  24355. if (wc_LockMutex(&sp_cache_384_lock) != 0)
  24356. err = BAD_MUTEX_E;
  24357. #endif /* HAVE_THREAD_LS */
  24358. if (err == MP_OKAY) {
  24359. sp_ecc_get_cache_384(g, &cache);
  24360. if (cache->cnt == 2)
  24361. sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
  24362. #ifndef HAVE_THREAD_LS
  24363. wc_UnLockMutex(&sp_cache_384_lock);
  24364. #endif /* HAVE_THREAD_LS */
  24365. if (cache->cnt < 2) {
  24366. err = sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
  24367. }
  24368. else {
  24369. err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
  24370. map, ct, heap);
  24371. }
  24372. }
  24373. return err;
  24374. #endif
  24375. }
  24376. #else
  24377. #ifdef FP_ECC
  24378. /* Generate the pre-computed table of points for the base point.
  24379. *
  24380. * a The base point.
  24381. * table Place to store generated point data.
  24382. * tmp Temporary data.
  24383. * heap Heap to use for allocation.
  24384. */
  24385. static int sp_384_gen_stripe_table_12(const sp_point_384* a,
  24386. sp_table_entry_384* table, sp_digit* tmp, void* heap)
  24387. {
  24388. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  24389. sp_point_384 td, s1d, s2d;
  24390. #endif
  24391. sp_point_384* t;
  24392. sp_point_384* s1 = NULL;
  24393. sp_point_384* s2 = NULL;
  24394. int i, j;
  24395. int err;
  24396. (void)heap;
  24397. err = sp_384_point_new_12(heap, td, t);
  24398. if (err == MP_OKAY) {
  24399. err = sp_384_point_new_12(heap, s1d, s1);
  24400. }
  24401. if (err == MP_OKAY) {
  24402. err = sp_384_point_new_12(heap, s2d, s2);
  24403. }
  24404. if (err == MP_OKAY) {
  24405. err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
  24406. }
  24407. if (err == MP_OKAY) {
  24408. err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
  24409. }
  24410. if (err == MP_OKAY) {
  24411. err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
  24412. }
  24413. if (err == MP_OKAY) {
  24414. t->infinity = 0;
  24415. sp_384_proj_to_affine_12(t, tmp);
  24416. XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
  24417. s1->infinity = 0;
  24418. XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
  24419. s2->infinity = 0;
  24420. /* table[0] = {0, 0, infinity} */
  24421. XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
  24422. /* table[1] = Affine version of 'a' in Montgomery form */
  24423. XMEMCPY(table[1].x, t->x, sizeof(table->x));
  24424. XMEMCPY(table[1].y, t->y, sizeof(table->y));
  24425. for (i=1; i<8; i++) {
  24426. sp_384_proj_point_dbl_n_12(t, 48, tmp);
  24427. sp_384_proj_to_affine_12(t, tmp);
  24428. XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
  24429. XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
  24430. }
  24431. for (i=1; i<8; i++) {
  24432. XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
  24433. XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
  24434. for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
  24435. XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
  24436. XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
  24437. sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
  24438. sp_384_proj_to_affine_12(t, tmp);
  24439. XMEMCPY(table[j].x, t->x, sizeof(table->x));
  24440. XMEMCPY(table[j].y, t->y, sizeof(table->y));
  24441. }
  24442. }
  24443. }
  24444. sp_384_point_free_12(s2, 0, heap);
  24445. sp_384_point_free_12(s1, 0, heap);
  24446. sp_384_point_free_12( t, 0, heap);
  24447. return err;
  24448. }
  24449. #endif /* FP_ECC */
  24450. #ifndef WC_NO_CACHE_RESISTANT
  24451. /* Touch each possible entry that could be being copied.
  24452. *
  24453. * r Point to copy into.
  24454. * table Table - start of the entires to access
  24455. * idx Index of entry to retrieve.
  24456. */
  24457. static void sp_384_get_entry_256_12(sp_point_384* r,
  24458. const sp_table_entry_384* table, int idx)
  24459. {
  24460. int i;
  24461. sp_digit mask;
  24462. r->x[0] = 0;
  24463. r->x[1] = 0;
  24464. r->x[2] = 0;
  24465. r->x[3] = 0;
  24466. r->x[4] = 0;
  24467. r->x[5] = 0;
  24468. r->x[6] = 0;
  24469. r->x[7] = 0;
  24470. r->x[8] = 0;
  24471. r->x[9] = 0;
  24472. r->x[10] = 0;
  24473. r->x[11] = 0;
  24474. r->y[0] = 0;
  24475. r->y[1] = 0;
  24476. r->y[2] = 0;
  24477. r->y[3] = 0;
  24478. r->y[4] = 0;
  24479. r->y[5] = 0;
  24480. r->y[6] = 0;
  24481. r->y[7] = 0;
  24482. r->y[8] = 0;
  24483. r->y[9] = 0;
  24484. r->y[10] = 0;
  24485. r->y[11] = 0;
  24486. for (i = 1; i < 256; i++) {
  24487. mask = 0 - (i == idx);
  24488. r->x[0] |= mask & table[i].x[0];
  24489. r->x[1] |= mask & table[i].x[1];
  24490. r->x[2] |= mask & table[i].x[2];
  24491. r->x[3] |= mask & table[i].x[3];
  24492. r->x[4] |= mask & table[i].x[4];
  24493. r->x[5] |= mask & table[i].x[5];
  24494. r->x[6] |= mask & table[i].x[6];
  24495. r->x[7] |= mask & table[i].x[7];
  24496. r->x[8] |= mask & table[i].x[8];
  24497. r->x[9] |= mask & table[i].x[9];
  24498. r->x[10] |= mask & table[i].x[10];
  24499. r->x[11] |= mask & table[i].x[11];
  24500. r->y[0] |= mask & table[i].y[0];
  24501. r->y[1] |= mask & table[i].y[1];
  24502. r->y[2] |= mask & table[i].y[2];
  24503. r->y[3] |= mask & table[i].y[3];
  24504. r->y[4] |= mask & table[i].y[4];
  24505. r->y[5] |= mask & table[i].y[5];
  24506. r->y[6] |= mask & table[i].y[6];
  24507. r->y[7] |= mask & table[i].y[7];
  24508. r->y[8] |= mask & table[i].y[8];
  24509. r->y[9] |= mask & table[i].y[9];
  24510. r->y[10] |= mask & table[i].y[10];
  24511. r->y[11] |= mask & table[i].y[11];
  24512. }
  24513. }
  24514. #endif /* !WC_NO_CACHE_RESISTANT */
  24515. /* Multiply the point by the scalar and return the result.
  24516. * If map is true then convert result to affine coordinates.
  24517. *
  24518. * Implementation uses striping of bits.
  24519. * Choose bits 8 bits apart.
  24520. *
  24521. * r Resulting point.
  24522. * k Scalar to multiply by.
  24523. * table Pre-computed table.
  24524. * map Indicates whether to convert result to affine.
  24525. * ct Constant time required.
  24526. * heap Heap to use for allocation.
  24527. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24528. */
  24529. static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
  24530. const sp_table_entry_384* table, const sp_digit* k, int map,
  24531. int ct, void* heap)
  24532. {
  24533. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  24534. sp_point_384 rtd;
  24535. sp_point_384 pd;
  24536. sp_digit td[2 * 12 * 6];
  24537. #endif
  24538. sp_point_384* rt;
  24539. sp_point_384* p = NULL;
  24540. sp_digit* t;
  24541. int i, j;
  24542. int y, x;
  24543. int err;
  24544. (void)g;
  24545. /* Constant time used for cache attack resistance implementation. */
  24546. (void)ct;
  24547. (void)heap;
  24548. err = sp_384_point_new_12(heap, rtd, rt);
  24549. if (err == MP_OKAY) {
  24550. err = sp_384_point_new_12(heap, pd, p);
  24551. }
  24552. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24553. t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
  24554. DYNAMIC_TYPE_ECC);
  24555. if (t == NULL) {
  24556. err = MEMORY_E;
  24557. }
  24558. #else
  24559. t = td;
  24560. #endif
  24561. if (err == MP_OKAY) {
  24562. XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
  24563. XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
  24564. y = 0;
  24565. for (j=0,x=47; j<8; j++,x+=48) {
  24566. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  24567. }
  24568. #ifndef WC_NO_CACHE_RESISTANT
  24569. if (ct) {
  24570. sp_384_get_entry_256_12(rt, table, y);
  24571. } else
  24572. #endif
  24573. {
  24574. XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
  24575. XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
  24576. }
  24577. rt->infinity = !y;
  24578. for (i=46; i>=0; i--) {
  24579. y = 0;
  24580. for (j=0,x=i; j<8; j++,x+=48) {
  24581. y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
  24582. }
  24583. sp_384_proj_point_dbl_12(rt, rt, t);
  24584. #ifndef WC_NO_CACHE_RESISTANT
  24585. if (ct) {
  24586. sp_384_get_entry_256_12(p, table, y);
  24587. }
  24588. else
  24589. #endif
  24590. {
  24591. XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
  24592. XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
  24593. }
  24594. p->infinity = !y;
  24595. sp_384_proj_point_add_qz1_12(rt, rt, p, t);
  24596. }
  24597. if (map != 0) {
  24598. sp_384_map_12(r, rt, t);
  24599. }
  24600. else {
  24601. XMEMCPY(r, rt, sizeof(sp_point_384));
  24602. }
  24603. }
  24604. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24605. if (t != NULL) {
  24606. XFREE(t, heap, DYNAMIC_TYPE_ECC);
  24607. }
  24608. #endif
  24609. sp_384_point_free_12(p, 0, heap);
  24610. sp_384_point_free_12(rt, 0, heap);
  24611. return err;
  24612. }
  24613. #ifdef FP_ECC
  24614. #ifndef FP_ENTRIES
  24615. #define FP_ENTRIES 16
  24616. #endif
  /* One slot of the pre-computed table cache, keyed by a point's x and y. */
  24617. typedef struct sp_cache_384_t {
  24618. sp_digit x[12]; /* X ordinate of the cached point (copied from g->x). */
  24619. sp_digit y[12]; /* Y ordinate of the cached point (copied from g->y). */
  24620. sp_table_entry_384 table[256]; /* Stripe table; generated when cnt reaches 2. */
  24621. uint32_t cnt; /* Lookup count: 1 on insertion, incremented on each match. */
  24622. int set; /* Non-zero when this slot holds a point. */
  24623. } sp_cache_384_t;
  24624. static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES]; /* The cache slots. */
  24625. static THREAD_LS_T int sp_cache_384_last = -1; /* Slot returned by the last lookup; -1 before any. */
  24626. static THREAD_LS_T int sp_cache_384_inited = 0; /* Set to 1 once every slot's set flag is cleared. */
  24627. #ifndef HAVE_THREAD_LS
  24628. static volatile int initCacheMutex_384 = 0; /* Set to 1 once sp_cache_384_lock is initialised. */
  24629. static wolfSSL_Mutex sp_cache_384_lock; /* Held around cache lookup and table generation. */
  24630. #endif
  24631. static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
  24632. {
  24633. int i, j;
  24634. uint32_t least;
  24635. if (sp_cache_384_inited == 0) {
  24636. for (i=0; i<FP_ENTRIES; i++) {
  24637. sp_cache_384[i].set = 0;
  24638. }
  24639. sp_cache_384_inited = 1;
  24640. }
  24641. /* Compare point with those in cache. */
  24642. for (i=0; i<FP_ENTRIES; i++) {
  24643. if (!sp_cache_384[i].set)
  24644. continue;
  24645. if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
  24646. sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
  24647. sp_cache_384[i].cnt++;
  24648. break;
  24649. }
  24650. }
  24651. /* No match. */
  24652. if (i == FP_ENTRIES) {
  24653. /* Find empty entry. */
  24654. i = (sp_cache_384_last + 1) % FP_ENTRIES;
  24655. for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
  24656. if (!sp_cache_384[i].set) {
  24657. break;
  24658. }
  24659. }
  24660. /* Evict least used. */
  24661. if (i == sp_cache_384_last) {
  24662. least = sp_cache_384[0].cnt;
  24663. for (j=1; j<FP_ENTRIES; j++) {
  24664. if (sp_cache_384[j].cnt < least) {
  24665. i = j;
  24666. least = sp_cache_384[i].cnt;
  24667. }
  24668. }
  24669. }
  24670. XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
  24671. XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
  24672. sp_cache_384[i].set = 1;
  24673. sp_cache_384[i].cnt = 1;
  24674. }
  24675. *cache = &sp_cache_384[i];
  24676. sp_cache_384_last = i;
  24677. }
  24678. #endif /* FP_ECC */
  24679. /* Multiply the base point of P384 by the scalar and return the result.
  24680. * If map is true then convert result to affine coordinates.
  24681. *
  24682. * r Resulting point.
  24683. * g Point to multiply.
  24684. * k Scalar to multiply by.
  24685. * map Indicates whether to convert result to affine.
  24686. * ct Constant time required.
  24687. * heap Heap to use for allocation.
  24688. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24689. */
  24690. static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
  24691. int map, int ct, void* heap)
  24692. {
  24693. #ifndef FP_ECC
  24694. return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
  24695. #else
  24696. sp_digit tmp[2 * 12 * 7];
  24697. sp_cache_384_t* cache;
  24698. int err = MP_OKAY;
  24699. #ifndef HAVE_THREAD_LS
  24700. if (initCacheMutex_384 == 0) {
  24701. wc_InitMutex(&sp_cache_384_lock);
  24702. initCacheMutex_384 = 1;
  24703. }
  24704. if (wc_LockMutex(&sp_cache_384_lock) != 0)
  24705. err = BAD_MUTEX_E;
  24706. #endif /* HAVE_THREAD_LS */
  24707. if (err == MP_OKAY) {
  24708. sp_ecc_get_cache_384(g, &cache);
  24709. if (cache->cnt == 2)
  24710. sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
  24711. #ifndef HAVE_THREAD_LS
  24712. wc_UnLockMutex(&sp_cache_384_lock);
  24713. #endif /* HAVE_THREAD_LS */
  24714. if (cache->cnt < 2) {
  24715. err = sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
  24716. }
  24717. else {
  24718. err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
  24719. map, ct, heap);
  24720. }
  24721. }
  24722. return err;
  24723. #endif
  24724. }
  24725. #endif /* WOLFSSL_SP_SMALL */
  24726. /* Multiply the point by the scalar and return the result.
  24727. * If map is true then convert result to affine coordinates.
  24728. *
  24729. * km Scalar to multiply by.
  24730. * p Point to multiply.
  24731. * r Resulting point.
  24732. * map Indicates whether to convert result to affine.
  24733. * heap Heap to use for allocation.
  24734. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24735. */
  24736. int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
  24737. void* heap)
  24738. {
  24739. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  24740. sp_point_384 p;
  24741. sp_digit kd[12];
  24742. #endif
  24743. sp_point_384* point;
  24744. sp_digit* k = NULL;
  24745. int err = MP_OKAY;
  24746. err = sp_384_point_new_12(heap, p, point);
  24747. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24748. if (err == MP_OKAY) {
  24749. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
  24750. DYNAMIC_TYPE_ECC);
  24751. if (k == NULL)
  24752. err = MEMORY_E;
  24753. }
  24754. #else
  24755. k = kd;
  24756. #endif
  24757. if (err == MP_OKAY) {
  24758. sp_384_from_mp(k, 12, km);
  24759. sp_384_point_from_ecc_point_12(point, gm);
  24760. err = sp_384_ecc_mulmod_12(point, point, k, map, 1, heap);
  24761. }
  24762. if (err == MP_OKAY) {
  24763. err = sp_384_point_to_ecc_point_12(point, r);
  24764. }
  24765. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  24766. if (k != NULL) {
  24767. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  24768. }
  24769. #endif
  24770. sp_384_point_free_12(point, 0, heap);
  24771. return err;
  24772. }
  24773. #ifdef WOLFSSL_SP_SMALL
  /* Table of 16 pre-computed entries used for P384 base point multiplication
   * when WOLFSSL_SP_SMALL is defined. It is passed, together with p384_base,
   * to sp_384_ecc_mulmod_stripe_12() by sp_384_ecc_mulmod_base_12().
   * Each entry is a pair of arrays of 12 values; entry 0 is all zero.
   * NOTE(review): the pairs are presumably the x and y ordinates of multiples
   * of the base point in the form the stripe routine expects - confirm against
   * the code that generated the table.
   */
  24774. static const sp_table_entry_384 p384_table[16] = {
  24775. /* 0 */
  24776. { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
  24777. { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
  24778. /* 1 */
  24779. { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
  24780. 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
  24781. { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
  24782. 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
  24783. /* 2 */
  24784. { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
  24785. 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
  24786. { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
  24787. 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
  24788. /* 3 */
  24789. { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
  24790. 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
  24791. { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
  24792. 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
  24793. /* 4 */
  24794. { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
  24795. 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
  24796. { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
  24797. 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
  24798. /* 5 */
  24799. { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
  24800. 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
  24801. { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
  24802. 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
  24803. /* 6 */
  24804. { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
  24805. 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
  24806. { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
  24807. 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
  24808. /* 7 */
  24809. { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
  24810. 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
  24811. { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
  24812. 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
  24813. /* 8 */
  24814. { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
  24815. 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
  24816. { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
  24817. 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
  24818. /* 9 */
  24819. { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
  24820. 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
  24821. { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
  24822. 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
  24823. /* 10 */
  24824. { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
  24825. 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
  24826. { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
  24827. 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
  24828. /* 11 */
  24829. { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
  24830. 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
  24831. { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
  24832. 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
  24833. /* 12 */
  24834. { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
  24835. 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
  24836. { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
  24837. 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
  24838. /* 13 */
  24839. { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
  24840. 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
  24841. { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
  24842. 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
  24843. /* 14 */
  24844. { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
  24845. 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
  24846. { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
  24847. 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
  24848. /* 15 */
  24849. { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
  24850. 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
  24851. { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
  24852. 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
  24853. };
  24854. /* Multiply the base point of P384 by the scalar and return the result.
  24855. * If map is true then convert result to affine coordinates.
  24856. *
  24857. * r Resulting point.
  24858. * k Scalar to multiply by.
  24859. * map Indicates whether to convert result to affine.
  24860. * ct Constant time required.
  24861. * heap Heap to use for allocation.
  24862. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  24863. */
  24864. static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
  24865. int map, int ct, void* heap)
  24866. {
  24867. return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
  24868. k, map, ct, heap);
  24869. }
  24870. #else
  24871. static const sp_table_entry_384 p384_table[256] = {
  24872. /* 0 */
  24873. { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
  24874. { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
  24875. /* 1 */
  24876. { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
  24877. 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
  24878. { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
  24879. 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
  24880. /* 2 */
  24881. { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
  24882. 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
  24883. { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
  24884. 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
  24885. /* 3 */
  24886. { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
  24887. 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
  24888. { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
  24889. 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
  24890. /* 4 */
  24891. { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
  24892. 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
  24893. { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
  24894. 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
  24895. /* 5 */
  24896. { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
  24897. 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
  24898. { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
  24899. 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
  24900. /* 6 */
  24901. { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
  24902. 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
  24903. { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
  24904. 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
  24905. /* 7 */
  24906. { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
  24907. 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
  24908. { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
  24909. 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
  24910. /* 8 */
  24911. { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
  24912. 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
  24913. { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
  24914. 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
  24915. /* 9 */
  24916. { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
  24917. 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
  24918. { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
  24919. 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
  24920. /* 10 */
  24921. { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
  24922. 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
  24923. { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
  24924. 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
  24925. /* 11 */
  24926. { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
  24927. 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
  24928. { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
  24929. 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
  24930. /* 12 */
  24931. { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
  24932. 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
  24933. { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
  24934. 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
  24935. /* 13 */
  24936. { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
  24937. 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
  24938. { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
  24939. 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
  24940. /* 14 */
  24941. { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
  24942. 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
  24943. { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
  24944. 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
  24945. /* 15 */
  24946. { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
  24947. 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
  24948. { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
  24949. 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
  24950. /* 16 */
  24951. { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
  24952. 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
  24953. { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
  24954. 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
  24955. /* 17 */
  24956. { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
  24957. 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
  24958. { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
  24959. 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
  24960. /* 18 */
  24961. { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
  24962. 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
  24963. { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
  24964. 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
  24965. /* 19 */
  24966. { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
  24967. 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
  24968. { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
  24969. 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
  24970. /* 20 */
  24971. { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
  24972. 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
  24973. { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
  24974. 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
  24975. /* 21 */
  24976. { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
  24977. 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
  24978. { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
  24979. 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
  24980. /* 22 */
  24981. { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
  24982. 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
  24983. { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
  24984. 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
  24985. /* 23 */
  24986. { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
  24987. 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
  24988. { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
  24989. 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
  24990. /* 24 */
  24991. { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
  24992. 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
  24993. { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
  24994. 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
  24995. /* 25 */
  24996. { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
  24997. 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
  24998. { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
  24999. 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
  25000. /* 26 */
  25001. { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
  25002. 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
  25003. { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
  25004. 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
  25005. /* 27 */
  25006. { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
  25007. 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
  25008. { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
  25009. 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
  25010. /* 28 */
  25011. { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
  25012. 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
  25013. { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
  25014. 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
  25015. /* 29 */
  25016. { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
  25017. 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
  25018. { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
  25019. 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
  25020. /* 30 */
  25021. { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
  25022. 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
  25023. { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
  25024. 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
  25025. /* 31 */
  25026. { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
  25027. 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
  25028. { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
  25029. 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
  25030. /* 32 */
  25031. { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
  25032. 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
  25033. { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
  25034. 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
  25035. /* 33 */
  25036. { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
  25037. 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
  25038. { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
  25039. 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
  25040. /* 34 */
  25041. { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
  25042. 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
  25043. { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
  25044. 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
  25045. /* 35 */
  25046. { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
  25047. 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
  25048. { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
  25049. 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
  25050. /* 36 */
  25051. { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
  25052. 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
  25053. { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
  25054. 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
  25055. /* 37 */
  25056. { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
  25057. 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
  25058. { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
  25059. 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
  25060. /* 38 */
  25061. { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
  25062. 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
  25063. { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
  25064. 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
  25065. /* 39 */
  25066. { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
  25067. 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
  25068. { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
  25069. 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
  25070. /* 40 */
  25071. { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
  25072. 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
  25073. { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
  25074. 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
  25075. /* 41 */
  25076. { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
  25077. 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
  25078. { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
  25079. 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
  25080. /* 42 */
  25081. { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
  25082. 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
  25083. { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
  25084. 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
  25085. /* 43 */
  25086. { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
  25087. 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
  25088. { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
  25089. 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
  25090. /* 44 */
  25091. { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
  25092. 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
  25093. { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
  25094. 0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
  25095. /* 45 */
  25096. { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
  25097. 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
  25098. { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
  25099. 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
  25100. /* 46 */
  25101. { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
  25102. 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
  25103. { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
  25104. 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
  25105. /* 47 */
  25106. { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
  25107. 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
  25108. { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
  25109. 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
  25110. /* 48 */
  25111. { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
  25112. 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
  25113. { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
  25114. 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
  25115. /* 49 */
  25116. { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
  25117. 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
  25118. { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
  25119. 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
  25120. /* 50 */
  25121. { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
  25122. 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
  25123. { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
  25124. 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
  25125. /* 51 */
  25126. { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
  25127. 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
  25128. { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
  25129. 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
  25130. /* 52 */
  25131. { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
  25132. 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
  25133. { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
  25134. 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
  25135. /* 53 */
  25136. { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
  25137. 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
  25138. { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
  25139. 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
  25140. /* 54 */
  25141. { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
  25142. 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
  25143. { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
  25144. 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
  25145. /* 55 */
  25146. { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
  25147. 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
  25148. { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
  25149. 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
  25150. /* 56 */
  25151. { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
  25152. 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
  25153. { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
  25154. 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
  25155. /* 57 */
  25156. { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
  25157. 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
  25158. { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
  25159. 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
  25160. /* 58 */
  25161. { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
  25162. 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
  25163. { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
  25164. 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
  25165. /* 59 */
  25166. { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
  25167. 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
  25168. { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
  25169. 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
  25170. /* 60 */
  25171. { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
  25172. 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
  25173. { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
  25174. 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
  25175. /* 61 */
  25176. { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
  25177. 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
  25178. { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
  25179. 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
  25180. /* 62 */
  25181. { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
  25182. 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
  25183. { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
  25184. 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
  25185. /* 63 */
  25186. { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
  25187. 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
  25188. { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
  25189. 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
  25190. /* 64 */
  25191. { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
  25192. 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
  25193. { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
  25194. 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
  25195. /* 65 */
  25196. { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
  25197. 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
  25198. { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
  25199. 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
  25200. /* 66 */
  25201. { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
  25202. 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
  25203. { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
  25204. 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
  25205. /* 67 */
  25206. { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
  25207. 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
  25208. { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
  25209. 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
  25210. /* 68 */
  25211. { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
  25212. 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
  25213. { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
  25214. 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
  25215. /* 69 */
  25216. { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
  25217. 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
  25218. { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
  25219. 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
  25220. /* 70 */
  25221. { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
  25222. 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
  25223. { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
  25224. 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
  25225. /* 71 */
  25226. { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
  25227. 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
  25228. { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
  25229. 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
  25230. /* 72 */
  25231. { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
  25232. 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
  25233. { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
  25234. 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
  25235. /* 73 */
  25236. { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
  25237. 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
  25238. { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
  25239. 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
  25240. /* 74 */
  25241. { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
  25242. 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
  25243. { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
  25244. 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
  25245. /* 75 */
  25246. { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
  25247. 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
  25248. { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
  25249. 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
  25250. /* 76 */
  25251. { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
  25252. 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
  25253. { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
  25254. 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
  25255. /* 77 */
  25256. { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
  25257. 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
  25258. { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
  25259. 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
  25260. /* 78 */
  25261. { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
  25262. 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
  25263. { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
  25264. 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
  25265. /* 79 */
  25266. { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
  25267. 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
  25268. { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
  25269. 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
  25270. /* 80 */
  25271. { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
  25272. 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
  25273. { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
  25274. 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
  25275. /* 81 */
  25276. { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
  25277. 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
  25278. { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
  25279. 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
  25280. /* 82 */
  25281. { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
  25282. 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
  25283. { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
  25284. 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
  25285. /* 83 */
  25286. { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
  25287. 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
  25288. { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
  25289. 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
  25290. /* 84 */
  25291. { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
  25292. 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
  25293. { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
  25294. 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
  25295. /* 85 */
  25296. { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
  25297. 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
  25298. { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
  25299. 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
  25300. /* 86 */
  25301. { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
  25302. 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
  25303. { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
  25304. 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
  25305. /* 87 */
  25306. { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
  25307. 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
  25308. { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
  25309. 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
  25310. /* 88 */
  25311. { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
  25312. 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
  25313. { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
  25314. 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
  25315. /* 89 */
  25316. { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
  25317. 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
  25318. { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
  25319. 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
  25320. /* 90 */
  25321. { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
  25322. 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
  25323. { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
  25324. 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
  25325. /* 91 */
  25326. { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
  25327. 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
  25328. { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
  25329. 0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
  25330. /* 92 */
  25331. { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
  25332. 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
  25333. { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
  25334. 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
  25335. /* 93 */
  25336. { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
  25337. 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
  25338. { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
  25339. 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
  25340. /* 94 */
  25341. { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
  25342. 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
  25343. { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
  25344. 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
  25345. /* 95 */
  25346. { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
  25347. 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
  25348. { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
  25349. 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
  25350. /* 96 */
  25351. { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
  25352. 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
  25353. { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
  25354. 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
  25355. /* 97 */
  25356. { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
  25357. 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
  25358. { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
  25359. 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
  25360. /* 98 */
  25361. { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
  25362. 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
  25363. { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
  25364. 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
  25365. /* 99 */
  25366. { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
  25367. 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
  25368. { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
  25369. 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
  25370. /* 100 */
  25371. { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
  25372. 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
  25373. { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
  25374. 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
  25375. /* 101 */
  25376. { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
  25377. 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
  25378. { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
  25379. 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
  25380. /* 102 */
  25381. { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
  25382. 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
  25383. { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
  25384. 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
  25385. /* 103 */
  25386. { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
  25387. 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
  25388. { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
  25389. 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
  25390. /* 104 */
  25391. { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
  25392. 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
  25393. { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
  25394. 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
  25395. /* 105 */
  25396. { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
  25397. 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
  25398. { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
  25399. 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
  25400. /* 106 */
  25401. { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
  25402. 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
  25403. { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
  25404. 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
  25405. /* 107 */
  25406. { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
  25407. 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
  25408. { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
  25409. 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
  25410. /* 108 */
  25411. { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
  25412. 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
  25413. { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
  25414. 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
  25415. /* 109 */
  25416. { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
  25417. 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
  25418. { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
  25419. 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
  25420. /* 110 */
  25421. { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
  25422. 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
  25423. { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
  25424. 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
  25425. /* 111 */
  25426. { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
  25427. 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
  25428. { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
  25429. 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
  25430. /* 112 */
  25431. { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
  25432. 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
  25433. { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
  25434. 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
  25435. /* 113 */
  25436. { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
  25437. 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
  25438. { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
  25439. 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
  25440. /* 114 */
  25441. { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
  25442. 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
  25443. { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
  25444. 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
  25445. /* 115 */
  25446. { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
  25447. 0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
  25448. { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
  25449. 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
  25450. /* 116 */
  25451. { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
  25452. 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
  25453. { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
  25454. 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
  25455. /* 117 */
  25456. { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
  25457. 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
  25458. { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
  25459. 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
  25460. /* 118 */
  25461. { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
  25462. 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
  25463. { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
  25464. 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
  25465. /* 119 */
  25466. { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
  25467. 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
  25468. { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
  25469. 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
  25470. /* 120 */
  25471. { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
  25472. 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
  25473. { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
  25474. 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
  25475. /* 121 */
  25476. { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
  25477. 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
  25478. { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
  25479. 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
  25480. /* 122 */
  25481. { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
  25482. 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
  25483. { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
  25484. 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
  25485. /* 123 */
  25486. { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
  25487. 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
  25488. { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
  25489. 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
  25490. /* 124 */
  25491. { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
  25492. 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
  25493. { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
  25494. 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
  25495. /* 125 */
  25496. { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
  25497. 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
  25498. { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
  25499. 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
  25500. /* 126 */
  25501. { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
  25502. 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
  25503. { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
  25504. 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
  25505. /* 127 */
  25506. { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
  25507. 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
  25508. { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
  25509. 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
  25510. /* 128 */
  25511. { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
  25512. 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
  25513. { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
  25514. 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
  25515. /* 129 */
  25516. { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
  25517. 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
  25518. { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
  25519. 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
  25520. /* 130 */
  25521. { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
  25522. 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
  25523. { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
  25524. 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
  25525. /* 131 */
  25526. { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
  25527. 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
  25528. { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
  25529. 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
  25530. /* 132 */
  25531. { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
  25532. 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
  25533. { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
  25534. 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
  25535. /* 133 */
  25536. { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
  25537. 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
  25538. { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
  25539. 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
  25540. /* 134 */
  25541. { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
  25542. 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
  25543. { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
  25544. 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
  25545. /* 135 */
  25546. { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
  25547. 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
  25548. { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
  25549. 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
  25550. /* 136 */
  25551. { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
  25552. 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
  25553. { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
  25554. 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
  25555. /* 137 */
  25556. { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
  25557. 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
  25558. { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
  25559. 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
  25560. /* 138 */
  25561. { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
  25562. 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
  25563. { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
  25564. 0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
  25565. /* 139 */
  25566. { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
  25567. 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
  25568. { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
  25569. 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
  25570. /* 140 */
  25571. { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
  25572. 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
  25573. { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
  25574. 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
  25575. /* 141 */
  25576. { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
  25577. 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
  25578. { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
  25579. 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
  25580. /* 142 */
  25581. { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
  25582. 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
  25583. { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
  25584. 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
  25585. /* 143 */
  25586. { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
  25587. 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
  25588. { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
  25589. 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
  25590. /* 144 */
  25591. { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
  25592. 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
  25593. { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
  25594. 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
  25595. /* 145 */
  25596. { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
  25597. 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
  25598. { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
  25599. 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
  25600. /* 146 */
  25601. { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
  25602. 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
  25603. { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
  25604. 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
  25605. /* 147 */
  25606. { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
  25607. 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
  25608. { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
  25609. 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
  25610. /* 148 */
  25611. { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
  25612. 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
  25613. { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
  25614. 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
  25615. /* 149 */
  25616. { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
  25617. 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
  25618. { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
  25619. 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
  25620. /* 150 */
  25621. { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
  25622. 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
  25623. { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
  25624. 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
  25625. /* 151 */
  25626. { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
  25627. 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
  25628. { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
  25629. 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
  25630. /* 152 */
  25631. { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
  25632. 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
  25633. { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
  25634. 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
  25635. /* 153 */
  25636. { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
  25637. 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
  25638. { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
  25639. 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
  25640. /* 154 */
  25641. { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
  25642. 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
  25643. { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
  25644. 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
  25645. /* 155 */
  25646. { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
  25647. 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
  25648. { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
  25649. 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
  25650. /* 156 */
  25651. { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
  25652. 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
  25653. { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
  25654. 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
  25655. /* 157 */
  25656. { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
  25657. 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
  25658. { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
  25659. 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
  25660. /* 158 */
  25661. { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
  25662. 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
  25663. { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
  25664. 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
  25665. /* 159 */
  25666. { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
  25667. 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
  25668. { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
  25669. 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
  25670. /* 160 */
  25671. { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
  25672. 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
  25673. { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
  25674. 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
  25675. /* 161 */
  25676. { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
  25677. 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
  25678. { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
  25679. 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
  25680. /* 162 */
  25681. { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
  25682. 0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
  25683. { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
  25684. 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
  25685. /* 163 */
  25686. { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
  25687. 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
  25688. { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
  25689. 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
  25690. /* 164 */
  25691. { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
  25692. 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
  25693. { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
  25694. 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
  25695. /* 165 */
  25696. { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
  25697. 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
  25698. { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
  25699. 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
  25700. /* 166 */
  25701. { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
  25702. 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
  25703. { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
  25704. 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
  25705. /* 167 */
  25706. { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
  25707. 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
  25708. { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
  25709. 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
  25710. /* 168 */
  25711. { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
  25712. 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
  25713. { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
  25714. 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
  25715. /* 169 */
  25716. { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
  25717. 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
  25718. { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
  25719. 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
  25720. /* 170 */
  25721. { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
  25722. 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
  25723. { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
  25724. 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
  25725. /* 171 */
  25726. { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
  25727. 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
  25728. { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
  25729. 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
  25730. /* 172 */
  25731. { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
  25732. 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
  25733. { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
  25734. 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
  25735. /* 173 */
  25736. { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
  25737. 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
  25738. { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
  25739. 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
  25740. /* 174 */
  25741. { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
  25742. 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
  25743. { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
  25744. 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
  25745. /* 175 */
  25746. { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
  25747. 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
  25748. { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
  25749. 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
  25750. /* 176 */
  25751. { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
  25752. 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
  25753. { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
  25754. 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
  25755. /* 177 */
  25756. { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
  25757. 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
  25758. { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
  25759. 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
  25760. /* 178 */
  25761. { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
  25762. 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
  25763. { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
  25764. 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
  25765. /* 179 */
  25766. { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
  25767. 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
  25768. { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
  25769. 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
  25770. /* 180 */
  25771. { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
  25772. 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
  25773. { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
  25774. 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
  25775. /* 181 */
  25776. { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
  25777. 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
  25778. { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
  25779. 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
  25780. /* 182 */
  25781. { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
  25782. 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
  25783. { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
  25784. 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
  25785. /* 183 */
  25786. { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
  25787. 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
  25788. { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
  25789. 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
  25790. /* 184 */
  25791. { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
  25792. 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
  25793. { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
  25794. 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
  25795. /* 185 */
  25796. { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
  25797. 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
  25798. { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
  25799. 0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
  25800. /* 186 */
  25801. { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
  25802. 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
  25803. { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
  25804. 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
  25805. /* 187 */
  25806. { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
  25807. 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
  25808. { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
  25809. 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
  25810. /* 188 */
  25811. { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
  25812. 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
  25813. { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
  25814. 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
  25815. /* 189 */
  25816. { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
  25817. 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
  25818. { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
  25819. 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
  25820. /* 190 */
  25821. { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
  25822. 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
  25823. { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
  25824. 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
  25825. /* 191 */
  25826. { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
  25827. 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
  25828. { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
  25829. 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
  25830. /* 192 */
  25831. { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
  25832. 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
  25833. { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
  25834. 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
  25835. /* 193 */
  25836. { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
  25837. 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
  25838. { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
  25839. 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
  25840. /* 194 */
  25841. { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
  25842. 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
  25843. { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
  25844. 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
  25845. /* 195 */
  25846. { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
  25847. 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
  25848. { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
  25849. 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
  25850. /* 196 */
  25851. { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
  25852. 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
  25853. { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
  25854. 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
  25855. /* 197 */
  25856. { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
  25857. 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
  25858. { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
  25859. 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
  25860. /* 198 */
  25861. { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
  25862. 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
  25863. { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
  25864. 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
  25865. /* 199 */
  25866. { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
  25867. 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
  25868. { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
  25869. 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
  25870. /* 200 */
  25871. { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
  25872. 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
  25873. { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
  25874. 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
  25875. /* 201 */
  25876. { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
  25877. 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
  25878. { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
  25879. 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
  25880. /* 202 */
  25881. { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
  25882. 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
  25883. { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
  25884. 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
  25885. /* 203 */
  25886. { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
  25887. 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
  25888. { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
  25889. 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
  25890. /* 204 */
  25891. { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
  25892. 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
  25893. { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
  25894. 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
  25895. /* 205 */
  25896. { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
  25897. 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
  25898. { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
  25899. 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
  25900. /* 206 */
  25901. { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
  25902. 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
  25903. { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
  25904. 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
  25905. /* 207 */
  25906. { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
  25907. 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
  25908. { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
  25909. 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
  25910. /* 208 */
  25911. { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
  25912. 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
  25913. { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
  25914. 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
  25915. /* 209 */
  25916. { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
  25917. 0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
  25918. { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
  25919. 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
  25920. /* 210 */
  25921. { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
  25922. 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
  25923. { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
  25924. 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
  25925. /* 211 */
  25926. { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
  25927. 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
  25928. { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
  25929. 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
  25930. /* 212 */
  25931. { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
  25932. 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
  25933. { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
  25934. 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
  25935. /* 213 */
  25936. { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
  25937. 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
  25938. { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
  25939. 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
  25940. /* 214 */
  25941. { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
  25942. 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
  25943. { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
  25944. 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
  25945. /* 215 */
  25946. { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
  25947. 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
  25948. { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
  25949. 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
  25950. /* 216 */
  25951. { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
  25952. 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
  25953. { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
  25954. 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
  25955. /* 217 */
  25956. { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
  25957. 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
  25958. { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
  25959. 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
  25960. /* 218 */
  25961. { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
  25962. 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
  25963. { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
  25964. 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
  25965. /* 219 */
  25966. { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
  25967. 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
  25968. { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
  25969. 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
  25970. /* 220 */
  25971. { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
  25972. 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
  25973. { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
  25974. 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
  25975. /* 221 */
  25976. { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
  25977. 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
  25978. { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
  25979. 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
  25980. /* 222 */
  25981. { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
  25982. 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
  25983. { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
  25984. 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
  25985. /* 223 */
  25986. { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
  25987. 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
  25988. { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
  25989. 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
  25990. /* 224 */
  25991. { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
  25992. 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
  25993. { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
  25994. 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
  25995. /* 225 */
  25996. { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
  25997. 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
  25998. { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
  25999. 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
  26000. /* 226 */
  26001. { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
  26002. 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
  26003. { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
  26004. 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
  26005. /* 227 */
  26006. { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
  26007. 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
  26008. { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
  26009. 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
  26010. /* 228 */
  26011. { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
  26012. 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
  26013. { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
  26014. 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
  26015. /* 229 */
  26016. { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
  26017. 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
  26018. { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
  26019. 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
  26020. /* 230 */
  26021. { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
  26022. 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
  26023. { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
  26024. 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
  26025. /* 231 */
  26026. { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
  26027. 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
  26028. { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
  26029. 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
  26030. /* 232 */
  26031. { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
  26032. 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
  26033. { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
  26034. 0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
  26035. /* 233 */
  26036. { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
  26037. 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
  26038. { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
  26039. 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
  26040. /* 234 */
  26041. { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
  26042. 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
  26043. { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
  26044. 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
  26045. /* 235 */
  26046. { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
  26047. 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
  26048. { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
  26049. 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
  26050. /* 236 */
  26051. { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
  26052. 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
  26053. { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
  26054. 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
  26055. /* 237 */
  26056. { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
  26057. 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
  26058. { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
  26059. 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
  26060. /* 238 */
  26061. { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
  26062. 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
  26063. { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
  26064. 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
  26065. /* 239 */
  26066. { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
  26067. 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
  26068. { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
  26069. 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
  26070. /* 240 */
  26071. { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
  26072. 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
  26073. { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
  26074. 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
  26075. /* 241 */
  26076. { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
  26077. 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
  26078. { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
  26079. 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
  26080. /* 242 */
  26081. { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
  26082. 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
  26083. { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
  26084. 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
  26085. /* 243 */
  26086. { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
  26087. 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
  26088. { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
  26089. 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
  26090. /* 244 */
  26091. { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
  26092. 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
  26093. { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
  26094. 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
  26095. /* 245 */
  26096. { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
  26097. 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
  26098. { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
  26099. 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
  26100. /* 246 */
  26101. { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
  26102. 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
  26103. { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
  26104. 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
  26105. /* 247 */
  26106. { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
  26107. 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
  26108. { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
  26109. 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
  26110. /* 248 */
  26111. { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
  26112. 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
  26113. { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
  26114. 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
  26115. /* 249 */
  26116. { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
  26117. 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
  26118. { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
  26119. 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
  26120. /* 250 */
  26121. { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
  26122. 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
  26123. { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
  26124. 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
  26125. /* 251 */
  26126. { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
  26127. 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
  26128. { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
  26129. 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
  26130. /* 252 */
  26131. { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
  26132. 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
  26133. { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
  26134. 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
  26135. /* 253 */
  26136. { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
  26137. 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
  26138. { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
  26139. 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
  26140. /* 254 */
  26141. { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
  26142. 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
  26143. { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
  26144. 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
  26145. /* 255 */
  26146. { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
  26147. 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
  26148. { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
  26149. 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
  26150. };
  26151. /* Multiply the base point of P384 by the scalar and return the result.
  26152. * If map is true then convert result to affine coordinates.
  26153. *
  26154. * r Resulting point.
  26155. * k Scalar to multiply by.
  26156. * map Indicates whether to convert result to affine.
  26157. * ct Constant time required.
  26158. * heap Heap to use for allocation.
  26159. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  26160. */
  26161. static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
  26162. int map, int ct, void* heap)
  26163. {
  26164. return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
  26165. k, map, ct, heap);
  26166. }
  26167. #endif
  26168. /* Multiply the base point of P384 by the scalar and return the result.
  26169. * If map is true then convert result to affine coordinates.
  26170. *
  26171. * km Scalar to multiply by.
  26172. * r Resulting point.
  26173. * map Indicates whether to convert result to affine.
  26174. * heap Heap to use for allocation.
  26175. * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
  26176. */
  26177. int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
  26178. {
  26179. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  26180. sp_point_384 p;
  26181. sp_digit kd[12];
  26182. #endif
  26183. sp_point_384* point;
  26184. sp_digit* k = NULL;
  26185. int err = MP_OKAY;
  26186. err = sp_384_point_new_12(heap, p, point);
  26187. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26188. if (err == MP_OKAY) {
  26189. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
  26190. DYNAMIC_TYPE_ECC);
  26191. if (k == NULL) {
  26192. err = MEMORY_E;
  26193. }
  26194. }
  26195. #else
  26196. k = kd;
  26197. #endif
  26198. if (err == MP_OKAY) {
  26199. sp_384_from_mp(k, 12, km);
  26200. err = sp_384_ecc_mulmod_base_12(point, k, map, 1, heap);
  26201. }
  26202. if (err == MP_OKAY) {
  26203. err = sp_384_point_to_ecc_point_12(point, r);
  26204. }
  26205. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26206. if (k != NULL) {
  26207. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  26208. }
  26209. #endif
  26210. sp_384_point_free_12(point, 0, heap);
  26211. return err;
  26212. }
  26213. #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
  26214. defined(HAVE_ECC_VERIFY)
  26215. /* Returns 1 if the number of zero.
  26216. * Implementation is constant time.
  26217. *
  26218. * a Number to check.
  26219. * returns 1 if the number is zero and 0 otherwise.
  26220. */
  26221. static int sp_384_iszero_12(const sp_digit* a)
  26222. {
  26223. return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
  26224. a[8] | a[9] | a[10] | a[11]) == 0;
  26225. }
  26226. #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
  26227. /* Add 1 to a. (a = a + 1)
  26228. *
  26229. * a A single precision integer.
  26230. */
  26231. SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
  26232. {
  26233. __asm__ __volatile__ (
  26234. "mov r2, #1\n\t"
  26235. "ldr r1, [%[a], #0]\n\t"
  26236. "adds r1, r1, r2\n\t"
  26237. "mov r2, #0\n\t"
  26238. "str r1, [%[a], #0]\n\t"
  26239. "ldr r1, [%[a], #4]\n\t"
  26240. "adcs r1, r1, r2\n\t"
  26241. "str r1, [%[a], #4]\n\t"
  26242. "ldr r1, [%[a], #8]\n\t"
  26243. "adcs r1, r1, r2\n\t"
  26244. "str r1, [%[a], #8]\n\t"
  26245. "ldr r1, [%[a], #12]\n\t"
  26246. "adcs r1, r1, r2\n\t"
  26247. "str r1, [%[a], #12]\n\t"
  26248. "ldr r1, [%[a], #16]\n\t"
  26249. "adcs r1, r1, r2\n\t"
  26250. "str r1, [%[a], #16]\n\t"
  26251. "ldr r1, [%[a], #20]\n\t"
  26252. "adcs r1, r1, r2\n\t"
  26253. "str r1, [%[a], #20]\n\t"
  26254. "ldr r1, [%[a], #24]\n\t"
  26255. "adcs r1, r1, r2\n\t"
  26256. "str r1, [%[a], #24]\n\t"
  26257. "ldr r1, [%[a], #28]\n\t"
  26258. "adcs r1, r1, r2\n\t"
  26259. "str r1, [%[a], #28]\n\t"
  26260. "ldr r1, [%[a], #32]\n\t"
  26261. "adcs r1, r1, r2\n\t"
  26262. "str r1, [%[a], #32]\n\t"
  26263. "ldr r1, [%[a], #36]\n\t"
  26264. "adcs r1, r1, r2\n\t"
  26265. "str r1, [%[a], #36]\n\t"
  26266. "ldr r1, [%[a], #40]\n\t"
  26267. "adcs r1, r1, r2\n\t"
  26268. "str r1, [%[a], #40]\n\t"
  26269. "ldr r1, [%[a], #44]\n\t"
  26270. "adcs r1, r1, r2\n\t"
  26271. "str r1, [%[a], #44]\n\t"
  26272. :
  26273. : [a] "r" (a)
  26274. : "memory", "r1", "r2"
  26275. );
  26276. }
  26277. /* Read big endian unsigned byte array into r.
  26278. *
  26279. * r A single precision integer.
  26280. * size Maximum number of bytes to convert
  26281. * a Byte array.
  26282. * n Number of bytes in array to read.
  26283. */
  26284. static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
  26285. {
  26286. int i, j = 0;
  26287. word32 s = 0;
  26288. r[0] = 0;
  26289. for (i = n-1; i >= 0; i--) {
  26290. r[j] |= (((sp_digit)a[i]) << s);
  26291. if (s >= 24U) {
  26292. r[j] &= 0xffffffff;
  26293. s = 32U - s;
  26294. if (j + 1 >= size) {
  26295. break;
  26296. }
  26297. r[++j] = (sp_digit)a[i] >> s;
  26298. s = 8U - s;
  26299. }
  26300. else {
  26301. s += 8U;
  26302. }
  26303. }
  26304. for (j++; j < size; j++) {
  26305. r[j] = 0;
  26306. }
  26307. }
  26308. /* Generates a scalar that is in the range 1..order-1.
  26309. *
  26310. * rng Random number generator.
  26311. * k Scalar value.
  26312. * returns RNG failures, MEMORY_E when memory allocation fails and
  26313. * MP_OKAY on success.
  26314. */
  26315. static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
  26316. {
  26317. int err;
  26318. byte buf[48];
  26319. do {
  26320. err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
  26321. if (err == 0) {
  26322. sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
  26323. if (sp_384_cmp_12(k, p384_order2) < 0) {
  26324. sp_384_add_one_12(k);
  26325. break;
  26326. }
  26327. }
  26328. }
  26329. while (err == 0);
  26330. return err;
  26331. }
  26332. /* Makes a random EC key pair.
  26333. *
  26334. * rng Random number generator.
  26335. * priv Generated private value.
  26336. * pub Generated public point.
  26337. * heap Heap to use for allocation.
  26338. * returns ECC_INF_E when the point does not have the correct order, RNG
  26339. * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
  26340. */
  26341. int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
  26342. {
  26343. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  26344. sp_point_384 p;
  26345. sp_digit kd[12];
  26346. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  26347. sp_point_384 inf;
  26348. #endif
  26349. #endif
  26350. sp_point_384* point;
  26351. sp_digit* k = NULL;
  26352. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  26353. sp_point_384* infinity = NULL;
  26354. #endif
  26355. int err;
  26356. (void)heap;
  26357. err = sp_384_point_new_12(heap, p, point);
  26358. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  26359. if (err == MP_OKAY) {
  26360. err = sp_384_point_new_12(heap, inf, infinity);
  26361. }
  26362. #endif
  26363. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26364. if (err == MP_OKAY) {
  26365. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
  26366. DYNAMIC_TYPE_ECC);
  26367. if (k == NULL) {
  26368. err = MEMORY_E;
  26369. }
  26370. }
  26371. #else
  26372. k = kd;
  26373. #endif
  26374. if (err == MP_OKAY) {
  26375. err = sp_384_ecc_gen_k_12(rng, k);
  26376. }
  26377. if (err == MP_OKAY) {
  26378. err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, NULL);
  26379. }
  26380. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  26381. if (err == MP_OKAY) {
  26382. err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, 1, NULL);
  26383. }
  26384. if (err == MP_OKAY) {
  26385. if (sp_384_iszero_12(point->x) || sp_384_iszero_12(point->y)) {
  26386. err = ECC_INF_E;
  26387. }
  26388. }
  26389. #endif
  26390. if (err == MP_OKAY) {
  26391. err = sp_384_to_mp(k, priv);
  26392. }
  26393. if (err == MP_OKAY) {
  26394. err = sp_384_point_to_ecc_point_12(point, pub);
  26395. }
  26396. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26397. if (k != NULL) {
  26398. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  26399. }
  26400. #endif
  26401. #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
  26402. sp_384_point_free_12(infinity, 1, heap);
  26403. #endif
  26404. sp_384_point_free_12(point, 1, heap);
  26405. return err;
  26406. }
  26407. #ifdef HAVE_ECC_DHE
  26408. /* Write r as big endian to byte array.
  26409. * Fixed length number of bytes written: 48
  26410. *
  26411. * r A single precision integer.
  26412. * a Byte array.
  26413. */
  26414. static void sp_384_to_bin(sp_digit* r, byte* a)
  26415. {
  26416. int i, j, s = 0, b;
  26417. j = 384 / 8 - 1;
  26418. a[j] = 0;
  26419. for (i=0; i<12 && j>=0; i++) {
  26420. b = 0;
  26421. /* lint allow cast of mismatch sp_digit and int */
  26422. a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
  26423. b += 8 - s;
  26424. if (j < 0) {
  26425. break;
  26426. }
  26427. while (b < 32) {
  26428. a[j--] = (byte)(r[i] >> b);
  26429. b += 8;
  26430. if (j < 0) {
  26431. break;
  26432. }
  26433. }
  26434. s = 8 - (b - 32);
  26435. if (j >= 0) {
  26436. a[j] = 0;
  26437. }
  26438. if (s != 0) {
  26439. j++;
  26440. }
  26441. }
  26442. }
  26443. /* Multiply the point by the scalar and serialize the X ordinate.
  26444. * The number is 0 padded to maximum size on output.
  26445. *
  26446. * priv Scalar to multiply the point by.
  26447. * pub Point to multiply.
  26448. * out Buffer to hold X ordinate.
  26449. * outLen On entry, size of the buffer in bytes.
  26450. * On exit, length of data in buffer in bytes.
  26451. * heap Heap to use for allocation.
  26452. * returns BUFFER_E if the buffer is to small for output size,
  26453. * MEMORY_E when memory allocation fails and MP_OKAY on success.
  26454. */
  26455. int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
  26456. word32* outLen, void* heap)
  26457. {
  26458. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  26459. sp_point_384 p;
  26460. sp_digit kd[12];
  26461. #endif
  26462. sp_point_384* point = NULL;
  26463. sp_digit* k = NULL;
  26464. int err = MP_OKAY;
  26465. if (*outLen < 48U) {
  26466. err = BUFFER_E;
  26467. }
  26468. if (err == MP_OKAY) {
  26469. err = sp_384_point_new_12(heap, p, point);
  26470. }
  26471. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26472. if (err == MP_OKAY) {
  26473. k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
  26474. DYNAMIC_TYPE_ECC);
  26475. if (k == NULL)
  26476. err = MEMORY_E;
  26477. }
  26478. #else
  26479. k = kd;
  26480. #endif
  26481. if (err == MP_OKAY) {
  26482. sp_384_from_mp(k, 12, priv);
  26483. sp_384_point_from_ecc_point_12(point, pub);
  26484. err = sp_384_ecc_mulmod_12(point, point, k, 1, 1, heap);
  26485. }
  26486. if (err == MP_OKAY) {
  26487. sp_384_to_bin(point->x, out);
  26488. *outLen = 48;
  26489. }
  26490. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  26491. if (k != NULL) {
  26492. XFREE(k, heap, DYNAMIC_TYPE_ECC);
  26493. }
  26494. #endif
  26495. sp_384_point_free_12(point, 0, heap);
  26496. return err;
  26497. }
  26498. #endif /* HAVE_ECC_DHE */
  26499. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  26500. #endif
  26501. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  26502. #ifdef WOLFSSL_SP_SMALL
  26503. /* Sub b from a into a. (a -= b)
  26504. *
  26505. * a A single precision integer.
  26506. * b A single precision integer.
  26507. */
  26508. SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
  26509. const sp_digit* b)
  26510. {
  26511. sp_digit c = 0;
  26512. __asm__ __volatile__ (
  26513. "mov r8, %[a]\n\t"
  26514. "add r8, r8, #48\n\t"
  26515. "\n1:\n\t"
  26516. "mov r5, #0\n\t"
  26517. "subs r5, r5, %[c]\n\t"
  26518. "ldr r3, [%[a]]\n\t"
  26519. "ldr r4, [%[a], #4]\n\t"
  26520. "ldr r5, [%[b]]\n\t"
  26521. "ldr r6, [%[b], #4]\n\t"
  26522. "sbcs r3, r3, r5\n\t"
  26523. "sbcs r4, r4, r6\n\t"
  26524. "str r3, [%[a]]\n\t"
  26525. "str r4, [%[a], #4]\n\t"
  26526. "sbc %[c], %[c], %[c]\n\t"
  26527. "add %[a], %[a], #8\n\t"
  26528. "add %[b], %[b], #8\n\t"
  26529. "cmp %[a], r8\n\t"
  26530. #ifdef __GNUC__
  26531. "bne 1b\n\t"
  26532. #else
  26533. "bne.n 1b\n\t"
  26534. #endif /* __GNUC__ */
  26535. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  26536. :
  26537. : "memory", "r3", "r4", "r5", "r6", "r8"
  26538. );
  26539. return c;
  26540. }
  26541. #else
  26542. /* Sub b from a into r. (r = a - b)
  26543. *
  26544. * r A single precision integer.
  26545. * a A single precision integer.
  26546. * b A single precision integer.
  26547. */
  26548. SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
  26549. const sp_digit* b)
  26550. {
  26551. sp_digit c = 0;
  26552. __asm__ __volatile__ (
  26553. "ldm %[a], {r3, r4}\n\t"
  26554. "ldm %[b]!, {r5, r6}\n\t"
  26555. "subs r3, r3, r5\n\t"
  26556. "sbcs r4, r4, r6\n\t"
  26557. "stm %[a]!, {r3, r4}\n\t"
  26558. "ldm %[a], {r3, r4}\n\t"
  26559. "ldm %[b]!, {r5, r6}\n\t"
  26560. "sbcs r3, r3, r5\n\t"
  26561. "sbcs r4, r4, r6\n\t"
  26562. "stm %[a]!, {r3, r4}\n\t"
  26563. "ldm %[a], {r3, r4}\n\t"
  26564. "ldm %[b]!, {r5, r6}\n\t"
  26565. "sbcs r3, r3, r5\n\t"
  26566. "sbcs r4, r4, r6\n\t"
  26567. "stm %[a]!, {r3, r4}\n\t"
  26568. "ldm %[a], {r3, r4}\n\t"
  26569. "ldm %[b]!, {r5, r6}\n\t"
  26570. "sbcs r3, r3, r5\n\t"
  26571. "sbcs r4, r4, r6\n\t"
  26572. "stm %[a]!, {r3, r4}\n\t"
  26573. "ldm %[a], {r3, r4}\n\t"
  26574. "ldm %[b]!, {r5, r6}\n\t"
  26575. "sbcs r3, r3, r5\n\t"
  26576. "sbcs r4, r4, r6\n\t"
  26577. "stm %[a]!, {r3, r4}\n\t"
  26578. "ldm %[a], {r3, r4}\n\t"
  26579. "ldm %[b]!, {r5, r6}\n\t"
  26580. "sbcs r3, r3, r5\n\t"
  26581. "sbcs r4, r4, r6\n\t"
  26582. "stm %[a]!, {r3, r4}\n\t"
  26583. "sbc %[c], %[c], %[c]\n\t"
  26584. : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
  26585. :
  26586. : "memory", "r3", "r4", "r5", "r6"
  26587. );
  26588. return c;
  26589. }
  26590. #endif /* WOLFSSL_SP_SMALL */
  26591. /* Mul a by digit b into r. (r = a * b)
  26592. *
  26593. * r A single precision integer.
  26594. * a A single precision integer.
  26595. * b A single precision digit.
  26596. */
  26597. SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
  26598. sp_digit b)
  26599. {
  26600. __asm__ __volatile__ (
  26601. "add r9, %[a], #48\n\t"
  26602. /* A[0] * B */
  26603. "ldr r6, [%[a]], #4\n\t"
  26604. "umull r5, r3, r6, %[b]\n\t"
  26605. "mov r4, #0\n\t"
  26606. "str r5, [%[r]], #4\n\t"
  26607. /* A[0] * B - Done */
  26608. "\n1:\n\t"
  26609. "mov r5, #0\n\t"
  26610. /* A[] * B */
  26611. "ldr r6, [%[a]], #4\n\t"
  26612. "umull r6, r8, r6, %[b]\n\t"
  26613. "adds r3, r3, r6\n\t"
  26614. "adcs r4, r4, r8\n\t"
  26615. "adc r5, r5, #0\n\t"
  26616. /* A[] * B - Done */
  26617. "str r3, [%[r]], #4\n\t"
  26618. "mov r3, r4\n\t"
  26619. "mov r4, r5\n\t"
  26620. "cmp %[a], r9\n\t"
  26621. #ifdef __GNUC__
  26622. "blt 1b\n\t"
  26623. #else
  26624. "blt.n 1b\n\t"
  26625. #endif /* __GNUC__ */
  26626. "str r3, [%[r]]\n\t"
  26627. : [r] "+r" (r), [a] "+r" (a)
  26628. : [b] "r" (b)
  26629. : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
  26630. );
  26631. }
  26632. /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
  26633. *
  26634. * d1 The high order half of the number to divide.
  26635. * d0 The low order half of the number to divide.
  26636. * div The dividend.
  26637. * returns the result of the division.
  26638. *
  26639. * Note that this is an approximate div. It may give an answer 1 larger.
  26640. */
  26641. SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
  26642. sp_digit div)
  26643. {
  26644. sp_digit r = 0;
  26645. __asm__ __volatile__ (
  26646. "lsr r6, %[div], #16\n\t"
  26647. "add r6, r6, #1\n\t"
  26648. "udiv r4, %[d1], r6\n\t"
  26649. "lsl r8, r4, #16\n\t"
  26650. "umull r4, r5, %[div], r8\n\t"
  26651. "subs %[d0], %[d0], r4\n\t"
  26652. "sbc %[d1], %[d1], r5\n\t"
  26653. "udiv r5, %[d1], r6\n\t"
  26654. "lsl r4, r5, #16\n\t"
  26655. "add r8, r8, r4\n\t"
  26656. "umull r4, r5, %[div], r4\n\t"
  26657. "subs %[d0], %[d0], r4\n\t"
  26658. "sbc %[d1], %[d1], r5\n\t"
  26659. "lsl r4, %[d1], #16\n\t"
  26660. "orr r4, r4, %[d0], lsr #16\n\t"
  26661. "udiv r4, r4, r6\n\t"
  26662. "add r8, r8, r4\n\t"
  26663. "umull r4, r5, %[div], r4\n\t"
  26664. "subs %[d0], %[d0], r4\n\t"
  26665. "sbc %[d1], %[d1], r5\n\t"
  26666. "lsl r4, %[d1], #16\n\t"
  26667. "orr r4, r4, %[d0], lsr #16\n\t"
  26668. "udiv r4, r4, r6\n\t"
  26669. "add r8, r8, r4\n\t"
  26670. "umull r4, r5, %[div], r4\n\t"
  26671. "subs %[d0], %[d0], r4\n\t"
  26672. "sbc %[d1], %[d1], r5\n\t"
  26673. "udiv r4, %[d0], %[div]\n\t"
  26674. "add r8, r8, r4\n\t"
  26675. "mov %[r], r8\n\t"
  26676. : [r] "+r" (r)
  26677. : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
  26678. : "r4", "r5", "r6", "r8"
  26679. );
  26680. return r;
  26681. }
  26682. /* AND m into each word of a and store in r.
  26683. *
  26684. * r A single precision integer.
  26685. * a A single precision integer.
  26686. * m Mask to AND against each digit.
  26687. */
  26688. static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
  26689. {
  26690. #ifdef WOLFSSL_SP_SMALL
  26691. int i;
  26692. for (i=0; i<12; i++) {
  26693. r[i] = a[i] & m;
  26694. }
  26695. #else
  26696. r[0] = a[0] & m;
  26697. r[1] = a[1] & m;
  26698. r[2] = a[2] & m;
  26699. r[3] = a[3] & m;
  26700. r[4] = a[4] & m;
  26701. r[5] = a[5] & m;
  26702. r[6] = a[6] & m;
  26703. r[7] = a[7] & m;
  26704. r[8] = a[8] & m;
  26705. r[9] = a[9] & m;
  26706. r[10] = a[10] & m;
  26707. r[11] = a[11] & m;
  26708. #endif
  26709. }
  26710. /* Divide d in a and put remainder into r (m*d + r = a)
  26711. * m is not calculated as it is not needed at this time.
  26712. *
  26713. * a Number to be divided.
  26714. * d Number to divide with.
  26715. * m Multiplier result.
  26716. * r Remainder from the division.
  26717. * returns MP_OKAY indicating success.
  26718. */
  26719. static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
  26720. sp_digit* r)
  26721. {
  26722. sp_digit t1[24], t2[13];
  26723. sp_digit div, r1;
  26724. int i;
  26725. (void)m;
  26726. div = d[11];
  26727. XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
  26728. for (i=11; i>=0; i--) {
  26729. sp_digit hi = t1[12 + i] - (t1[12 + i] == div);
  26730. r1 = div_384_word_12(hi, t1[12 + i - 1], div);
  26731. sp_384_mul_d_12(t2, d, r1);
  26732. t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
  26733. t1[12 + i] -= t2[12];
  26734. sp_384_mask_12(t2, d, t1[12 + i]);
  26735. t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
  26736. sp_384_mask_12(t2, d, t1[12 + i]);
  26737. t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
  26738. }
  26739. r1 = sp_384_cmp_12(t1, d) >= 0;
  26740. sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
  26741. return MP_OKAY;
  26742. }
  26743. /* Reduce a modulo m into r. (r = a mod m)
  26744. *
  26745. * r A single precision number that is the reduced result.
  26746. * a A single precision number that is to be reduced.
  26747. * m A single precision number that is the modulus to reduce with.
  26748. * returns MP_OKAY indicating success.
  26749. */
  26750. static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  26751. {
  26752. return sp_384_div_12(a, m, NULL, r);
  26753. }
  26754. #endif
  26755. #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
  26756. #ifdef WOLFSSL_SP_SMALL
  26757. /* Order-2 for the P384 curve, as twelve 32-bit words with the least significant word first. Used bit by bit as the exponent when inverting modulo the order. */
  26758. static const uint32_t p384_order_minus_2[12] = {
  26759. 0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
  26760. 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
  26761. };
  26762. #else
  26763. /* The low half of the order-2 of the P384 curve: the six least significant 32-bit words, least significant first. The upper six words of order-2 are all 0xffffffff, so only this half needs to be scanned bit by bit. */
  26764. static const uint32_t p384_order_low[6] = {
  26765. 0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
  26766. };
  26767. #endif /* WOLFSSL_SP_SMALL */
  26768. /* Multiply two number mod the order of P384 curve. (r = a * b mod order)
  26769. *
  26770. * r Result of the multiplication.
  26771. * a First operand of the multiplication.
  26772. * b Second operand of the multiplication.
  26773. */
  26774. static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
  26775. {
  26776. sp_384_mul_12(r, a, b);
  26777. sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
  26778. }
  26779. /* Square number mod the order of P384 curve. (r = a * a mod order)
  26780. *
  26781. * r Result of the squaring.
  26782. * a Number to square.
  26783. */
  26784. static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
  26785. {
  26786. sp_384_sqr_12(r, a);
  26787. sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
  26788. }
  26789. #ifndef WOLFSSL_SP_SMALL
  26790. /* Square number mod the order of P384 curve a number of times.
  26791. * (r = a ^ n mod order)
  26792. *
  26793. * r Result of the squaring.
  26794. * a Number to square.
  26795. */
  26796. static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
  26797. {
  26798. int i;
  26799. sp_384_mont_sqr_order_12(r, a);
  26800. for (i=1; i<n; i++) {
  26801. sp_384_mont_sqr_order_12(r, r);
  26802. }
  26803. }
  26804. #endif /* !WOLFSSL_SP_SMALL */
  26805. /* Invert the number, in Montgomery form, modulo the order of the P384 curve.
  26806. * (r = 1 / a mod order)
  26807. *
  26808. * r Inverse result.
  26809. * a Number to invert.
  26810. * td Temporary data.
  26811. */
  #ifdef WOLFSSL_SP_NONBLOCK
  /* Progress of a non-blocking inversion modulo the P384 order. */
  typedef struct sp_384_mont_inv_order_12_ctx {
      int state;  /* Next step to run: 0 = init, 1 = square, 2 = multiply,
                   * 3 = copy out result. */
      int i;      /* Index of the exponent bit handled next. */
  } sp_384_mont_inv_order_12_ctx;

  /* Non-blocking inversion modulo the order of the P384 curve.
   * (r = 1 / a mod order)
   *
   * Performs one small step of the square-and-multiply loop per call, using
   * the bits of p384_order_minus_2 as the exponent. Call repeatedly until the
   * return value is no longer FP_WOULDBLOCK.
   *
   * NOTE(review): p384_order_minus_2 is only defined when WOLFSSL_SP_SMALL is
   * set - presumably WOLFSSL_SP_NONBLOCK is only supported with it; confirm.
   *
   * sp_ctx  Context holding the state between calls; state 0 starts a new
   *         inversion.
   * r       Inverse result.
   * a       Number to invert.
   * t       Temporary data.
   * returns FP_WOULDBLOCK while work remains and MP_OKAY when r is valid.
   */
  static int sp_384_mont_inv_order_12_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const sp_digit* a,
          sp_digit* t)
  {
      int err = FP_WOULDBLOCK;
      sp_384_mont_inv_order_12_ctx* ctx = (sp_384_mont_inv_order_12_ctx*)sp_ctx;

      /* Compile-time check that the context fits into sp_ecc_ctx_t. */
      typedef char ctx_size_test[sizeof(sp_384_mont_inv_order_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
      (void)sizeof(ctx_size_test);

      switch (ctx->state) {
      case 0:
          /* Copying a accounts for the top exponent bit (bit 383). */
          XMEMCPY(t, a, sizeof(sp_digit) * 12);
          ctx->i = 382;
          ctx->state = 1;
          break;
      case 1:
          sp_384_mont_sqr_order_12(t, t);
          ctx->state = 2;
          break;
      case 2:
          if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
              sp_384_mont_mul_order_12(t, t, a);
          }
          ctx->i--;
          /* Bits 382 down to 0 must all be processed, exactly as the
           * blocking loop does; finish only once bit 0 has been handled. */
          ctx->state = (ctx->i < 0) ? 3 : 1;
          break;
      case 3:
          XMEMCPY(r, t, sizeof(sp_digit) * 12U);
          err = MP_OKAY;
          break;
      }

      return err;
  }
  #endif /* WOLFSSL_SP_NONBLOCK */
  /* Blocking inversion modulo the order of the P384 curve, computed as
   * r = a ^ (order - 2) using the Montgomery multiply/square helpers.
   *
   * r   Inverse result.
   * a   Number to invert.
   * td  Temporary data. The non-small path uses three temporaries at td,
   *     td + 24 and td + 48; callers in this file pass 72 digits.
   */
  26849. static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
  26850. sp_digit* td)
  26851. {
  26852. #ifdef WOLFSSL_SP_SMALL /* Small build: plain square-and-multiply over all exponent bits. */
  26853. sp_digit* t = td;
  26854. int i;
  26855. XMEMCPY(t, a, sizeof(sp_digit) * 12); /* t = a accounts for the top exponent bit (bit 383). */
  26856. for (i=382; i>=0; i--) {
  26857. sp_384_mont_sqr_order_12(t, t);
  26858. if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  26859. sp_384_mont_mul_order_12(t, t, a);
  26860. }
  26861. }
  26862. XMEMCPY(r, t, sizeof(sp_digit) * 12U);
  26863. #else /* Fast build: addition chain for the all-ones upper half, then bit scan of the lower half. */
  26864. sp_digit* t = td;
  26865. sp_digit* t2 = td + 2 * 12;
  26866. sp_digit* t3 = td + 4 * 12;
  26867. int i;
  26868. /* t = a^2 */
  26869. sp_384_mont_sqr_order_12(t, a);
  26870. /* t = a^3 = t * a */
  26871. sp_384_mont_mul_order_12(t, t, a);
  26872. /* t2= a^c = t ^ 2 ^ 2 */
  26873. sp_384_mont_sqr_n_order_12(t2, t, 2);
  26874. /* t = a^f = t2 * t */
  26875. sp_384_mont_mul_order_12(t, t2, t);
  26876. /* t2= a^f0 = t ^ 2 ^ 4 */
  26877. sp_384_mont_sqr_n_order_12(t2, t, 4);
  26878. /* t = a^ff = t2 * t */
  26879. sp_384_mont_mul_order_12(t, t2, t);
  26880. /* t2= a^ff00 = t ^ 2 ^ 8 */
  26881. sp_384_mont_sqr_n_order_12(t2, t, 8);
  26882. /* t3= a^ffff = t2 * t */
  26883. sp_384_mont_mul_order_12(t3, t2, t);
  26884. /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
  26885. sp_384_mont_sqr_n_order_12(t2, t3, 16);
  26886. /* t = a^ffffffff = t2 * t3 */
  26887. sp_384_mont_mul_order_12(t, t2, t3);
  26888. /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
  26889. sp_384_mont_sqr_n_order_12(t2, t, 16);
  26890. /* t = a^ffffffffffff = t2 * t3 */
  26891. sp_384_mont_mul_order_12(t, t2, t3);
  26892. /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
  26893. sp_384_mont_sqr_n_order_12(t2, t, 48);
  26894. /* t = a^(2^96 - 1), i.e. 96 one bits = t2 * t */
  26895. sp_384_mont_mul_order_12(t, t2, t);
  26896. /* t2= a^((2^96 - 1) * 2^96) = t ^ 2 ^ 96 */
  26897. sp_384_mont_sqr_n_order_12(t2, t, 96);
  26898. /* t2= a^(2^192 - 1), i.e. 192 one bits = t2 * t */
  26899. sp_384_mont_mul_order_12(t2, t2, t);
  26900. for (i=191; i>=1; i--) { /* Bits 191..1 of the low half of order-2. */
  26901. sp_384_mont_sqr_order_12(t2, t2);
  26902. if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
  26903. sp_384_mont_mul_order_12(t2, t2, a);
  26904. }
  26905. }
  26906. sp_384_mont_sqr_order_12(t2, t2); /* Bit 0 is handled unconditionally: it is set in p384_order_low[0]. */
  26907. sp_384_mont_mul_order_12(r, t2, a);
  26908. #endif /* WOLFSSL_SP_SMALL */
  26909. }
  26910. #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
  26911. #ifdef HAVE_ECC_SIGN
  26912. #ifndef SP_ECC_MAX_SIG_GEN
  26913. #define SP_ECC_MAX_SIG_GEN 64
  26914. #endif
  26915. /* Sign the hash using the private key.
  26916. * e = [hash, 384 bits] from binary
  26917. * r = (k.G)->x mod order
  26918. * s = (r * x + e) / k mod order
  26919. * The hash is truncated to the first 384 bits.
  26920. *
  26921. * hash Hash to sign.
  26922. * hashLen Length of the hash data.
  26923. * rng Random number generator.
  26924. * priv Private part of key - scalar.
  26925. * rm First part of result as an mp_int.
  26926. * sm Second part of result as an mp_int.
  26927. * heap Heap to use for allocation.
  26928. * returns RNG failures, MEMORY_E when memory allocation fails and
  26929. * MP_OKAY on success.
  26930. */
  #ifdef WOLFSSL_SP_NONBLOCK
  /* State carried between calls of sp_ecc_sign_384_nb. */
  typedef struct sp_ecc_sign_384_ctx {
      int state;              /* Next step to run (0..10). */
      union {
          /* Sub-operation contexts; only one is active at a time. */
          sp_384_ecc_mulmod_12_ctx mulmod_ctx;
          sp_384_mont_inv_order_12_ctx mont_inv_order_ctx;
      };
      sp_digit e[2*12];       /* Hash as a number; also holds s. */
      sp_digit x[2*12];       /* Private key, then r * x mod order. */
      sp_digit k[2*12];       /* Per-signature scalar; also holds 1/k. */
      sp_digit r[2*12];       /* First part of the signature. */
      sp_digit tmp[3 * 2*12]; /* Scratch for the inversion. */
      sp_point_384 point;     /* k.G */
      sp_digit* s;            /* Alias of e. */
      sp_digit* kInv;         /* Alias of k. */
      int i;                  /* Remaining generation attempts. */
  } sp_ecc_sign_384_ctx;

  /* Non-blocking version of sp_ecc_sign_384.
   * e = [hash, 384 bits] from binary
   * r = (k.G)->x mod order
   * s = (r * x + e) / k mod order
   * The hash is truncated to the first 384 bits.
   *
   * Each call performs one step of the signing state machine. Call again with
   * the same arguments while FP_WOULDBLOCK is returned. When any other value
   * is returned the working buffers in the context have been zeroized.
   *
   * sp_ctx   Context holding state between calls; state 0 starts a signature.
   * hash     Hash to sign.
   * hashLen  Length of the hash data.
   * rng      Random number generator.
   * priv     Private part of key - scalar.
   * rm       First part of result as an mp_int.
   * sm       Second part of result as an mp_int.
   * km       Optional scalar to use instead of a random one; zeroed once
   *          consumed. May be NULL.
   * heap     Heap to use for allocation.
   * returns FP_WOULDBLOCK while work remains, RNG failures, MEMORY_E when
   * memory allocation fails and MP_OKAY on success.
   */
  int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
          mp_int* rm, mp_int* sm, mp_int* km, void* heap)
  {
      int err = FP_WOULDBLOCK;
      sp_ecc_sign_384_ctx* ctx = (sp_ecc_sign_384_ctx*)sp_ctx->data;

      /* Compile-time check that the context fits into sp_ecc_ctx_t. */
      typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
      (void)sizeof(ctx_size_test);

      (void)heap;

      switch (ctx->state) {
      case 0: /* INIT */
          ctx->s = ctx->e;
          ctx->kInv = ctx->k;
          ctx->i = SP_ECC_MAX_SIG_GEN;
          ctx->state = 1;
          break;
      case 1: /* GEN */
          /* s shares storage with e, so a previous attempt overwrites the
           * hash value. Load it at the start of every attempt, as the
           * blocking implementation does inside its retry loop. hashLen is
           * passed by value, so it is clamped on this call. */
          if (hashLen > 48U) {
              hashLen = 48U;
          }
          sp_384_from_bin(ctx->e, 12, hash, (int)hashLen);

          sp_384_from_mp(ctx->x, 12, priv);
          /* New random point. */
          if (km == NULL || mp_iszero(km)) {
              err = sp_384_ecc_gen_k_12(rng, ctx->k);
          }
          else {
              sp_384_from_mp(ctx->k, 12, km);
              mp_zero(km);
          }
          XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
          ctx->state = 2;
          break;
      case 2: /* MULMOD */
          err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
              &ctx->point, &p384_base, ctx->k, 1, 1, heap);
          if (err == MP_OKAY) {
              ctx->state = 3;
          }
          break;
      case 3: /* MODORDER */
      {
          int32_t c;
          /* r = point->x mod order */
          XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 12U);
          sp_384_norm_12(ctx->r);
          c = sp_384_cmp_12(ctx->r, p384_order);
          sp_384_cond_sub_12(ctx->r, ctx->r, p384_order, 0L - (sp_digit)(c >= 0));
          sp_384_norm_12(ctx->r);
          ctx->state = 4;
          break;
      }
      case 4: /* KMODORDER */
          /* Conv k to Montgomery form (mod order) */
          sp_384_mul_12(ctx->k, ctx->k, p384_norm_order);
          err = sp_384_mod_12(ctx->k, ctx->k, p384_order);
          if (err == MP_OKAY) {
              sp_384_norm_12(ctx->k);
              XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
              ctx->state = 5;
          }
          break;
      case 5: /* KINV */
          /* kInv = 1/k mod order */
          err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp);
          if (err == MP_OKAY) {
              XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
              ctx->state = 6;
          }
          break;
      case 6: /* KINVNORM */
          sp_384_norm_12(ctx->kInv);
          ctx->state = 7;
          break;
      case 7: /* R */
          /* s = r * x + e */
          sp_384_mul_12(ctx->x, ctx->x, ctx->r);
          ctx->state = 8;
          break;
      case 8: /* S1 */
          err = sp_384_mod_12(ctx->x, ctx->x, p384_order);
          if (err == MP_OKAY) {
              ctx->state = 9;
          }
          break;
      case 9: /* S2 */
      {
          sp_digit carry;
          int32_t c;
          sp_384_norm_12(ctx->x);
          carry = sp_384_add_12(ctx->s, ctx->e, ctx->x);
          sp_384_cond_sub_12(ctx->s, ctx->s, p384_order, 0 - carry);
          sp_384_norm_12(ctx->s);
          c = sp_384_cmp_12(ctx->s, p384_order);
          sp_384_cond_sub_12(ctx->s, ctx->s, p384_order, 0L - (sp_digit)(c >= 0));
          sp_384_norm_12(ctx->s);

          /* s = s * k^-1 mod order */
          sp_384_mont_mul_order_12(ctx->s, ctx->s, ctx->kInv);
          sp_384_norm_12(ctx->s);

          /* Check that signature is usable. */
          if (sp_384_iszero_12(ctx->s) == 0) {
              ctx->state = 10;
              break;
          }

          /* not usable gen, try again */
          ctx->i--;
          if (ctx->i == 0) {
              err = RNG_FAILURE_E;
          }
          ctx->state = 1;
          break;
      }
      case 10: /* RES */
          err = sp_384_to_mp(ctx->r, rm);
          if (err == MP_OKAY) {
              err = sp_384_to_mp(ctx->s, sm);
          }
          break;
      }

      /* A successful sub-step that is not the final one still means the
       * caller has to come back. */
      if (err == MP_OKAY && ctx->state != 10) {
          err = FP_WOULDBLOCK;
      }
      if (err != FP_WOULDBLOCK) {
          /* Finished (success or failure): wipe the secret working data. */
          XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 12U);
          XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 12U);
          XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 12U);
          XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 12U);
          XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
      }

      return err;
  }
  #endif /* WOLFSSL_SP_NONBLOCK */
  27078. int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
  27079. mp_int* rm, mp_int* sm, mp_int* km, void* heap)
  27080. {
  27081. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27082. sp_digit* d = NULL;
  27083. #else
  27084. sp_digit ed[2*12];
  27085. sp_digit xd[2*12];
  27086. sp_digit kd[2*12];
  27087. sp_digit rd[2*12];
  27088. sp_digit td[3 * 2*12];
  27089. sp_point_384 p;
  27090. #endif
  27091. sp_digit* e = NULL;
  27092. sp_digit* x = NULL;
  27093. sp_digit* k = NULL;
  27094. sp_digit* r = NULL;
  27095. sp_digit* tmp = NULL;
  27096. sp_point_384* point = NULL;
  27097. sp_digit carry;
  27098. sp_digit* s = NULL;
  27099. sp_digit* kInv = NULL;
  27100. int err = MP_OKAY;
  27101. int32_t c;
  27102. int i;
  27103. (void)heap;
  27104. err = sp_384_point_new_12(heap, p, point);
  27105. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27106. if (err == MP_OKAY) {
  27107. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
  27108. DYNAMIC_TYPE_ECC);
  27109. if (d == NULL) {
  27110. err = MEMORY_E;
  27111. }
  27112. }
  27113. #endif
  27114. if (err == MP_OKAY) {
  27115. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27116. e = d + 0 * 12;
  27117. x = d + 2 * 12;
  27118. k = d + 4 * 12;
  27119. r = d + 6 * 12;
  27120. tmp = d + 8 * 12;
  27121. #else
  27122. e = ed;
  27123. x = xd;
  27124. k = kd;
  27125. r = rd;
  27126. tmp = td;
  27127. #endif
  27128. s = e;
  27129. kInv = k;
  27130. if (hashLen > 48U) {
  27131. hashLen = 48U;
  27132. }
  27133. }
  27134. for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
  27135. sp_384_from_mp(x, 12, priv);
  27136. /* New random point. */
  27137. if (km == NULL || mp_iszero(km)) {
  27138. err = sp_384_ecc_gen_k_12(rng, k);
  27139. }
  27140. else {
  27141. sp_384_from_mp(k, 12, km);
  27142. mp_zero(km);
  27143. }
  27144. if (err == MP_OKAY) {
  27145. err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, NULL);
  27146. }
  27147. if (err == MP_OKAY) {
  27148. /* r = point->x mod order */
  27149. XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
  27150. sp_384_norm_12(r);
  27151. c = sp_384_cmp_12(r, p384_order);
  27152. sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
  27153. sp_384_norm_12(r);
  27154. /* Conv k to Montgomery form (mod order) */
  27155. sp_384_mul_12(k, k, p384_norm_order);
  27156. err = sp_384_mod_12(k, k, p384_order);
  27157. }
  27158. if (err == MP_OKAY) {
  27159. sp_384_norm_12(k);
  27160. /* kInv = 1/k mod order */
  27161. sp_384_mont_inv_order_12(kInv, k, tmp);
  27162. sp_384_norm_12(kInv);
  27163. /* s = r * x + e */
  27164. sp_384_mul_12(x, x, r);
  27165. err = sp_384_mod_12(x, x, p384_order);
  27166. }
  27167. if (err == MP_OKAY) {
  27168. sp_384_norm_12(x);
  27169. sp_384_from_bin(e, 12, hash, (int)hashLen);
  27170. carry = sp_384_add_12(s, e, x);
  27171. sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
  27172. sp_384_norm_12(s);
  27173. c = sp_384_cmp_12(s, p384_order);
  27174. sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
  27175. sp_384_norm_12(s);
  27176. /* s = s * k^-1 mod order */
  27177. sp_384_mont_mul_order_12(s, s, kInv);
  27178. sp_384_norm_12(s);
  27179. /* Check that signature is usable. */
  27180. if (sp_384_iszero_12(s) == 0) {
  27181. break;
  27182. }
  27183. }
  27184. #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
  27185. i = 1;
  27186. #endif
  27187. }
  27188. if (i == 0) {
  27189. err = RNG_FAILURE_E;
  27190. }
  27191. if (err == MP_OKAY) {
  27192. err = sp_384_to_mp(r, rm);
  27193. }
  27194. if (err == MP_OKAY) {
  27195. err = sp_384_to_mp(s, sm);
  27196. }
  27197. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27198. if (d != NULL) {
  27199. XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
  27200. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  27201. }
  27202. #else
  27203. XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
  27204. XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
  27205. XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
  27206. XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
  27207. XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
  27208. XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
  27209. #endif
  27210. sp_384_point_free_12(point, 1, heap);
  27211. return err;
  27212. }
  27213. #endif /* HAVE_ECC_SIGN */
  27214. #ifndef WOLFSSL_SP_SMALL
  27215. /* Divide the number by 2 mod the modulus. (r = a / 2 % m)
  27216. *
  27217. * r Result of division by 2.
  27218. * a Number to divide.
  27219. * m Modulus.
  27220. */
  27221. static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  27222. {
  27223. __asm__ __volatile__ (
  27224. "ldr r4, [%[a]]\n\t"
  27225. "ands r8, r4, #1\n\t"
  27226. "beq 1f\n\t"
  27227. "mov r12, #0\n\t"
  27228. "ldr r5, [%[a], #4]\n\t"
  27229. "ldr r6, [%[a], #8]\n\t"
  27230. "ldr r7, [%[a], #12]\n\t"
  27231. "ldr r8, [%[m], #0]\n\t"
  27232. "ldr r9, [%[m], #4]\n\t"
  27233. "ldr r10, [%[m], #8]\n\t"
  27234. "ldr r14, [%[m], #12]\n\t"
  27235. "adds r4, r4, r8\n\t"
  27236. "adcs r5, r5, r9\n\t"
  27237. "adcs r6, r6, r10\n\t"
  27238. "adcs r7, r7, r14\n\t"
  27239. "str r4, [%[r], #0]\n\t"
  27240. "str r5, [%[r], #4]\n\t"
  27241. "str r6, [%[r], #8]\n\t"
  27242. "str r7, [%[r], #12]\n\t"
  27243. "ldr r4, [%[a], #16]\n\t"
  27244. "ldr r5, [%[a], #20]\n\t"
  27245. "ldr r6, [%[a], #24]\n\t"
  27246. "ldr r7, [%[a], #28]\n\t"
  27247. "ldr r8, [%[m], #16]\n\t"
  27248. "ldr r9, [%[m], #20]\n\t"
  27249. "ldr r10, [%[m], #24]\n\t"
  27250. "ldr r14, [%[m], #28]\n\t"
  27251. "adcs r4, r4, r8\n\t"
  27252. "adcs r5, r5, r9\n\t"
  27253. "adcs r6, r6, r10\n\t"
  27254. "adcs r7, r7, r14\n\t"
  27255. "str r4, [%[r], #16]\n\t"
  27256. "str r5, [%[r], #20]\n\t"
  27257. "str r6, [%[r], #24]\n\t"
  27258. "str r7, [%[r], #28]\n\t"
  27259. "ldr r4, [%[a], #32]\n\t"
  27260. "ldr r5, [%[a], #36]\n\t"
  27261. "ldr r6, [%[a], #40]\n\t"
  27262. "ldr r7, [%[a], #44]\n\t"
  27263. "ldr r8, [%[m], #32]\n\t"
  27264. "ldr r9, [%[m], #36]\n\t"
  27265. "ldr r10, [%[m], #40]\n\t"
  27266. "ldr r14, [%[m], #44]\n\t"
  27267. "adcs r4, r4, r8\n\t"
  27268. "adcs r5, r5, r9\n\t"
  27269. "adcs r6, r6, r10\n\t"
  27270. "adcs r7, r7, r14\n\t"
  27271. "str r4, [%[r], #32]\n\t"
  27272. "str r5, [%[r], #36]\n\t"
  27273. "str r6, [%[r], #40]\n\t"
  27274. "str r7, [%[r], #44]\n\t"
  27275. "adc r8, r12, r12\n\t"
  27276. "b 2f\n\t"
  27277. "\n1:\n\t"
  27278. "ldr r5, [%[a], #2]\n\t"
  27279. "str r4, [%[r], #0]\n\t"
  27280. "str r5, [%[r], #2]\n\t"
  27281. "ldr r4, [%[a], #4]\n\t"
  27282. "ldr r5, [%[a], #6]\n\t"
  27283. "str r4, [%[r], #4]\n\t"
  27284. "str r5, [%[r], #6]\n\t"
  27285. "ldr r4, [%[a], #8]\n\t"
  27286. "ldr r5, [%[a], #10]\n\t"
  27287. "str r4, [%[r], #8]\n\t"
  27288. "str r5, [%[r], #10]\n\t"
  27289. "ldr r4, [%[a], #12]\n\t"
  27290. "ldr r5, [%[a], #14]\n\t"
  27291. "str r4, [%[r], #12]\n\t"
  27292. "str r5, [%[r], #14]\n\t"
  27293. "ldr r4, [%[a], #16]\n\t"
  27294. "ldr r5, [%[a], #18]\n\t"
  27295. "str r4, [%[r], #16]\n\t"
  27296. "str r5, [%[r], #18]\n\t"
  27297. "ldr r4, [%[a], #20]\n\t"
  27298. "ldr r5, [%[a], #22]\n\t"
  27299. "str r4, [%[r], #20]\n\t"
  27300. "str r5, [%[r], #22]\n\t"
  27301. "\n2:\n\t"
  27302. "ldr r3, [%[r]]\n\t"
  27303. "ldr r4, [%[r], #4]\n\t"
  27304. "lsr r3, r3, #1\n\t"
  27305. "orr r3, r3, r4, lsl #31\n\t"
  27306. "lsr r4, r4, #1\n\t"
  27307. "ldr r5, [%[a], #8]\n\t"
  27308. "str r3, [%[r], #0]\n\t"
  27309. "orr r4, r4, r5, lsl #31\n\t"
  27310. "lsr r5, r5, #1\n\t"
  27311. "ldr r3, [%[a], #12]\n\t"
  27312. "str r4, [%[r], #4]\n\t"
  27313. "orr r5, r5, r3, lsl #31\n\t"
  27314. "lsr r3, r3, #1\n\t"
  27315. "ldr r4, [%[a], #16]\n\t"
  27316. "str r5, [%[r], #8]\n\t"
  27317. "orr r3, r3, r4, lsl #31\n\t"
  27318. "lsr r4, r4, #1\n\t"
  27319. "ldr r5, [%[a], #20]\n\t"
  27320. "str r3, [%[r], #12]\n\t"
  27321. "orr r4, r4, r5, lsl #31\n\t"
  27322. "lsr r5, r5, #1\n\t"
  27323. "ldr r3, [%[a], #24]\n\t"
  27324. "str r4, [%[r], #16]\n\t"
  27325. "orr r5, r5, r3, lsl #31\n\t"
  27326. "lsr r3, r3, #1\n\t"
  27327. "ldr r4, [%[a], #28]\n\t"
  27328. "str r5, [%[r], #20]\n\t"
  27329. "orr r3, r3, r4, lsl #31\n\t"
  27330. "lsr r4, r4, #1\n\t"
  27331. "ldr r5, [%[a], #32]\n\t"
  27332. "str r3, [%[r], #24]\n\t"
  27333. "orr r4, r4, r5, lsl #31\n\t"
  27334. "lsr r5, r5, #1\n\t"
  27335. "ldr r3, [%[a], #36]\n\t"
  27336. "str r4, [%[r], #28]\n\t"
  27337. "orr r5, r5, r3, lsl #31\n\t"
  27338. "lsr r3, r3, #1\n\t"
  27339. "ldr r4, [%[a], #40]\n\t"
  27340. "str r5, [%[r], #32]\n\t"
  27341. "orr r3, r3, r4, lsl #31\n\t"
  27342. "lsr r4, r4, #1\n\t"
  27343. "ldr r5, [%[a], #44]\n\t"
  27344. "str r3, [%[r], #36]\n\t"
  27345. "orr r4, r4, r5, lsl #31\n\t"
  27346. "lsr r5, r5, #1\n\t"
  27347. "orr r5, r5, r8, lsl #31\n\t"
  27348. "str r4, [%[r], #40]\n\t"
  27349. "str r5, [%[r], #44]\n\t"
  27350. :
  27351. : [r] "r" (r), [a] "r" (a), [m] "r" (m)
  27352. : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14"
  27353. );
  27354. }
  27355. static int sp_384_num_bits_12(sp_digit* a)
  27356. {
  27357. int r = 0;
  27358. __asm__ __volatile__ (
  27359. "ldr r2, [%[a], #44]\n\t"
  27360. "cmp r2, #0\n\t"
  27361. "beq 11f\n\t"
  27362. "mov r3, #384\n\t"
  27363. "clz %[r], r2\n\t"
  27364. "sub %[r], r3, %[r]\n\t"
  27365. "b 13f\n\t"
  27366. "\n11:\n\t"
  27367. "ldr r2, [%[a], #40]\n\t"
  27368. "cmp r2, #0\n\t"
  27369. "beq 10f\n\t"
  27370. "mov r3, #352\n\t"
  27371. "clz %[r], r2\n\t"
  27372. "sub %[r], r3, %[r]\n\t"
  27373. "b 13f\n\t"
  27374. "\n10:\n\t"
  27375. "ldr r2, [%[a], #36]\n\t"
  27376. "cmp r2, #0\n\t"
  27377. "beq 9f\n\t"
  27378. "mov r3, #320\n\t"
  27379. "clz %[r], r2\n\t"
  27380. "sub %[r], r3, %[r]\n\t"
  27381. "b 13f\n\t"
  27382. "\n9:\n\t"
  27383. "ldr r2, [%[a], #32]\n\t"
  27384. "cmp r2, #0\n\t"
  27385. "beq 8f\n\t"
  27386. "mov r3, #288\n\t"
  27387. "clz %[r], r2\n\t"
  27388. "sub %[r], r3, %[r]\n\t"
  27389. "b 13f\n\t"
  27390. "\n8:\n\t"
  27391. "ldr r2, [%[a], #28]\n\t"
  27392. "cmp r2, #0\n\t"
  27393. "beq 7f\n\t"
  27394. "mov r3, #256\n\t"
  27395. "clz %[r], r2\n\t"
  27396. "sub %[r], r3, %[r]\n\t"
  27397. "b 13f\n\t"
  27398. "\n7:\n\t"
  27399. "ldr r2, [%[a], #24]\n\t"
  27400. "cmp r2, #0\n\t"
  27401. "beq 6f\n\t"
  27402. "mov r3, #224\n\t"
  27403. "clz %[r], r2\n\t"
  27404. "sub %[r], r3, %[r]\n\t"
  27405. "b 13f\n\t"
  27406. "\n6:\n\t"
  27407. "ldr r2, [%[a], #20]\n\t"
  27408. "cmp r2, #0\n\t"
  27409. "beq 5f\n\t"
  27410. "mov r3, #192\n\t"
  27411. "clz %[r], r2\n\t"
  27412. "sub %[r], r3, %[r]\n\t"
  27413. "b 13f\n\t"
  27414. "\n5:\n\t"
  27415. "ldr r2, [%[a], #16]\n\t"
  27416. "cmp r2, #0\n\t"
  27417. "beq 4f\n\t"
  27418. "mov r3, #160\n\t"
  27419. "clz %[r], r2\n\t"
  27420. "sub %[r], r3, %[r]\n\t"
  27421. "b 13f\n\t"
  27422. "\n4:\n\t"
  27423. "ldr r2, [%[a], #12]\n\t"
  27424. "cmp r2, #0\n\t"
  27425. "beq 3f\n\t"
  27426. "mov r3, #128\n\t"
  27427. "clz %[r], r2\n\t"
  27428. "sub %[r], r3, %[r]\n\t"
  27429. "b 13f\n\t"
  27430. "\n3:\n\t"
  27431. "ldr r2, [%[a], #8]\n\t"
  27432. "cmp r2, #0\n\t"
  27433. "beq 2f\n\t"
  27434. "mov r3, #96\n\t"
  27435. "clz %[r], r2\n\t"
  27436. "sub %[r], r3, %[r]\n\t"
  27437. "b 13f\n\t"
  27438. "\n2:\n\t"
  27439. "ldr r2, [%[a], #4]\n\t"
  27440. "cmp r2, #0\n\t"
  27441. "beq 1f\n\t"
  27442. "mov r3, #64\n\t"
  27443. "clz %[r], r2\n\t"
  27444. "sub %[r], r3, %[r]\n\t"
  27445. "b 13f\n\t"
  27446. "\n1:\n\t"
  27447. "ldr r2, [%[a], #0]\n\t"
  27448. "mov r3, #32\n\t"
  27449. "clz %[r], r2\n\t"
  27450. "sub %[r], r3, %[r]\n\t"
  27451. "\n13:\n\t"
  27452. : [r] "+r" (r)
  27453. : [a] "r" (a)
  27454. : "r2", "r3"
  27455. );
  27456. return r;
  27457. }
  27458. /* Non-constant time modular inversion.
  27459. *
  27460. * @param [out] r Resulting number.
  27461. * @param [in] a Number to invert.
  27462. * @param [in] m Modulus.
  27463. * @return MP_OKAY on success.
  27464. */
  27465. static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
  27466. {
  27467. sp_digit u[12];
  27468. sp_digit v[12];
  27469. sp_digit b[12];
  27470. sp_digit d[12];
  27471. int ut, vt;
  27472. sp_digit o;
  27473. XMEMCPY(u, m, sizeof(u));
  27474. XMEMCPY(v, a, sizeof(v));
  27475. ut = sp_384_num_bits_12(u);
  27476. vt = sp_384_num_bits_12(v);
  27477. XMEMSET(b, 0, sizeof(b));
  27478. if ((v[0] & 1) == 0) {
  27479. sp_384_rshift1_12(v, v);
  27480. XMEMCPY(d, m, sizeof(u));
  27481. d[0] += 1;
  27482. sp_384_rshift1_12(d, d);
  27483. vt--;
  27484. while ((v[0] & 1) == 0) {
  27485. sp_384_rshift1_12(v, v);
  27486. sp_384_div2_mod_12(d, d, m);
  27487. vt--;
  27488. }
  27489. }
  27490. else {
  27491. XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit));
  27492. d[0] = 1;
  27493. }
  27494. while (ut > 1 && vt > 1) {
  27495. if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) {
  27496. sp_384_sub_12(u, u, v);
  27497. o = sp_384_sub_12(b, b, d);
  27498. if (o != 0)
  27499. sp_384_add_12(b, b, m);
  27500. ut = sp_384_num_bits_12(u);
  27501. do {
  27502. sp_384_rshift1_12(u, u);
  27503. sp_384_div2_mod_12(b, b, m);
  27504. ut--;
  27505. }
  27506. while (ut > 0 && (u[0] & 1) == 0);
  27507. }
  27508. else {
  27509. sp_384_sub_12(v, v, u);
  27510. o = sp_384_sub_12(d, d, b);
  27511. if (o != 0)
  27512. sp_384_add_12(d, d, m);
  27513. vt = sp_384_num_bits_12(v);
  27514. do {
  27515. sp_384_rshift1_12(v, v);
  27516. sp_384_div2_mod_12(d, d, m);
  27517. vt--;
  27518. }
  27519. while (vt > 0 && (v[0] & 1) == 0);
  27520. }
  27521. }
  27522. if (ut == 1)
  27523. XMEMCPY(r, b, sizeof(b));
  27524. else
  27525. XMEMCPY(r, d, sizeof(d));
  27526. return MP_OKAY;
  27527. }
  27528. #endif /* WOLFSSL_SP_SMALL */
  27529. #ifdef HAVE_ECC_VERIFY
  /* Verify the signature values with the hash and public key.
   * e = Truncate(hash, 384)
   * u1 = e/s mod order
   * u2 = r/s mod order
   * r == (u1.G + u2.Q)->x mod order
   * Optimization: Leave point in projective form.
   * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
   * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
   * The hash is truncated to the first 384 bits.
   *
   * hash     Hash of the message that was signed.
   * hashLen  Length of the hash data in bytes.
   * pX       X ordinate of the public key point.
   * pY       Y ordinate of the public key point.
   * pZ       Z ordinate of the public key point.
   * r        First part of the signature as an mp_int.
   * sm       Second part of the signature as an mp_int.
   * res      Set to 1 when the signature matches and 0 when it does not.
   * heap     Heap to use for allocation.
   * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
   */
  27550. #ifdef WOLFSSL_SP_NONBLOCK
  27551. typedef struct sp_ecc_verify_384_ctx {
  27552. int state;
  27553. union {
  27554. sp_384_ecc_mulmod_12_ctx mulmod_ctx;
  27555. sp_384_mont_inv_order_12_ctx mont_inv_order_ctx;
  27556. sp_384_proj_point_dbl_12_ctx dbl_ctx;
  27557. sp_384_proj_point_add_12_ctx add_ctx;
  27558. };
  27559. sp_digit u1[2*12];
  27560. sp_digit u2[2*12];
  27561. sp_digit s[2*12];
  27562. sp_digit tmp[2*12 * 5];
  27563. sp_point_384 p1;
  27564. sp_point_384 p2;
  27565. } sp_ecc_verify_384_ctx;
  27566. int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, mp_int* pX,
  27567. mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
  27568. {
  27569. int err = FP_WOULDBLOCK;
  27570. sp_ecc_verify_384_ctx* ctx = (sp_ecc_verify_384_ctx*)sp_ctx->data;
  27571. typedef char ctx_size_test[sizeof(sp_ecc_verify_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
  27572. (void)sizeof(ctx_size_test);
  27573. switch (ctx->state) {
  27574. case 0: /* INIT */
  27575. if (hashLen > 48U) {
  27576. hashLen = 48U;
  27577. }
  27578. sp_384_from_bin(ctx->u1, 12, hash, (int)hashLen);
  27579. sp_384_from_mp(ctx->u2, 12, r);
  27580. sp_384_from_mp(ctx->s, 12, sm);
  27581. sp_384_from_mp(ctx->p2.x, 12, pX);
  27582. sp_384_from_mp(ctx->p2.y, 12, pY);
  27583. sp_384_from_mp(ctx->p2.z, 12, pZ);
  27584. ctx->state = 1;
  27585. break;
  27586. case 1: /* NORMS0 */
  27587. sp_384_mul_12(ctx->s, ctx->s, p384_norm_order);
  27588. err = sp_384_mod_12(ctx->s, ctx->s, p384_order);
  27589. if (err == MP_OKAY)
  27590. ctx->state = 2;
  27591. break;
  27592. case 2: /* NORMS1 */
  27593. sp_384_norm_12(ctx->s);
  27594. XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
  27595. ctx->state = 3;
  27596. break;
  27597. case 3: /* NORMS2 */
  27598. err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
  27599. if (err == MP_OKAY) {
  27600. ctx->state = 4;
  27601. }
  27602. break;
  27603. case 4: /* NORMS3 */
  27604. sp_384_mont_mul_order_12(ctx->u1, ctx->u1, ctx->s);
  27605. ctx->state = 5;
  27606. break;
  27607. case 5: /* NORMS4 */
  27608. sp_384_mont_mul_order_12(ctx->u2, ctx->u2, ctx->s);
  27609. XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
  27610. ctx->state = 6;
  27611. break;
  27612. case 6: /* MULBASE */
  27613. err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
  27614. if (err == MP_OKAY) {
  27615. if (sp_384_iszero_12(ctx->p1.z)) {
  27616. ctx->p1.infinity = 1;
  27617. }
  27618. XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
  27619. ctx->state = 7;
  27620. }
  27621. break;
  27622. case 7: /* MULMOD */
  27623. err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
  27624. if (err == MP_OKAY) {
  27625. if (sp_384_iszero_12(ctx->p2.z)) {
  27626. ctx->p2.infinity = 1;
  27627. }
  27628. XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
  27629. ctx->state = 8;
  27630. }
  27631. break;
  27632. case 8: /* ADD */
  27633. err = sp_384_proj_point_add_12_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
  27634. if (err == MP_OKAY)
  27635. ctx->state = 9;
  27636. break;
  27637. case 9: /* DBLPREP */
  27638. if (sp_384_iszero_12(ctx->p1.z)) {
  27639. if (sp_384_iszero_12(ctx->p1.x) && sp_384_iszero_12(ctx->p1.y)) {
  27640. XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
  27641. ctx->state = 10;
  27642. break;
  27643. }
  27644. else {
  27645. /* Y ordinate is not used from here - don't set. */
  27646. int i;
  27647. for (i=0; i<12; i++) {
  27648. ctx->p1.x[i] = 0;
  27649. }
  27650. XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
  27651. }
  27652. }
  27653. ctx->state = 11;
  27654. break;
  27655. case 10: /* DBL */
  27656. err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1,
  27657. &ctx->p2, ctx->tmp);
  27658. if (err == MP_OKAY) {
  27659. ctx->state = 11;
  27660. }
  27661. break;
  27662. case 11: /* MONT */
  27663. /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
  27664. /* Reload r and convert to Montgomery form. */
  27665. sp_384_from_mp(ctx->u2, 12, r);
  27666. err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
  27667. if (err == MP_OKAY)
  27668. ctx->state = 12;
  27669. break;
  27670. case 12: /* SQR */
  27671. /* u1 = r.z'.z' mod prime */
  27672. sp_384_mont_sqr_12(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
  27673. ctx->state = 13;
  27674. break;
  27675. case 13: /* MUL */
  27676. sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
  27677. ctx->state = 14;
  27678. break;
  27679. case 14: /* RES */
  27680. err = MP_OKAY; /* math okay, now check result */
  27681. *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
  27682. if (*res == 0) {
  27683. sp_digit carry;
  27684. int32_t c;
  27685. /* Reload r and add order. */
  27686. sp_384_from_mp(ctx->u2, 12, r);
  27687. carry = sp_384_add_12(ctx->u2, ctx->u2, p384_order);
  27688. /* Carry means result is greater than mod and is not valid. */
  27689. if (carry == 0) {
  27690. sp_384_norm_12(ctx->u2);
  27691. /* Compare with mod and if greater or equal then not valid. */
  27692. c = sp_384_cmp_12(ctx->u2, p384_mod);
  27693. if (c < 0) {
  27694. /* Convert to Montogomery form */
  27695. err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
  27696. if (err == MP_OKAY) {
  27697. /* u1 = (r + 1*order).z'.z' mod prime */
  27698. sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod,
  27699. p384_mp_mod);
  27700. *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
  27701. }
  27702. }
  27703. }
  27704. }
  27705. break;
  27706. }
  27707. if (err == MP_OKAY && ctx->state != 14) {
  27708. err = FP_WOULDBLOCK;
  27709. }
  27710. return err;
  27711. }
  27712. #endif /* WOLFSSL_SP_NONBLOCK */
  27713. int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
  27714. mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
  27715. {
  27716. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27717. sp_digit* d = NULL;
  27718. #else
  27719. sp_digit u1d[2*12];
  27720. sp_digit u2d[2*12];
  27721. sp_digit sd[2*12];
  27722. sp_digit tmpd[2*12 * 5];
  27723. sp_point_384 p1d;
  27724. sp_point_384 p2d;
  27725. #endif
  27726. sp_digit* u1 = NULL;
  27727. sp_digit* u2 = NULL;
  27728. sp_digit* s = NULL;
  27729. sp_digit* tmp = NULL;
  27730. sp_point_384* p1;
  27731. sp_point_384* p2 = NULL;
  27732. sp_digit carry;
  27733. int32_t c;
  27734. int err;
  27735. err = sp_384_point_new_12(heap, p1d, p1);
  27736. if (err == MP_OKAY) {
  27737. err = sp_384_point_new_12(heap, p2d, p2);
  27738. }
  27739. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27740. if (err == MP_OKAY) {
  27741. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
  27742. DYNAMIC_TYPE_ECC);
  27743. if (d == NULL) {
  27744. err = MEMORY_E;
  27745. }
  27746. }
  27747. #endif
  27748. if (err == MP_OKAY) {
  27749. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27750. u1 = d + 0 * 12;
  27751. u2 = d + 2 * 12;
  27752. s = d + 4 * 12;
  27753. tmp = d + 6 * 12;
  27754. #else
  27755. u1 = u1d;
  27756. u2 = u2d;
  27757. s = sd;
  27758. tmp = tmpd;
  27759. #endif
  27760. if (hashLen > 48U) {
  27761. hashLen = 48U;
  27762. }
  27763. sp_384_from_bin(u1, 12, hash, (int)hashLen);
  27764. sp_384_from_mp(u2, 12, r);
  27765. sp_384_from_mp(s, 12, sm);
  27766. sp_384_from_mp(p2->x, 12, pX);
  27767. sp_384_from_mp(p2->y, 12, pY);
  27768. sp_384_from_mp(p2->z, 12, pZ);
  27769. #ifndef WOLFSSL_SP_SMALL
  27770. {
  27771. sp_384_mod_inv_12(s, s, p384_order);
  27772. }
  27773. #endif /* !WOLFSSL_SP_SMALL */
  27774. {
  27775. sp_384_mul_12(s, s, p384_norm_order);
  27776. }
  27777. err = sp_384_mod_12(s, s, p384_order);
  27778. }
  27779. if (err == MP_OKAY) {
  27780. sp_384_norm_12(s);
  27781. #ifdef WOLFSSL_SP_SMALL
  27782. {
  27783. sp_384_mont_inv_order_12(s, s, tmp);
  27784. sp_384_mont_mul_order_12(u1, u1, s);
  27785. sp_384_mont_mul_order_12(u2, u2, s);
  27786. }
  27787. #else
  27788. {
  27789. sp_384_mont_mul_order_12(u1, u1, s);
  27790. sp_384_mont_mul_order_12(u2, u2, s);
  27791. }
  27792. #endif /* WOLFSSL_SP_SMALL */
  27793. err = sp_384_ecc_mulmod_base_12(p1, u1, 0, 0, heap);
  27794. }
  27795. if ((err == MP_OKAY) && sp_384_iszero_12(p1->z)) {
  27796. p1->infinity = 1;
  27797. }
  27798. if (err == MP_OKAY) {
  27799. err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, 0, heap);
  27800. }
  27801. if ((err == MP_OKAY) && sp_384_iszero_12(p2->z)) {
  27802. p2->infinity = 1;
  27803. }
  27804. if (err == MP_OKAY) {
  27805. {
  27806. sp_384_proj_point_add_12(p1, p1, p2, tmp);
  27807. if (sp_384_iszero_12(p1->z)) {
  27808. if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
  27809. sp_384_proj_point_dbl_12(p1, p2, tmp);
  27810. }
  27811. else {
  27812. /* Y ordinate is not used from here - don't set. */
  27813. p1->x[0] = 0;
  27814. p1->x[1] = 0;
  27815. p1->x[2] = 0;
  27816. p1->x[3] = 0;
  27817. p1->x[4] = 0;
  27818. p1->x[5] = 0;
  27819. p1->x[6] = 0;
  27820. p1->x[7] = 0;
  27821. p1->x[8] = 0;
  27822. p1->x[9] = 0;
  27823. p1->x[10] = 0;
  27824. p1->x[11] = 0;
  27825. XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
  27826. }
  27827. }
  27828. }
  27829. /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
  27830. /* Reload r and convert to Montgomery form. */
  27831. sp_384_from_mp(u2, 12, r);
  27832. err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
  27833. }
  27834. if (err == MP_OKAY) {
  27835. /* u1 = r.z'.z' mod prime */
  27836. sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
  27837. sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
  27838. *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
  27839. if (*res == 0) {
  27840. /* Reload r and add order. */
  27841. sp_384_from_mp(u2, 12, r);
  27842. carry = sp_384_add_12(u2, u2, p384_order);
  27843. /* Carry means result is greater than mod and is not valid. */
  27844. if (carry == 0) {
  27845. sp_384_norm_12(u2);
  27846. /* Compare with mod and if greater or equal then not valid. */
  27847. c = sp_384_cmp_12(u2, p384_mod);
  27848. if (c < 0) {
  27849. /* Convert to Montogomery form */
  27850. err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
  27851. if (err == MP_OKAY) {
  27852. /* u1 = (r + 1*order).z'.z' mod prime */
  27853. sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
  27854. p384_mp_mod);
  27855. *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
  27856. }
  27857. }
  27858. }
  27859. }
  27860. }
  27861. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27862. if (d != NULL)
  27863. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  27864. #endif
  27865. sp_384_point_free_12(p1, 0, heap);
  27866. sp_384_point_free_12(p2, 0, heap);
  27867. return err;
  27868. }
  27869. #endif /* HAVE_ECC_VERIFY */
  27870. #ifdef HAVE_ECC_CHECK_KEY
  27871. /* Check that the x and y oridinates are a valid point on the curve.
  27872. *
  27873. * point EC point.
  27874. * heap Heap to use if dynamically allocating.
  27875. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  27876. * not on the curve and MP_OKAY otherwise.
  27877. */
  27878. static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
  27879. {
  27880. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27881. sp_digit* d = NULL;
  27882. #else
  27883. sp_digit t1d[2*12];
  27884. sp_digit t2d[2*12];
  27885. #endif
  27886. sp_digit* t1;
  27887. sp_digit* t2;
  27888. int err = MP_OKAY;
  27889. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27890. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
  27891. if (d == NULL) {
  27892. err = MEMORY_E;
  27893. }
  27894. #endif
  27895. (void)heap;
  27896. if (err == MP_OKAY) {
  27897. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27898. t1 = d + 0 * 12;
  27899. t2 = d + 2 * 12;
  27900. #else
  27901. t1 = t1d;
  27902. t2 = t2d;
  27903. #endif
  27904. sp_384_sqr_12(t1, point->y);
  27905. (void)sp_384_mod_12(t1, t1, p384_mod);
  27906. sp_384_sqr_12(t2, point->x);
  27907. (void)sp_384_mod_12(t2, t2, p384_mod);
  27908. sp_384_mul_12(t2, t2, point->x);
  27909. (void)sp_384_mod_12(t2, t2, p384_mod);
  27910. (void)sp_384_sub_12(t2, p384_mod, t2);
  27911. sp_384_mont_add_12(t1, t1, t2, p384_mod);
  27912. sp_384_mont_add_12(t1, t1, point->x, p384_mod);
  27913. sp_384_mont_add_12(t1, t1, point->x, p384_mod);
  27914. sp_384_mont_add_12(t1, t1, point->x, p384_mod);
  27915. if (sp_384_cmp_12(t1, p384_b) != 0) {
  27916. err = MP_VAL;
  27917. }
  27918. }
  27919. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27920. if (d != NULL) {
  27921. XFREE(d, heap, DYNAMIC_TYPE_ECC);
  27922. }
  27923. #endif
  27924. return err;
  27925. }
  27926. /* Check that the x and y oridinates are a valid point on the curve.
  27927. *
  27928. * pX X ordinate of EC point.
  27929. * pY Y ordinate of EC point.
  27930. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  27931. * not on the curve and MP_OKAY otherwise.
  27932. */
  27933. int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
  27934. {
  27935. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  27936. sp_point_384 pubd;
  27937. #endif
  27938. sp_point_384* pub;
  27939. byte one[1] = { 1 };
  27940. int err;
  27941. err = sp_384_point_new_12(NULL, pubd, pub);
  27942. if (err == MP_OKAY) {
  27943. sp_384_from_mp(pub->x, 12, pX);
  27944. sp_384_from_mp(pub->y, 12, pY);
  27945. sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
  27946. err = sp_384_ecc_is_point_12(pub, NULL);
  27947. }
  27948. sp_384_point_free_12(pub, 0, NULL);
  27949. return err;
  27950. }
  27951. /* Check that the private scalar generates the EC point (px, py), the point is
  27952. * on the curve and the point has the correct order.
  27953. *
  27954. * pX X ordinate of EC point.
  27955. * pY Y ordinate of EC point.
  27956. * privm Private scalar that generates EC point.
  27957. * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
  27958. * not on the curve, ECC_INF_E if the point does not have the correct order,
  27959. * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
  27960. * MP_OKAY otherwise.
  27961. */
  27962. int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
  27963. {
  27964. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  27965. sp_digit privd[12];
  27966. sp_point_384 pubd;
  27967. sp_point_384 pd;
  27968. #endif
  27969. sp_digit* priv = NULL;
  27970. sp_point_384* pub;
  27971. sp_point_384* p = NULL;
  27972. byte one[1] = { 1 };
  27973. int err;
  27974. err = sp_384_point_new_12(heap, pubd, pub);
  27975. if (err == MP_OKAY) {
  27976. err = sp_384_point_new_12(heap, pd, p);
  27977. }
  27978. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  27979. if (err == MP_OKAY && privm) {
  27980. priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
  27981. DYNAMIC_TYPE_ECC);
  27982. if (priv == NULL) {
  27983. err = MEMORY_E;
  27984. }
  27985. }
  27986. #endif
  27987. /* Quick check the lengs of public key ordinates and private key are in
  27988. * range. Proper check later.
  27989. */
  27990. if ((err == MP_OKAY) && ((mp_count_bits(pX) > 384) ||
  27991. (mp_count_bits(pY) > 384) ||
  27992. ((privm != NULL) && (mp_count_bits(privm) > 384)))) {
  27993. err = ECC_OUT_OF_RANGE_E;
  27994. }
  27995. if (err == MP_OKAY) {
  27996. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  27997. priv = privd;
  27998. #endif
  27999. sp_384_from_mp(pub->x, 12, pX);
  28000. sp_384_from_mp(pub->y, 12, pY);
  28001. sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
  28002. if (privm)
  28003. sp_384_from_mp(priv, 12, privm);
  28004. /* Check point at infinitiy. */
  28005. if ((sp_384_iszero_12(pub->x) != 0) &&
  28006. (sp_384_iszero_12(pub->y) != 0)) {
  28007. err = ECC_INF_E;
  28008. }
  28009. }
  28010. if (err == MP_OKAY) {
  28011. /* Check range of X and Y */
  28012. if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
  28013. sp_384_cmp_12(pub->y, p384_mod) >= 0) {
  28014. err = ECC_OUT_OF_RANGE_E;
  28015. }
  28016. }
  28017. if (err == MP_OKAY) {
  28018. /* Check point is on curve */
  28019. err = sp_384_ecc_is_point_12(pub, heap);
  28020. }
  28021. if (err == MP_OKAY) {
  28022. /* Point * order = infinity */
  28023. err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, 1, heap);
  28024. }
  28025. if (err == MP_OKAY) {
  28026. /* Check result is infinity */
  28027. if ((sp_384_iszero_12(p->x) == 0) ||
  28028. (sp_384_iszero_12(p->y) == 0)) {
  28029. err = ECC_INF_E;
  28030. }
  28031. }
  28032. if (privm) {
  28033. if (err == MP_OKAY) {
  28034. /* Base * private = point */
  28035. err = sp_384_ecc_mulmod_base_12(p, priv, 1, 1, heap);
  28036. }
  28037. if (err == MP_OKAY) {
  28038. /* Check result is public key */
  28039. if (sp_384_cmp_12(p->x, pub->x) != 0 ||
  28040. sp_384_cmp_12(p->y, pub->y) != 0) {
  28041. err = ECC_PRIV_KEY_E;
  28042. }
  28043. }
  28044. }
  28045. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28046. if (priv != NULL) {
  28047. XFREE(priv, heap, DYNAMIC_TYPE_ECC);
  28048. }
  28049. #endif
  28050. sp_384_point_free_12(p, 0, heap);
  28051. sp_384_point_free_12(pub, 0, heap);
  28052. return err;
  28053. }
  28054. #endif
  28055. #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
  28056. /* Add two projective EC points together.
  28057. * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
  28058. *
  28059. * pX First EC point's X ordinate.
  28060. * pY First EC point's Y ordinate.
  28061. * pZ First EC point's Z ordinate.
  28062. * qX Second EC point's X ordinate.
  28063. * qY Second EC point's Y ordinate.
  28064. * qZ Second EC point's Z ordinate.
  28065. * rX Resultant EC point's X ordinate.
  28066. * rY Resultant EC point's Y ordinate.
  28067. * rZ Resultant EC point's Z ordinate.
  28068. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  28069. */
  28070. int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
  28071. mp_int* qX, mp_int* qY, mp_int* qZ,
  28072. mp_int* rX, mp_int* rY, mp_int* rZ)
  28073. {
  28074. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  28075. sp_digit tmpd[2 * 12 * 5];
  28076. sp_point_384 pd;
  28077. sp_point_384 qd;
  28078. #endif
  28079. sp_digit* tmp = NULL;
  28080. sp_point_384* p;
  28081. sp_point_384* q = NULL;
  28082. int err;
  28083. err = sp_384_point_new_12(NULL, pd, p);
  28084. if (err == MP_OKAY) {
  28085. err = sp_384_point_new_12(NULL, qd, q);
  28086. }
  28087. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28088. if (err == MP_OKAY) {
  28089. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
  28090. DYNAMIC_TYPE_ECC);
  28091. if (tmp == NULL) {
  28092. err = MEMORY_E;
  28093. }
  28094. }
  28095. #else
  28096. tmp = tmpd;
  28097. #endif
  28098. if (err == MP_OKAY) {
  28099. sp_384_from_mp(p->x, 12, pX);
  28100. sp_384_from_mp(p->y, 12, pY);
  28101. sp_384_from_mp(p->z, 12, pZ);
  28102. sp_384_from_mp(q->x, 12, qX);
  28103. sp_384_from_mp(q->y, 12, qY);
  28104. sp_384_from_mp(q->z, 12, qZ);
  28105. sp_384_proj_point_add_12(p, p, q, tmp);
  28106. }
  28107. if (err == MP_OKAY) {
  28108. err = sp_384_to_mp(p->x, rX);
  28109. }
  28110. if (err == MP_OKAY) {
  28111. err = sp_384_to_mp(p->y, rY);
  28112. }
  28113. if (err == MP_OKAY) {
  28114. err = sp_384_to_mp(p->z, rZ);
  28115. }
  28116. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28117. if (tmp != NULL) {
  28118. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  28119. }
  28120. #endif
  28121. sp_384_point_free_12(q, 0, NULL);
  28122. sp_384_point_free_12(p, 0, NULL);
  28123. return err;
  28124. }
  28125. /* Double a projective EC point.
  28126. * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
  28127. *
  28128. * pX EC point's X ordinate.
  28129. * pY EC point's Y ordinate.
  28130. * pZ EC point's Z ordinate.
  28131. * rX Resultant EC point's X ordinate.
  28132. * rY Resultant EC point's Y ordinate.
  28133. * rZ Resultant EC point's Z ordinate.
  28134. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  28135. */
  28136. int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
  28137. mp_int* rX, mp_int* rY, mp_int* rZ)
  28138. {
  28139. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  28140. sp_digit tmpd[2 * 12 * 2];
  28141. sp_point_384 pd;
  28142. #endif
  28143. sp_digit* tmp = NULL;
  28144. sp_point_384* p;
  28145. int err;
  28146. err = sp_384_point_new_12(NULL, pd, p);
  28147. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28148. if (err == MP_OKAY) {
  28149. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
  28150. DYNAMIC_TYPE_ECC);
  28151. if (tmp == NULL) {
  28152. err = MEMORY_E;
  28153. }
  28154. }
  28155. #else
  28156. tmp = tmpd;
  28157. #endif
  28158. if (err == MP_OKAY) {
  28159. sp_384_from_mp(p->x, 12, pX);
  28160. sp_384_from_mp(p->y, 12, pY);
  28161. sp_384_from_mp(p->z, 12, pZ);
  28162. sp_384_proj_point_dbl_12(p, p, tmp);
  28163. }
  28164. if (err == MP_OKAY) {
  28165. err = sp_384_to_mp(p->x, rX);
  28166. }
  28167. if (err == MP_OKAY) {
  28168. err = sp_384_to_mp(p->y, rY);
  28169. }
  28170. if (err == MP_OKAY) {
  28171. err = sp_384_to_mp(p->z, rZ);
  28172. }
  28173. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28174. if (tmp != NULL) {
  28175. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  28176. }
  28177. #endif
  28178. sp_384_point_free_12(p, 0, NULL);
  28179. return err;
  28180. }
  28181. /* Map a projective EC point to affine in place.
  28182. * pZ will be one.
  28183. *
  28184. * pX EC point's X ordinate.
  28185. * pY EC point's Y ordinate.
  28186. * pZ EC point's Z ordinate.
  28187. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  28188. */
  28189. int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
  28190. {
  28191. #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
  28192. sp_digit tmpd[2 * 12 * 6];
  28193. sp_point_384 pd;
  28194. #endif
  28195. sp_digit* tmp = NULL;
  28196. sp_point_384* p;
  28197. int err;
  28198. err = sp_384_point_new_12(NULL, pd, p);
  28199. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28200. if (err == MP_OKAY) {
  28201. tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
  28202. DYNAMIC_TYPE_ECC);
  28203. if (tmp == NULL) {
  28204. err = MEMORY_E;
  28205. }
  28206. }
  28207. #else
  28208. tmp = tmpd;
  28209. #endif
  28210. if (err == MP_OKAY) {
  28211. sp_384_from_mp(p->x, 12, pX);
  28212. sp_384_from_mp(p->y, 12, pY);
  28213. sp_384_from_mp(p->z, 12, pZ);
  28214. sp_384_map_12(p, p, tmp);
  28215. }
  28216. if (err == MP_OKAY) {
  28217. err = sp_384_to_mp(p->x, pX);
  28218. }
  28219. if (err == MP_OKAY) {
  28220. err = sp_384_to_mp(p->y, pY);
  28221. }
  28222. if (err == MP_OKAY) {
  28223. err = sp_384_to_mp(p->z, pZ);
  28224. }
  28225. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28226. if (tmp != NULL) {
  28227. XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
  28228. }
  28229. #endif
  28230. sp_384_point_free_12(p, 0, NULL);
  28231. return err;
  28232. }
  28233. #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
  28234. #ifdef HAVE_COMP_KEY
  28235. /* Find the square root of a number mod the prime of the curve.
  28236. *
  28237. * y The number to operate on and the result.
  28238. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  28239. */
  28240. static int sp_384_mont_sqrt_12(sp_digit* y)
  28241. {
  28242. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28243. sp_digit* d;
  28244. #else
  28245. sp_digit t1d[2 * 12];
  28246. sp_digit t2d[2 * 12];
  28247. sp_digit t3d[2 * 12];
  28248. sp_digit t4d[2 * 12];
  28249. sp_digit t5d[2 * 12];
  28250. #endif
  28251. sp_digit* t1;
  28252. sp_digit* t2;
  28253. sp_digit* t3;
  28254. sp_digit* t4;
  28255. sp_digit* t5;
  28256. int err = MP_OKAY;
  28257. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28258. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
  28259. if (d == NULL) {
  28260. err = MEMORY_E;
  28261. }
  28262. #endif
  28263. if (err == MP_OKAY) {
  28264. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28265. t1 = d + 0 * 12;
  28266. t2 = d + 2 * 12;
  28267. t3 = d + 4 * 12;
  28268. t4 = d + 6 * 12;
  28269. t5 = d + 8 * 12;
  28270. #else
  28271. t1 = t1d;
  28272. t2 = t2d;
  28273. t3 = t3d;
  28274. t4 = t4d;
  28275. t5 = t5d;
  28276. #endif
  28277. {
  28278. /* t2 = y ^ 0x2 */
  28279. sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
  28280. /* t1 = y ^ 0x3 */
  28281. sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
  28282. /* t5 = y ^ 0xc */
  28283. sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
  28284. /* t1 = y ^ 0xf */
  28285. sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
  28286. /* t2 = y ^ 0x1e */
  28287. sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
  28288. /* t3 = y ^ 0x1f */
  28289. sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
  28290. /* t2 = y ^ 0x3e0 */
  28291. sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
  28292. /* t1 = y ^ 0x3ff */
  28293. sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
  28294. /* t2 = y ^ 0x7fe0 */
  28295. sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
  28296. /* t3 = y ^ 0x7fff */
  28297. sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
  28298. /* t2 = y ^ 0x3fff800 */
  28299. sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
  28300. /* t4 = y ^ 0x3ffffff */
  28301. sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
  28302. /* t2 = y ^ 0xffffffc000000 */
  28303. sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
  28304. /* t1 = y ^ 0xfffffffffffff */
  28305. sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
  28306. /* t2 = y ^ 0xfffffffffffffff000000000000000 */
  28307. sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
  28308. /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
  28309. sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
  28310. /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
  28311. sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
  28312. /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
  28313. sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
  28314. /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
  28315. sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
  28316. /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
  28317. sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
  28318. /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
  28319. sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
  28320. /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
  28321. sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
  28322. /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
  28323. sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
  28324. /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
  28325. sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
  28326. /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
  28327. sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
  28328. /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
  28329. sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
  28330. /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
  28331. sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
  28332. }
  28333. }
  28334. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28335. if (d != NULL) {
  28336. XFREE(d, NULL, DYNAMIC_TYPE_ECC);
  28337. }
  28338. #endif
  28339. return err;
  28340. }
  28341. /* Uncompress the point given the X ordinate.
  28342. *
  28343. * xm X ordinate.
  28344. * odd Whether the Y ordinate is odd.
  28345. * ym Calculated Y ordinate.
  28346. * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
  28347. */
  28348. int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
  28349. {
  28350. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28351. sp_digit* d;
  28352. #else
  28353. sp_digit xd[2 * 12];
  28354. sp_digit yd[2 * 12];
  28355. #endif
  28356. sp_digit* x = NULL;
  28357. sp_digit* y = NULL;
  28358. int err = MP_OKAY;
  28359. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28360. d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
  28361. if (d == NULL) {
  28362. err = MEMORY_E;
  28363. }
  28364. #endif
  28365. if (err == MP_OKAY) {
  28366. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28367. x = d + 0 * 12;
  28368. y = d + 2 * 12;
  28369. #else
  28370. x = xd;
  28371. y = yd;
  28372. #endif
  28373. sp_384_from_mp(x, 12, xm);
  28374. err = sp_384_mod_mul_norm_12(x, x, p384_mod);
  28375. }
  28376. if (err == MP_OKAY) {
  28377. /* y = x^3 */
  28378. {
  28379. sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
  28380. sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
  28381. }
  28382. /* y = x^3 - 3x */
  28383. sp_384_mont_sub_12(y, y, x, p384_mod);
  28384. sp_384_mont_sub_12(y, y, x, p384_mod);
  28385. sp_384_mont_sub_12(y, y, x, p384_mod);
  28386. /* y = x^3 - 3x + b */
  28387. err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
  28388. }
  28389. if (err == MP_OKAY) {
  28390. sp_384_mont_add_12(y, y, x, p384_mod);
  28391. /* y = sqrt(x^3 - 3x + b) */
  28392. err = sp_384_mont_sqrt_12(y);
  28393. }
  28394. if (err == MP_OKAY) {
  28395. XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
  28396. sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
  28397. if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
  28398. sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
  28399. }
  28400. err = sp_384_to_mp(y, ym);
  28401. }
  28402. #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
  28403. if (d != NULL) {
  28404. XFREE(d, NULL, DYNAMIC_TYPE_ECC);
  28405. }
  28406. #endif
  28407. return err;
  28408. }
  28409. #endif
  28410. #endif /* WOLFSSL_SP_384 */
  28411. #endif /* WOLFSSL_HAVE_SP_ECC */
  28412. #endif /* WOLFSSL_SP_ARM_CORTEX_M_ASM */
  28413. #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */