fe_x25519_128.i 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /* fe_x25519_128.i
  2. *
  3. * Copyright (C) 2006-2022 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Generated using (from wolfssl):
  22. * cd ../scripts
  23. * ruby ./x25519/fe_x25519_128_gen.rb > ../wolfssl/wolfcrypt/src/fe_x25519_128.i
  24. */
  25. void fe_init(void)
  26. {
  27. }
  28. /* Convert a number represented as an array of bytes to an array of words with
  29. * 51-bits of data in each word.
  30. *
  31. * in An array of bytes.
  32. * out An array of words.
  33. */
  34. void fe_frombytes(fe out, const unsigned char *in)
  35. {
  36. out[0] = (((sword64)((in[ 0] ) )) )
  37. | (((sword64)((in[ 1] ) )) << 8)
  38. | (((sword64)((in[ 2] ) )) << 16)
  39. | (((sword64)((in[ 3] ) )) << 24)
  40. | (((sword64)((in[ 4] ) )) << 32)
  41. | (((sword64)((in[ 5] ) )) << 40)
  42. | (((sword64)((in[ 6] ) & 0x07)) << 48);
  43. out[1] = (((sword64)((in[ 6] >> 3) & 0x1f)) )
  44. | (((sword64)((in[ 7] ) )) << 5)
  45. | (((sword64)((in[ 8] ) )) << 13)
  46. | (((sword64)((in[ 9] ) )) << 21)
  47. | (((sword64)((in[10] ) )) << 29)
  48. | (((sword64)((in[11] ) )) << 37)
  49. | (((sword64)((in[12] ) & 0x3f)) << 45);
  50. out[2] = (((sword64)((in[12] >> 6) & 0x03)) )
  51. | (((sword64)((in[13] ) )) << 2)
  52. | (((sword64)((in[14] ) )) << 10)
  53. | (((sword64)((in[15] ) )) << 18)
  54. | (((sword64)((in[16] ) )) << 26)
  55. | (((sword64)((in[17] ) )) << 34)
  56. | (((sword64)((in[18] ) )) << 42)
  57. | (((sword64)((in[19] ) & 0x01)) << 50);
  58. out[3] = (((sword64)((in[19] >> 1) & 0x7f)) )
  59. | (((sword64)((in[20] ) )) << 7)
  60. | (((sword64)((in[21] ) )) << 15)
  61. | (((sword64)((in[22] ) )) << 23)
  62. | (((sword64)((in[23] ) )) << 31)
  63. | (((sword64)((in[24] ) )) << 39)
  64. | (((sword64)((in[25] ) & 0x0f)) << 47);
  65. out[4] = (((sword64)((in[25] >> 4) & 0x0f)) )
  66. | (((sword64)((in[26] ) )) << 4)
  67. | (((sword64)((in[27] ) )) << 12)
  68. | (((sword64)((in[28] ) )) << 20)
  69. | (((sword64)((in[29] ) )) << 28)
  70. | (((sword64)((in[30] ) )) << 36)
  71. | (((sword64)((in[31] ) & 0x7f)) << 44);
  72. }
  73. /* Convert a number represented as an array of words to an array of bytes.
  74. * The array of words is normalized to an array of 51-bit data words and if
  75. * greater than the mod, modulo reduced by the prime 2^255 - 1.
  76. *
  77. * n An array of words.
  78. * out An array of bytes.
  79. */
  80. void fe_tobytes(unsigned char *out, const fe n)
  81. {
  82. fe in;
  83. sword64 c;
  84. in[0] = n[0];
  85. in[1] = n[1];
  86. in[2] = n[2];
  87. in[3] = n[3];
  88. in[4] = n[4];
  89. /* Normalize to 51-bits of data per word. */
  90. in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
  91. in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
  92. in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
  93. in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
  94. in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
  95. in[0] += (in[4] >> 51) * 19;
  96. in[4] &= 0x7ffffffffffff;
  97. c = (in[0] + 19) >> 51;
  98. c = (in[1] + c) >> 51;
  99. c = (in[2] + c) >> 51;
  100. c = (in[3] + c) >> 51;
  101. c = (in[4] + c) >> 51;
  102. in[0] += c * 19;
  103. in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
  104. in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
  105. in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
  106. in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
  107. in[4] &= 0x7ffffffffffff;
  108. out[ 0] = (((byte)((in[0] ) )) );
  109. out[ 1] = (((byte)((in[0] >> 8) )) );
  110. out[ 2] = (((byte)((in[0] >> 16) )) );
  111. out[ 3] = (((byte)((in[0] >> 24) )) );
  112. out[ 4] = (((byte)((in[0] >> 32) )) );
  113. out[ 5] = (((byte)((in[0] >> 40) )) );
  114. out[ 6] = (((byte)((in[0] >> 48) & 0x07)) )
  115. | (((byte)((in[1] ) & 0x1f)) << 3);
  116. out[ 7] = (((byte)((in[1] >> 5) )) );
  117. out[ 8] = (((byte)((in[1] >> 13) )) );
  118. out[ 9] = (((byte)((in[1] >> 21) )) );
  119. out[10] = (((byte)((in[1] >> 29) )) );
  120. out[11] = (((byte)((in[1] >> 37) )) );
  121. out[12] = (((byte)((in[1] >> 45) & 0x3f)) )
  122. | (((byte)((in[2] ) & 0x03)) << 6);
  123. out[13] = (((byte)((in[2] >> 2) )) );
  124. out[14] = (((byte)((in[2] >> 10) )) );
  125. out[15] = (((byte)((in[2] >> 18) )) );
  126. out[16] = (((byte)((in[2] >> 26) )) );
  127. out[17] = (((byte)((in[2] >> 34) )) );
  128. out[18] = (((byte)((in[2] >> 42) )) );
  129. out[19] = (((byte)((in[2] >> 50) & 0x01)) )
  130. | (((byte)((in[3] ) & 0x7f)) << 1);
  131. out[20] = (((byte)((in[3] >> 7) )) );
  132. out[21] = (((byte)((in[3] >> 15) )) );
  133. out[22] = (((byte)((in[3] >> 23) )) );
  134. out[23] = (((byte)((in[3] >> 31) )) );
  135. out[24] = (((byte)((in[3] >> 39) )) );
  136. out[25] = (((byte)((in[3] >> 47) & 0x0f)) )
  137. | (((byte)((in[4] ) & 0x0f)) << 4);
  138. out[26] = (((byte)((in[4] >> 4) )) );
  139. out[27] = (((byte)((in[4] >> 12) )) );
  140. out[28] = (((byte)((in[4] >> 20) )) );
  141. out[29] = (((byte)((in[4] >> 28) )) );
  142. out[30] = (((byte)((in[4] >> 36) )) );
  143. out[31] = (((byte)((in[4] >> 44) & 0x7f)) );
  144. }
  145. /* Set the field element to 1.
  146. *
  147. * n The field element number.
  148. */
  149. void fe_1(fe n)
  150. {
  151. n[0] = 0x0000000000001;
  152. n[1] = 0x0000000000000;
  153. n[2] = 0x0000000000000;
  154. n[3] = 0x0000000000000;
  155. n[4] = 0x0000000000000;
  156. }
  157. /* Set the field element to 0.
  158. *
  159. * n The field element number.
  160. */
  161. void fe_0(fe n)
  162. {
  163. n[0] = 0x0000000000000;
  164. n[1] = 0x0000000000000;
  165. n[2] = 0x0000000000000;
  166. n[3] = 0x0000000000000;
  167. n[4] = 0x0000000000000;
  168. }
  169. /* Copy field element a into field element r.
  170. *
  171. * r Field element to copy into.
  172. * a Field element to copy.
  173. */
  174. void fe_copy(fe r, const fe a)
  175. {
  176. r[0] = a[0];
  177. r[1] = a[1];
  178. r[2] = a[2];
  179. r[3] = a[3];
  180. r[4] = a[4];
  181. }
  182. /* Constant time, conditional swap of field elements a and b.
  183. *
  184. * f A field element.
  185. * g A field element.
  186. * b If 1 then swap and if 0 then don't swap.
  187. */
  188. void fe_cswap(fe f, fe g, int b)
  189. {
  190. sword64 m = b;
  191. sword64 t0, t1, t2, t3, t4;
  192. /* Convert conditional into mask. */
  193. m = -m;
  194. t0 = m & (f[0] ^ g[0]);
  195. t1 = m & (f[1] ^ g[1]);
  196. t2 = m & (f[2] ^ g[2]);
  197. t3 = m & (f[3] ^ g[3]);
  198. t4 = m & (f[4] ^ g[4]);
  199. f[0] ^= t0;
  200. f[1] ^= t1;
  201. f[2] ^= t2;
  202. f[3] ^= t3;
  203. f[4] ^= t4;
  204. g[0] ^= t0;
  205. g[1] ^= t1;
  206. g[2] ^= t2;
  207. g[3] ^= t3;
  208. g[4] ^= t4;
  209. }
  210. /* Subtract b from a into r. (r = a - b)
  211. *
  212. * r A field element.
  213. * a A field element.
  214. * b A field element.
  215. */
  216. void fe_sub(fe r, const fe a, const fe b)
  217. {
  218. r[0] = a[0] - b[0];
  219. r[1] = a[1] - b[1];
  220. r[2] = a[2] - b[2];
  221. r[3] = a[3] - b[3];
  222. r[4] = a[4] - b[4];
  223. }
  224. /* Add b to a into r. (r = a + b)
  225. *
  226. * r A field element.
  227. * a A field element.
  228. * b A field element.
  229. */
  230. void fe_add(fe r, const fe a, const fe b)
  231. {
  232. r[0] = a[0] + b[0];
  233. r[1] = a[1] + b[1];
  234. r[2] = a[2] + b[2];
  235. r[3] = a[3] + b[3];
  236. r[4] = a[4] + b[4];
  237. }
  238. /* Multiply a and b into r. (r = a * b)
  239. *
  240. * r A field element.
  241. * a A field element.
  242. * b A field element.
  243. */
  244. void fe_mul(fe r, const fe a, const fe b)
  245. {
  246. const __int128_t k19 = 19;
  247. __int128_t t0 = ((__int128_t)a[0]) * b[0];
  248. __int128_t t1 = ((__int128_t)a[0]) * b[1]
  249. + ((__int128_t)a[1]) * b[0];
  250. __int128_t t2 = ((__int128_t)a[0]) * b[2]
  251. + ((__int128_t)a[1]) * b[1]
  252. + ((__int128_t)a[2]) * b[0];
  253. __int128_t t3 = ((__int128_t)a[0]) * b[3]
  254. + ((__int128_t)a[1]) * b[2]
  255. + ((__int128_t)a[2]) * b[1]
  256. + ((__int128_t)a[3]) * b[0];
  257. __int128_t t4 = ((__int128_t)a[0]) * b[4]
  258. + ((__int128_t)a[1]) * b[3]
  259. + ((__int128_t)a[2]) * b[2]
  260. + ((__int128_t)a[3]) * b[1]
  261. + ((__int128_t)a[4]) * b[0];
  262. __int128_t t5 = ((__int128_t)a[1]) * b[4]
  263. + ((__int128_t)a[2]) * b[3]
  264. + ((__int128_t)a[3]) * b[2]
  265. + ((__int128_t)a[4]) * b[1];
  266. __int128_t t6 = ((__int128_t)a[2]) * b[4]
  267. + ((__int128_t)a[3]) * b[3]
  268. + ((__int128_t)a[4]) * b[2];
  269. __int128_t t7 = ((__int128_t)a[3]) * b[4]
  270. + ((__int128_t)a[4]) * b[3];
  271. __int128_t t8 = ((__int128_t)a[4]) * b[4];
  272. /* Modulo reduce double long word. */
  273. t0 += t5 * k19;
  274. t1 += t6 * k19;
  275. t2 += t7 * k19;
  276. t3 += t8 * k19;
  277. /* Normalize to 51-bits of data per word. */
  278. t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
  279. t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
  280. t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
  281. t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
  282. t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
  283. r[0] += (t4 >> 51) * k19;
  284. r[4] = t4 & 0x7ffffffffffff;
  285. }
  286. /* Square a and put result in r. (r = a * a)
  287. *
  288. * r A field element.
  289. * a A field element.
  290. * b A field element.
  291. */
  292. void fe_sq(fe r, const fe a)
  293. {
  294. const __int128_t k19 = 19;
  295. const __int128_t k2 = 2;
  296. __int128_t t0 = ((__int128_t)a[0]) * a[0];
  297. __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
  298. __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
  299. + ((__int128_t)a[1]) * a[1];
  300. __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
  301. + ((__int128_t)a[1]) * a[2] * k2;
  302. __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
  303. + ((__int128_t)a[1]) * a[3] * k2
  304. + ((__int128_t)a[2]) * a[2];
  305. __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
  306. + ((__int128_t)a[2]) * a[3] * k2;
  307. __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
  308. + ((__int128_t)a[3]) * a[3];
  309. __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
  310. __int128_t t8 = ((__int128_t)a[4]) * a[4];
  311. /* Modulo reduce double long word. */
  312. t0 += t5 * k19;
  313. t1 += t6 * k19;
  314. t2 += t7 * k19;
  315. t3 += t8 * k19;
  316. /* Normalize to 51-bits of data per word. */
  317. t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
  318. t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
  319. t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
  320. t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
  321. t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
  322. r[0] += (t4 >> 51) * k19;
  323. r[4] = t4 & 0x7ffffffffffff;
  324. }
  325. /* Multiply a by 121666 and put result in r. (r = 121666 * a)
  326. *
  327. * r A field element.
  328. * a A field element.
  329. * b A field element.
  330. */
  331. void fe_mul121666(fe r, fe a)
  332. {
  333. const __int128_t k19 = 19;
  334. const __int128_t k121666 = 121666;
  335. __int128_t t0 = ((__int128_t)a[0]) * k121666;
  336. __int128_t t1 = ((__int128_t)a[1]) * k121666;
  337. __int128_t t2 = ((__int128_t)a[2]) * k121666;
  338. __int128_t t3 = ((__int128_t)a[3]) * k121666;
  339. __int128_t t4 = ((__int128_t)a[4]) * k121666;
  340. /* Normalize to 51-bits of data per word. */
  341. t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
  342. t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
  343. t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
  344. t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
  345. t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
  346. r[0] += (t4 >> 51) * k19;
  347. r[4] = t4 & 0x7ffffffffffff;
  348. }
  349. /* Find the inverse of a modulo 2^255 - 1 and put result in r.
  350. * (r * a) mod (2^255 - 1) = 1
  351. * Implementation is constant time.
  352. *
  353. * r A field element.
  354. * a A field element.
  355. */
  356. void fe_invert(fe r, const fe a)
  357. {
  358. fe t0, t1, t2, t3;
  359. int i;
  360. /* a ^ (2^255 - 21) */
  361. fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
  362. fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
  363. fe_mul(t0, t0, t1);
  364. fe_sq(t2, t0); for (i = 1; i < 1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
  365. fe_sq(t2, t1); for (i = 1; i < 5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
  366. fe_sq(t2, t1); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
  367. fe_sq(t3, t2); for (i = 1; i < 20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
  368. fe_sq(t2, t2); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
  369. fe_sq(t2, t1); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
  370. fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
  371. fe_sq(t2, t2); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
  372. fe_sq(t1, t1); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
  373. }
  374. #ifndef CURVE25519_SMALL
  375. /* Scalar multiply the field element a by n using Montgomery Ladder and places
  376. * result in r.
  377. *
  378. * r A field element as an array of bytes.
  379. * n The scalar as an array of bytes.
  380. * a A field element as an array of bytes.
  381. */
  382. int curve25519(byte* r, const byte* n, const byte* a)
  383. {
  384. fe x1, x2, z2, x3, z3;
  385. fe t0, t1;
  386. int pos;
  387. unsigned int swap;
  388. unsigned int b;
  389. fe_frombytes(x1, a);
  390. fe_1(x2);
  391. fe_0(z2);
  392. fe_copy(x3, x1);
  393. fe_1(z3);
  394. swap = 0;
  395. for (pos = 254;pos >= 0;--pos) {
  396. b = n[pos / 8] >> (pos & 7);
  397. b &= 1;
  398. swap ^= b;
  399. fe_cswap(x2, x3, swap);
  400. fe_cswap(z2, z3, swap);
  401. swap = b;
  402. fe_sub(t0, x3, z3);
  403. fe_sub(t1, x2, z2);
  404. fe_add(x2, x2, z2);
  405. fe_add(z2, x3, z3);
  406. fe_mul(z3, t0, x2);
  407. fe_mul(z2, z2, t1);
  408. fe_sq(t0, t1);
  409. fe_sq(t1, x2);
  410. fe_add(x3, z3, z2);
  411. fe_sub(z2, z3, z2);
  412. fe_mul(x2, t1, t0);
  413. fe_sub(t1, t1, t0);
  414. fe_sq(z2, z2);
  415. fe_mul121666(z3, t1);
  416. fe_sq(x3, x3);
  417. fe_add(t0, t0, z3);
  418. fe_mul(z3, x1, z2);
  419. fe_mul(z2, t1, t0);
  420. }
  421. fe_cswap(x2, x3, swap);
  422. fe_cswap(z2, z3, swap);
  423. fe_invert(z2, z2);
  424. fe_mul(x2, x2, z2);
  425. fe_tobytes(r, x2);
  426. return 0;
  427. }
  428. #endif /* !CURVE25519_SMALL */
  429. /* The field element value 0 as an array of bytes. */
  430. static const unsigned char zero[32] = {0};
  431. /* Constant time check as to whether a is not 0.
  432. *
  433. * a A field element.
  434. */
  435. int fe_isnonzero(const fe a)
  436. {
  437. unsigned char s[32];
  438. fe_tobytes(s, a);
  439. return ConstantCompare(s, zero, 32);
  440. }
  441. /* Checks whether a is negative.
  442. *
  443. * a A field element.
  444. */
  445. int fe_isnegative(const fe a)
  446. {
  447. unsigned char s[32];
  448. fe_tobytes(s, a);
  449. return s[0] & 1;
  450. }
  451. /* Negates field element a and stores the result in r.
  452. *
  453. * r A field element.
  454. * a A field element.
  455. */
  456. void fe_neg(fe r, const fe a)
  457. {
  458. r[0] = -a[0];
  459. r[1] = -a[1];
  460. r[2] = -a[2];
  461. r[3] = -a[3];
  462. r[4] = -a[4];
  463. }
  464. /* Constant time, conditional move of b into a.
  465. * a is not changed if the condition is 0.
  466. *
  467. * f A field element.
  468. * g A field element.
  469. * b If 1 then copy and if 0 then don't copy.
  470. */
  471. void fe_cmov(fe f, const fe g, int b)
  472. {
  473. sword64 m = b;
  474. sword64 t0, t1, t2, t3, t4;
  475. /* Convert conditional into mask. */
  476. m = -m;
  477. t0 = m & (f[0] ^ g[0]);
  478. t1 = m & (f[1] ^ g[1]);
  479. t2 = m & (f[2] ^ g[2]);
  480. t3 = m & (f[3] ^ g[3]);
  481. t4 = m & (f[4] ^ g[4]);
  482. f[0] ^= t0;
  483. f[1] ^= t1;
  484. f[2] ^= t2;
  485. f[3] ^= t3;
  486. f[4] ^= t4;
  487. }
  488. void fe_pow22523(fe r, const fe a)
  489. {
  490. fe t0, t1, t2;
  491. int i;
  492. /* a ^ (2^255 - 23) */
  493. fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
  494. fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
  495. fe_mul(t0, t0, t1);
  496. fe_sq(t0, t0); for (i = 1; i < 1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
  497. fe_sq(t1, t0); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
  498. fe_sq(t1, t0); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
  499. fe_sq(t2, t1); for (i = 1; i < 20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
  500. fe_sq(t1, t1); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
  501. fe_sq(t1, t0); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
  502. fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
  503. fe_sq(t1, t1); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
  504. fe_sq(t0, t0); for (i = 1; i < 2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
  505. return;
  506. }
  507. /* Double the square of a and put result in r. (r = 2 * a * a)
  508. *
  509. * r A field element.
  510. * a A field element.
  511. * b A field element.
  512. */
  513. void fe_sq2(fe r, const fe a)
  514. {
  515. const __int128_t k2 = 2;
  516. const __int128_t k19 = 19;
  517. __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
  518. __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
  519. __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
  520. + ((__int128_t)a[1]) * a[1]);
  521. __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
  522. + ((__int128_t)a[1]) * a[2] * k2);
  523. __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
  524. + ((__int128_t)a[1]) * a[3] * k2
  525. + ((__int128_t)a[2]) * a[2]);
  526. __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
  527. + ((__int128_t)a[2]) * a[3] * k2);
  528. __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
  529. + ((__int128_t)a[3]) * a[3]);
  530. __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
  531. __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
  532. /* Modulo reduce double long word. */
  533. t0 += t5 * k19;
  534. t1 += t6 * k19;
  535. t2 += t7 * k19;
  536. t3 += t8 * k19;
  537. /* Normalize to 51-bits of data per word. */
  538. t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
  539. t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
  540. t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
  541. t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
  542. t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
  543. r[0] += (t4 >> 51) * k19;
  544. r[4] = t4 & 0x7ffffffffffff;
  545. }
  546. /* Load 3 little endian bytes into a 64-bit word.
  547. *
  548. * in An array of bytes.
  549. * returns a 64-bit word.
  550. */
  551. word64 load_3(const unsigned char *in)
  552. {
  553. word64 result;
  554. result = ((((word64)in[0]) ) |
  555. (((word64)in[1]) << 8) |
  556. (((word64)in[2]) << 16));
  557. return result;
  558. }
  559. /* Load 4 little endian bytes into a 64-bit word.
  560. *
  561. * in An array of bytes.
  562. * returns a 64-bit word.
  563. */
  564. word64 load_4(const unsigned char *in)
  565. {
  566. word64 result;
  567. result = ((((word64)in[0]) ) |
  568. (((word64)in[1]) << 8) |
  569. (((word64)in[2]) << 16) |
  570. (((word64)in[3]) << 24));
  571. return result;
  572. }