tls_fe.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /*
  2. * Copyright (C) 2018 Denys Vlasenko
  3. *
  4. * Licensed under GPLv2, see file LICENSE in this source tree.
  5. */
  6. #include "tls.h"
  7. typedef uint8_t byte;
  8. typedef uint16_t word16;
  9. typedef uint32_t word32;
  10. #define XMEMSET memset
  11. #define F25519_SIZE CURVE25519_KEYSIZE
  12. /* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
  13. * Header comment is kept intact:
  14. */
  15. /* fe_low_mem.c
  16. *
  17. * Copyright (C) 2006-2017 wolfSSL Inc.
  18. *
  19. * This file is part of wolfSSL.
  20. *
  21. * wolfSSL is free software; you can redistribute it and/or modify
  22. * it under the terms of the GNU General Public License as published by
  23. * the Free Software Foundation; either version 2 of the License, or
  24. * (at your option) any later version.
  25. *
  26. * wolfSSL is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU General Public License
  32. * along with this program; if not, write to the Free Software
  33. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  34. */
  35. /* Based from Daniel Beer's public domain work. */
  36. #if 0 //UNUSED
  37. static void fprime_copy(byte *x, const byte *a)
  38. {
  39. int i;
  40. for (i = 0; i < F25519_SIZE; i++)
  41. x[i] = a[i];
  42. }
  43. #endif
  44. static void lm_copy(byte* x, const byte* a)
  45. {
  46. int i;
  47. for (i = 0; i < F25519_SIZE; i++)
  48. x[i] = a[i];
  49. }
  50. #if 0 //UNUSED
  51. static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
  52. {
  53. const byte mask = -condition;
  54. int i;
  55. for (i = 0; i < F25519_SIZE; i++)
  56. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  57. }
  58. #endif
  59. static void fe_select(byte *dst,
  60. const byte *zero, const byte *one,
  61. byte condition)
  62. {
  63. const byte mask = -condition;
  64. int i;
  65. for (i = 0; i < F25519_SIZE; i++)
  66. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  67. }
  68. #if 0 //UNUSED
  69. static void raw_add(byte *x, const byte *p)
  70. {
  71. word16 c = 0;
  72. int i;
  73. for (i = 0; i < F25519_SIZE; i++) {
  74. c += ((word16)x[i]) + ((word16)p[i]);
  75. x[i] = (byte)c;
  76. c >>= 8;
  77. }
  78. }
  79. #endif
  80. #if 0 //UNUSED
  81. static void raw_try_sub(byte *x, const byte *p)
  82. {
  83. byte minusp[F25519_SIZE];
  84. word16 c = 0;
  85. int i;
  86. for (i = 0; i < F25519_SIZE; i++) {
  87. c = ((word16)x[i]) - ((word16)p[i]) - c;
  88. minusp[i] = (byte)c;
  89. c = (c >> 8) & 1;
  90. }
  91. fprime_select(x, minusp, x, (byte)c);
  92. }
  93. #endif
  94. #if 0 //UNUSED
  95. static int prime_msb(const byte *p)
  96. {
  97. int i;
  98. byte x;
  99. int shift = 1;
  100. int z = F25519_SIZE - 1;
  101. /*
  102. Test for any hot bits.
  103. As soon as one instance is encountered set shift to 0.
  104. */
  105. for (i = F25519_SIZE - 1; i >= 0; i--) {
  106. shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
  107. z -= shift;
  108. }
  109. x = p[z];
  110. z <<= 3;
  111. shift = 1;
  112. for (i = 0; i < 8; i++) {
  113. shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
  114. z += shift;
  115. }
  116. return z - 1;
  117. }
  118. #endif
  119. #if 0 //UNUSED
  120. static void fprime_add(byte *r, const byte *a, const byte *modulus)
  121. {
  122. raw_add(r, a);
  123. raw_try_sub(r, modulus);
  124. }
  125. #endif
  126. #if 0 //UNUSED
  127. static void fprime_sub(byte *r, const byte *a, const byte *modulus)
  128. {
  129. raw_add(r, modulus);
  130. raw_try_sub(r, a);
  131. raw_try_sub(r, modulus);
  132. }
  133. #endif
  134. #if 0 //UNUSED
  135. static void fprime_mul(byte *r, const byte *a, const byte *b,
  136. const byte *modulus)
  137. {
  138. word16 c = 0;
  139. int i,j;
  140. XMEMSET(r, 0, F25519_SIZE);
  141. for (i = prime_msb(modulus); i >= 0; i--) {
  142. const byte bit = (b[i >> 3] >> (i & 7)) & 1;
  143. byte plusa[F25519_SIZE];
  144. for (j = 0; j < F25519_SIZE; j++) {
  145. c |= ((word16)r[j]) << 1;
  146. r[j] = (byte)c;
  147. c >>= 8;
  148. }
  149. raw_try_sub(r, modulus);
  150. fprime_copy(plusa, r);
  151. fprime_add(plusa, a, modulus);
  152. fprime_select(r, r, plusa, bit);
  153. }
  154. }
  155. #endif
  156. #if 0 //UNUSED
  157. static void fe_load(byte *x, word32 c)
  158. {
  159. word32 i;
  160. for (i = 0; i < sizeof(c); i++) {
  161. x[i] = c;
  162. c >>= 8;
  163. }
  164. for (; i < F25519_SIZE; i++)
  165. x[i] = 0;
  166. }
  167. #endif
  168. static void fe_normalize(byte *x)
  169. {
  170. byte minusp[F25519_SIZE];
  171. word16 c;
  172. int i;
  173. /* Reduce using 2^255 = 19 mod p */
  174. c = (x[31] >> 7) * 19;
  175. x[31] &= 127;
  176. for (i = 0; i < F25519_SIZE; i++) {
  177. c += x[i];
  178. x[i] = (byte)c;
  179. c >>= 8;
  180. }
  181. /* The number is now less than 2^255 + 18, and therefore less than
  182. * 2p. Try subtracting p, and conditionally load the subtracted
  183. * value if underflow did not occur.
  184. */
  185. c = 19;
  186. for (i = 0; i + 1 < F25519_SIZE; i++) {
  187. c += x[i];
  188. minusp[i] = (byte)c;
  189. c >>= 8;
  190. }
  191. c += ((word16)x[i]) - 128;
  192. minusp[31] = (byte)c;
  193. /* Load x-p if no underflow */
  194. fe_select(x, minusp, x, (c >> 15) & 1);
  195. }
  196. static void lm_add(byte* r, const byte* a, const byte* b)
  197. {
  198. word16 c = 0;
  199. int i;
  200. /* Add */
  201. for (i = 0; i < F25519_SIZE; i++) {
  202. c >>= 8;
  203. c += ((word16)a[i]) + ((word16)b[i]);
  204. r[i] = (byte)c;
  205. }
  206. /* Reduce with 2^255 = 19 mod p */
  207. r[31] &= 127;
  208. c = (c >> 7) * 19;
  209. for (i = 0; i < F25519_SIZE; i++) {
  210. c += r[i];
  211. r[i] = (byte)c;
  212. c >>= 8;
  213. }
  214. }
  215. static void lm_sub(byte* r, const byte* a, const byte* b)
  216. {
  217. word32 c = 0;
  218. int i;
  219. /* Calculate a + 2p - b, to avoid underflow */
  220. c = 218;
  221. for (i = 0; i + 1 < F25519_SIZE; i++) {
  222. c += 65280 + ((word32)a[i]) - ((word32)b[i]);
  223. r[i] = c;
  224. c >>= 8;
  225. }
  226. c += ((word32)a[31]) - ((word32)b[31]);
  227. r[31] = c & 127;
  228. c = (c >> 7) * 19;
  229. for (i = 0; i < F25519_SIZE; i++) {
  230. c += r[i];
  231. r[i] = c;
  232. c >>= 8;
  233. }
  234. }
  235. #if 0 //UNUSED
  236. static void lm_neg(byte* r, const byte* a)
  237. {
  238. word32 c = 0;
  239. int i;
  240. /* Calculate 2p - a, to avoid underflow */
  241. c = 218;
  242. for (i = 0; i + 1 < F25519_SIZE; i++) {
  243. c += 65280 - ((word32)a[i]);
  244. r[i] = c;
  245. c >>= 8;
  246. }
  247. c -= ((word32)a[31]);
  248. r[31] = c & 127;
  249. c = (c >> 7) * 19;
  250. for (i = 0; i < F25519_SIZE; i++) {
  251. c += r[i];
  252. r[i] = c;
  253. c >>= 8;
  254. }
  255. }
  256. #endif
  257. static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
  258. {
  259. word32 c = 0;
  260. int i;
  261. for (i = 0; i < F25519_SIZE; i++) {
  262. int j;
  263. c >>= 8;
  264. for (j = 0; j <= i; j++)
  265. c += ((word32)a[j]) * ((word32)b[i - j]);
  266. for (; j < F25519_SIZE; j++)
  267. c += ((word32)a[j]) *
  268. ((word32)b[i + F25519_SIZE - j]) * 38;
  269. r[i] = c;
  270. }
  271. r[31] &= 127;
  272. c = (c >> 7) * 19;
  273. for (i = 0; i < F25519_SIZE; i++) {
  274. c += r[i];
  275. r[i] = c;
  276. c >>= 8;
  277. }
  278. }
  279. #if 0 //UNUSED
  280. static void lm_mul(byte *r, const byte* a, const byte *b)
  281. {
  282. byte tmp[F25519_SIZE];
  283. fe_mul__distinct(tmp, a, b);
  284. lm_copy(r, tmp);
  285. }
  286. #endif
  287. static void fe_mul_c(byte *r, const byte *a, word32 b)
  288. {
  289. word32 c = 0;
  290. int i;
  291. for (i = 0; i < F25519_SIZE; i++) {
  292. c >>= 8;
  293. c += b * ((word32)a[i]);
  294. r[i] = c;
  295. }
  296. r[31] &= 127;
  297. c >>= 7;
  298. c *= 19;
  299. for (i = 0; i < F25519_SIZE; i++) {
  300. c += r[i];
  301. r[i] = c;
  302. c >>= 8;
  303. }
  304. }
  305. static void fe_inv__distinct(byte *r, const byte *x)
  306. {
  307. byte s[F25519_SIZE];
  308. int i;
  309. /* This is a prime field, so by Fermat's little theorem:
  310. *
  311. * x^(p-1) = 1 mod p
  312. *
  313. * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
  314. * inverse.
  315. *
  316. * This is a 255-bit binary number with the digits:
  317. *
  318. * 11111111... 01011
  319. *
  320. * We compute the result by the usual binary chain, but
  321. * alternate between keeping the accumulator in r and s, so as
  322. * to avoid copying temporaries.
  323. */
  324. /* 1 1 */
  325. fe_mul__distinct(s, x, x);
  326. fe_mul__distinct(r, s, x);
  327. /* 1 x 248 */
  328. for (i = 0; i < 248; i++) {
  329. fe_mul__distinct(s, r, r);
  330. fe_mul__distinct(r, s, x);
  331. }
  332. /* 0 */
  333. fe_mul__distinct(s, r, r);
  334. /* 1 */
  335. fe_mul__distinct(r, s, s);
  336. fe_mul__distinct(s, r, x);
  337. /* 0 */
  338. fe_mul__distinct(r, s, s);
  339. /* 1 */
  340. fe_mul__distinct(s, r, r);
  341. fe_mul__distinct(r, s, x);
  342. /* 1 */
  343. fe_mul__distinct(s, r, r);
  344. fe_mul__distinct(r, s, x);
  345. }
  346. #if 0 //UNUSED
  347. static void lm_invert(byte *r, const byte *x)
  348. {
  349. byte tmp[F25519_SIZE];
  350. fe_inv__distinct(tmp, x);
  351. lm_copy(r, tmp);
  352. }
  353. #endif
  354. #if 0 //UNUSED
  355. /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
  356. * storage.
  357. */
  358. static void exp2523(byte *r, const byte *x, byte *s)
  359. {
  360. int i;
  361. /* This number is a 252-bit number with the binary expansion:
  362. *
  363. * 111111... 01
  364. */
  365. /* 1 1 */
  366. fe_mul__distinct(r, x, x);
  367. fe_mul__distinct(s, r, x);
  368. /* 1 x 248 */
  369. for (i = 0; i < 248; i++) {
  370. fe_mul__distinct(r, s, s);
  371. fe_mul__distinct(s, r, x);
  372. }
  373. /* 0 */
  374. fe_mul__distinct(r, s, s);
  375. /* 1 */
  376. fe_mul__distinct(s, r, r);
  377. fe_mul__distinct(r, s, x);
  378. }
  379. #endif
  380. #if 0 //UNUSED
  381. static void fe_sqrt(byte *r, const byte *a)
  382. {
  383. byte v[F25519_SIZE];
  384. byte i[F25519_SIZE];
  385. byte x[F25519_SIZE];
  386. byte y[F25519_SIZE];
  387. /* v = (2a)^((p-5)/8) [x = 2a] */
  388. fe_mul_c(x, a, 2);
  389. exp2523(v, x, y);
  390. /* i = 2av^2 - 1 */
  391. fe_mul__distinct(y, v, v);
  392. fe_mul__distinct(i, x, y);
  393. fe_load(y, 1);
  394. lm_sub(i, i, y);
  395. /* r = avi */
  396. fe_mul__distinct(x, v, a);
  397. fe_mul__distinct(r, x, i);
  398. }
  399. #endif
  400. /* Differential addition */
  401. static void xc_diffadd(byte *x5, byte *z5,
  402. const byte *x1, const byte *z1,
  403. const byte *x2, const byte *z2,
  404. const byte *x3, const byte *z3)
  405. {
  406. /* Explicit formulas database: dbl-1987-m3
  407. *
  408. * source 1987 Montgomery "Speeding the Pollard and elliptic curve
  409. * methods of factorization", page 261, fifth display, plus
  410. * common-subexpression elimination
  411. * compute A = X2+Z2
  412. * compute B = X2-Z2
  413. * compute C = X3+Z3
  414. * compute D = X3-Z3
  415. * compute DA = D A
  416. * compute CB = C B
  417. * compute X5 = Z1(DA+CB)^2
  418. * compute Z5 = X1(DA-CB)^2
  419. */
  420. byte da[F25519_SIZE];
  421. byte cb[F25519_SIZE];
  422. byte a[F25519_SIZE];
  423. byte b[F25519_SIZE];
  424. lm_add(a, x2, z2);
  425. lm_sub(b, x3, z3); /* D */
  426. fe_mul__distinct(da, a, b);
  427. lm_sub(b, x2, z2);
  428. lm_add(a, x3, z3); /* C */
  429. fe_mul__distinct(cb, a, b);
  430. lm_add(a, da, cb);
  431. fe_mul__distinct(b, a, a);
  432. fe_mul__distinct(x5, z1, b);
  433. lm_sub(a, da, cb);
  434. fe_mul__distinct(b, a, a);
  435. fe_mul__distinct(z5, x1, b);
  436. }
  437. /* Double an X-coordinate */
  438. static void xc_double(byte *x3, byte *z3,
  439. const byte *x1, const byte *z1)
  440. {
  441. /* Explicit formulas database: dbl-1987-m
  442. *
  443. * source 1987 Montgomery "Speeding the Pollard and elliptic
  444. * curve methods of factorization", page 261, fourth display
  445. * compute X3 = (X1^2-Z1^2)^2
  446. * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
  447. */
  448. byte x1sq[F25519_SIZE];
  449. byte z1sq[F25519_SIZE];
  450. byte x1z1[F25519_SIZE];
  451. byte a[F25519_SIZE];
  452. fe_mul__distinct(x1sq, x1, x1);
  453. fe_mul__distinct(z1sq, z1, z1);
  454. fe_mul__distinct(x1z1, x1, z1);
  455. lm_sub(a, x1sq, z1sq);
  456. fe_mul__distinct(x3, a, a);
  457. fe_mul_c(a, x1z1, 486662);
  458. lm_add(a, x1sq, a);
  459. lm_add(a, z1sq, a);
  460. fe_mul__distinct(x1sq, x1z1, a);
  461. fe_mul_c(z3, x1sq, 4);
  462. }
  463. void FAST_FUNC curve25519(byte *result, const byte *e, const byte *q)
  464. {
  465. int i;
  466. struct {
  467. /* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
  468. /*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
  469. /* Current point: P_m */
  470. byte xm[F25519_SIZE];
  471. byte zm[F25519_SIZE]; // = {1};
  472. /* Predecessor: P_(m-1) */
  473. byte xm1[F25519_SIZE]; // = {1};
  474. byte zm1[F25519_SIZE]; // = {0};
  475. } z;
  476. #define f25519_one z.f25519_one
  477. #define xm z.xm
  478. #define zm z.zm
  479. #define xm1 z.xm1
  480. #define zm1 z.zm1
  481. memset(&z, 0, sizeof(z));
  482. f25519_one[0] = 1;
  483. zm[0] = 1;
  484. xm1[0] = 1;
  485. /* Note: bit 254 is assumed to be 1 */
  486. lm_copy(xm, q);
  487. for (i = 253; i >= 0; i--) {
  488. const int bit = (e[i >> 3] >> (i & 7)) & 1;
  489. byte xms[F25519_SIZE];
  490. byte zms[F25519_SIZE];
  491. /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
  492. xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
  493. xc_double(xm, zm, xm, zm);
  494. /* Compute P_(2m+1) */
  495. xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
  496. /* Select:
  497. * bit = 1 --> (P_(2m+1), P_(2m))
  498. * bit = 0 --> (P_(2m), P_(2m-1))
  499. */
  500. fe_select(xm1, xm1, xm, bit);
  501. fe_select(zm1, zm1, zm, bit);
  502. fe_select(xm, xm, xms, bit);
  503. fe_select(zm, zm, zms, bit);
  504. }
  505. /* Freeze out of projective coordinates */
  506. fe_inv__distinct(zm1, zm);
  507. fe_mul__distinct(result, zm1, xm);
  508. fe_normalize(result);
  509. }