tls_fe.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. /*
  2. * Copyright (C) 2018 Denys Vlasenko
  3. *
  4. * Licensed under GPLv2, see file LICENSE in this source tree.
  5. */
  6. #include "tls.h"
  7. typedef uint8_t byte;
  8. typedef uint16_t word16;
  9. typedef uint32_t word32;
  10. #define XMEMSET memset
  11. #define F25519_SIZE CURVE25519_KEYSIZE
  12. /* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
  13. * Header comment is kept intact:
  14. */
  15. /* fe_low_mem.c
  16. *
  17. * Copyright (C) 2006-2017 wolfSSL Inc.
  18. *
  19. * This file is part of wolfSSL.
  20. *
  21. * wolfSSL is free software; you can redistribute it and/or modify
  22. * it under the terms of the GNU General Public License as published by
  23. * the Free Software Foundation; either version 2 of the License, or
  24. * (at your option) any later version.
  25. *
  26. * wolfSSL is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU General Public License
  32. * along with this program; if not, write to the Free Software
  33. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  34. */
  35. /* Based from Daniel Beer's public domain work. */
  36. #if 0 //UNUSED
  37. static void fprime_copy(byte *x, const byte *a)
  38. {
  39. memcpy(x, a, F25519_SIZE);
  40. }
  41. #endif
  42. static void lm_copy(byte* x, const byte* a)
  43. {
  44. memcpy(x, a, F25519_SIZE);
  45. }
  46. #if 0 //UNUSED
  47. static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
  48. {
  49. const byte mask = -condition;
  50. int i;
  51. for (i = 0; i < F25519_SIZE; i++)
  52. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  53. }
  54. #endif
  55. #if 0 /* constant-time */
  56. static void fe_select(byte *dst,
  57. const byte *src,
  58. byte condition)
  59. {
  60. const byte mask = -condition;
  61. int i;
  62. for (i = 0; i < F25519_SIZE; i++)
  63. dst[i] = dst[i] ^ (mask & (src[i] ^ dst[i]));
  64. }
  65. #else
  66. # define fe_select(dst, src, condition) do { \
  67. if (condition) lm_copy(dst, src); \
  68. } while (0)
  69. #endif
  70. #if 0 //UNUSED
  71. static void raw_add(byte *x, const byte *p)
  72. {
  73. word16 c = 0;
  74. int i;
  75. for (i = 0; i < F25519_SIZE; i++) {
  76. c += ((word16)x[i]) + ((word16)p[i]);
  77. x[i] = (byte)c;
  78. c >>= 8;
  79. }
  80. }
  81. #endif
  82. #if 0 //UNUSED
  83. static void raw_try_sub(byte *x, const byte *p)
  84. {
  85. byte minusp[F25519_SIZE];
  86. word16 c = 0;
  87. int i;
  88. for (i = 0; i < F25519_SIZE; i++) {
  89. c = ((word16)x[i]) - ((word16)p[i]) - c;
  90. minusp[i] = (byte)c;
  91. c = (c >> 8) & 1;
  92. }
  93. fprime_select(x, minusp, x, (byte)c);
  94. }
  95. #endif
  96. #if 0 //UNUSED
  97. static int prime_msb(const byte *p)
  98. {
  99. int i;
  100. byte x;
  101. int shift = 1;
  102. int z = F25519_SIZE - 1;
  103. /*
  104. Test for any hot bits.
  105. As soon as one instance is encountered set shift to 0.
  106. */
  107. for (i = F25519_SIZE - 1; i >= 0; i--) {
  108. shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
  109. z -= shift;
  110. }
  111. x = p[z];
  112. z <<= 3;
  113. shift = 1;
  114. for (i = 0; i < 8; i++) {
  115. shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
  116. z += shift;
  117. }
  118. return z - 1;
  119. }
  120. #endif
  121. #if 0 //UNUSED
  122. static void fprime_add(byte *r, const byte *a, const byte *modulus)
  123. {
  124. raw_add(r, a);
  125. raw_try_sub(r, modulus);
  126. }
  127. #endif
  128. #if 0 //UNUSED
  129. static void fprime_sub(byte *r, const byte *a, const byte *modulus)
  130. {
  131. raw_add(r, modulus);
  132. raw_try_sub(r, a);
  133. raw_try_sub(r, modulus);
  134. }
  135. #endif
  136. #if 0 //UNUSED
  137. static void fprime_mul(byte *r, const byte *a, const byte *b,
  138. const byte *modulus)
  139. {
  140. word16 c = 0;
  141. int i,j;
  142. XMEMSET(r, 0, F25519_SIZE);
  143. for (i = prime_msb(modulus); i >= 0; i--) {
  144. const byte bit = (b[i >> 3] >> (i & 7)) & 1;
  145. byte plusa[F25519_SIZE];
  146. for (j = 0; j < F25519_SIZE; j++) {
  147. c |= ((word16)r[j]) << 1;
  148. r[j] = (byte)c;
  149. c >>= 8;
  150. }
  151. raw_try_sub(r, modulus);
  152. fprime_copy(plusa, r);
  153. fprime_add(plusa, a, modulus);
  154. fprime_select(r, r, plusa, bit);
  155. }
  156. }
  157. #endif
  158. #if 0 //UNUSED
  159. static void fe_load(byte *x, word32 c)
  160. {
  161. int i;
  162. for (i = 0; i < sizeof(c); i++) {
  163. x[i] = c;
  164. c >>= 8;
  165. }
  166. for (; i < F25519_SIZE; i++)
  167. x[i] = 0;
  168. }
  169. #endif
  170. static void fe_reduce(byte *x, word32 c)
  171. {
  172. int i;
  173. /* Reduce using 2^255 = 19 mod p */
  174. x[31] = c & 127;
  175. c = (c >> 7) * 19;
  176. for (i = 0; i < F25519_SIZE; i++) {
  177. c += x[i];
  178. x[i] = (byte)c;
  179. c >>= 8;
  180. }
  181. }
  182. static void fe_normalize(byte *x)
  183. {
  184. byte minusp[F25519_SIZE];
  185. unsigned c;
  186. int i;
  187. /* Reduce using 2^255 = 19 mod p */
  188. fe_reduce(x, x[31]);
  189. /* The number is now less than 2^255 + 18, and therefore less than
  190. * 2p. Try subtracting p, and conditionally load the subtracted
  191. * value if underflow did not occur.
  192. */
  193. c = 19;
  194. for (i = 0; i < F25519_SIZE - 1; i++) {
  195. c += x[i];
  196. minusp[i] = (byte)c;
  197. c >>= 8;
  198. }
  199. c += ((unsigned)x[i]) - 128;
  200. minusp[31] = (byte)c;
  201. /* Load x-p if no underflow */
  202. fe_select(x, minusp, !(c & (1<<15)));
  203. }
  204. static void lm_add(byte* r, const byte* a, const byte* b)
  205. {
  206. unsigned c = 0;
  207. int i;
  208. /* Add */
  209. for (i = 0; i < F25519_SIZE; i++) {
  210. c >>= 8;
  211. c += ((unsigned)a[i]) + ((unsigned)b[i]);
  212. r[i] = (byte)c;
  213. }
  214. /* Reduce with 2^255 = 19 mod p */
  215. fe_reduce(r, c);
  216. }
  217. static void lm_sub(byte* r, const byte* a, const byte* b)
  218. {
  219. word32 c = 0;
  220. int i;
  221. /* Calculate a + 2p - b, to avoid underflow */
  222. c = 218;
  223. for (i = 0; i < F25519_SIZE - 1; i++) {
  224. c += 65280 + ((word32)a[i]) - ((word32)b[i]);
  225. r[i] = c;
  226. c >>= 8;
  227. }
  228. c += ((word32)a[31]) - ((word32)b[31]);
  229. fe_reduce(r, c);
  230. }
  231. #if 0 //UNUSED
  232. static void lm_neg(byte* r, const byte* a)
  233. {
  234. word32 c = 0;
  235. int i;
  236. /* Calculate 2p - a, to avoid underflow */
  237. c = 218;
  238. for (i = 0; i < F25519_SIZE - 1; i++) {
  239. c += 65280 - ((word32)a[i]);
  240. r[i] = c;
  241. c >>= 8;
  242. }
  243. c -= ((word32)a[31]);
  244. fe_reduce(r, c);
  245. }
  246. #endif
  247. static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
  248. {
  249. word32 c = 0;
  250. int i;
  251. for (i = 0; i < F25519_SIZE; i++) {
  252. int j;
  253. c >>= 8;
  254. for (j = 0; j <= i; j++)
  255. c += ((word32)a[j]) * ((word32)b[i - j]);
  256. for (; j < F25519_SIZE; j++)
  257. c += ((word32)a[j]) *
  258. ((word32)b[i + F25519_SIZE - j]) * 38;
  259. r[i] = c;
  260. }
  261. fe_reduce(r, c);
  262. }
  263. #if 0 //UNUSED
  264. static void lm_mul(byte *r, const byte* a, const byte *b)
  265. {
  266. byte tmp[F25519_SIZE];
  267. fe_mul__distinct(tmp, a, b);
  268. lm_copy(r, tmp);
  269. }
  270. #endif
  271. static void fe_mul_c(byte *r, const byte *a, word32 b)
  272. {
  273. word32 c = 0;
  274. int i;
  275. for (i = 0; i < F25519_SIZE; i++) {
  276. c >>= 8;
  277. c += b * ((word32)a[i]);
  278. r[i] = c;
  279. }
  280. fe_reduce(r, c);
  281. }
  282. static void fe_inv__distinct(byte *r, const byte *x)
  283. {
  284. byte s[F25519_SIZE];
  285. int i;
  286. /* This is a prime field, so by Fermat's little theorem:
  287. *
  288. * x^(p-1) = 1 mod p
  289. *
  290. * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
  291. * inverse.
  292. *
  293. * This is a 255-bit binary number with the digits:
  294. *
  295. * 11111111... 01011
  296. *
  297. * We compute the result by the usual binary chain, but
  298. * alternate between keeping the accumulator in r and s, so as
  299. * to avoid copying temporaries.
  300. */
  301. lm_copy(r, x);
  302. /* 1, 1 x 249 */
  303. for (i = 0; i < 249; i++) {
  304. fe_mul__distinct(s, r, r);
  305. fe_mul__distinct(r, s, x);
  306. }
  307. /* 0 */
  308. fe_mul__distinct(s, r, r);
  309. /* 1 */
  310. fe_mul__distinct(r, s, s);
  311. fe_mul__distinct(s, r, x);
  312. /* 0 */
  313. fe_mul__distinct(r, s, s);
  314. /* 1, 1 */
  315. for (i = 0; i < 2; i++) {
  316. fe_mul__distinct(s, r, r);
  317. fe_mul__distinct(r, s, x);
  318. }
  319. }
  320. #if 0 //UNUSED
  321. static void lm_invert(byte *r, const byte *x)
  322. {
  323. byte tmp[F25519_SIZE];
  324. fe_inv__distinct(tmp, x);
  325. lm_copy(r, tmp);
  326. }
  327. #endif
  328. #if 0 //UNUSED
  329. /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
  330. * storage.
  331. */
  332. static void exp2523(byte *r, const byte *x, byte *s)
  333. {
  334. int i;
  335. /* This number is a 252-bit number with the binary expansion:
  336. *
  337. * 111111... 01
  338. */
  339. lm_copy(s, x);
  340. /* 1, 1 x 249 */
  341. for (i = 0; i < 249; i++) {
  342. fe_mul__distinct(r, s, s);
  343. fe_mul__distinct(s, r, x);
  344. }
  345. /* 0 */
  346. fe_mul__distinct(r, s, s);
  347. /* 1 */
  348. fe_mul__distinct(s, r, r);
  349. fe_mul__distinct(r, s, x);
  350. }
  351. #endif
  352. #if 0 //UNUSED
  353. static void fe_sqrt(byte *r, const byte *a)
  354. {
  355. byte v[F25519_SIZE];
  356. byte i[F25519_SIZE];
  357. byte x[F25519_SIZE];
  358. byte y[F25519_SIZE];
  359. /* v = (2a)^((p-5)/8) [x = 2a] */
  360. fe_mul_c(x, a, 2);
  361. exp2523(v, x, y);
  362. /* i = 2av^2 - 1 */
  363. fe_mul__distinct(y, v, v);
  364. fe_mul__distinct(i, x, y);
  365. fe_load(y, 1);
  366. lm_sub(i, i, y);
  367. /* r = avi */
  368. fe_mul__distinct(x, v, a);
  369. fe_mul__distinct(r, x, i);
  370. }
  371. #endif
  372. /* Differential addition */
  373. static void xc_diffadd(byte *x5, byte *z5,
  374. const byte *x1, const byte *z1,
  375. const byte *x2, const byte *z2,
  376. const byte *x3, const byte *z3)
  377. {
  378. /* Explicit formulas database: dbl-1987-m3
  379. *
  380. * source 1987 Montgomery "Speeding the Pollard and elliptic curve
  381. * methods of factorization", page 261, fifth display, plus
  382. * common-subexpression elimination
  383. * compute A = X2+Z2
  384. * compute B = X2-Z2
  385. * compute C = X3+Z3
  386. * compute D = X3-Z3
  387. * compute DA = D A
  388. * compute CB = C B
  389. * compute X5 = Z1(DA+CB)^2
  390. * compute Z5 = X1(DA-CB)^2
  391. */
  392. byte da[F25519_SIZE];
  393. byte cb[F25519_SIZE];
  394. byte a[F25519_SIZE];
  395. byte b[F25519_SIZE];
  396. lm_add(a, x2, z2);
  397. lm_sub(b, x3, z3); /* D */
  398. fe_mul__distinct(da, a, b);
  399. lm_sub(b, x2, z2);
  400. lm_add(a, x3, z3); /* C */
  401. fe_mul__distinct(cb, a, b);
  402. lm_add(a, da, cb);
  403. fe_mul__distinct(b, a, a);
  404. fe_mul__distinct(x5, z1, b);
  405. lm_sub(a, da, cb);
  406. fe_mul__distinct(b, a, a);
  407. fe_mul__distinct(z5, x1, b);
  408. }
  409. /* Double an X-coordinate */
  410. static void xc_double(byte *x3, byte *z3,
  411. const byte *x1, const byte *z1)
  412. {
  413. /* Explicit formulas database: dbl-1987-m
  414. *
  415. * source 1987 Montgomery "Speeding the Pollard and elliptic
  416. * curve methods of factorization", page 261, fourth display
  417. * compute X3 = (X1^2-Z1^2)^2
  418. * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
  419. */
  420. byte x1sq[F25519_SIZE];
  421. byte z1sq[F25519_SIZE];
  422. byte x1z1[F25519_SIZE];
  423. byte a[F25519_SIZE];
  424. fe_mul__distinct(x1sq, x1, x1);
  425. fe_mul__distinct(z1sq, z1, z1);
  426. fe_mul__distinct(x1z1, x1, z1);
  427. lm_sub(a, x1sq, z1sq);
  428. fe_mul__distinct(x3, a, a);
  429. fe_mul_c(a, x1z1, 486662);
  430. lm_add(a, x1sq, a);
  431. lm_add(a, z1sq, a);
  432. fe_mul__distinct(x1sq, x1z1, a);
  433. fe_mul_c(z3, x1sq, 4);
  434. }
  435. static void curve25519(byte *result, const byte *e, const byte *q)
  436. {
  437. int i;
  438. struct Z {
  439. /* for bbox's special case of q == NULL meaning "use basepoint" */
  440. /*static const*/ uint8_t basepoint9[CURVE25519_KEYSIZE]; // = {9};
  441. /* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
  442. /*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
  443. /* Predecessor: P_(m-1) */
  444. byte xm1[F25519_SIZE]; // = {1};
  445. byte zm1[F25519_SIZE]; // = {0};
  446. /* Current point: P_m */
  447. byte xm[F25519_SIZE];
  448. byte zm[F25519_SIZE]; // = {1};
  449. /* Temporaries */
  450. byte xms[F25519_SIZE];
  451. byte zms[F25519_SIZE];
  452. } z;
  453. uint8_t *XM1 = (uint8_t*)&z + offsetof(struct Z,xm1); // gcc 11.0.0 workaround
  454. #define basepoint9 z.basepoint9
  455. #define f25519_one z.f25519_one
  456. #define xm1 z.xm1
  457. #define zm1 z.zm1
  458. #define xm z.xm
  459. #define zm z.zm
  460. #define xms z.xms
  461. #define zms z.zms
  462. memset(&z, 0, sizeof(z));
  463. f25519_one[0] = 1;
  464. zm[0] = 1;
  465. xm1[0] = 1;
  466. if (!q) {
  467. basepoint9[0] = 9;
  468. q = basepoint9;
  469. }
  470. /* Note: bit 254 is assumed to be 1 */
  471. lm_copy(xm, q);
  472. for (i = 253; i >= 0; i--) {
  473. const int bit = (e[i >> 3] >> (i & 7)) & 1;
  474. // byte xms[F25519_SIZE];
  475. // byte zms[F25519_SIZE];
  476. /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
  477. xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
  478. xc_double(xm, zm, xm, zm);
  479. /* Compute P_(2m+1) */
  480. xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
  481. /* Select:
  482. * bit = 1 --> (P_(2m+1), P_(2m))
  483. * bit = 0 --> (P_(2m), P_(2m-1))
  484. */
  485. #if 0
  486. fe_select(xm1, xm, bit);
  487. fe_select(zm1, zm, bit);
  488. fe_select(xm, xms, bit);
  489. fe_select(zm, zms, bit);
  490. #else
  491. // same as above in about 50 bytes smaller code, but
  492. // requires that in-memory order is exactly xm1,zm1,xm,zm,xms,zms
  493. if (bit) {
  494. //memcpy(xm1, xm, 4 * F25519_SIZE);
  495. //^^^ gcc 11.0.0 warns of overlapping memcpy
  496. //memmove(xm1, xm, 4 * F25519_SIZE);
  497. //^^^ gcc 11.0.0 warns of out-of-bounds access to xm1[]
  498. memmove(XM1, XM1 + 2 * F25519_SIZE, 4 * F25519_SIZE);
  499. }
  500. #endif
  501. }
  502. /* Freeze out of projective coordinates */
  503. fe_inv__distinct(zm1, zm);
  504. fe_mul__distinct(result, zm1, xm);
  505. fe_normalize(result);
  506. }
  507. /* interface to bbox's TLS code: */
  508. void FAST_FUNC curve_x25519_compute_pubkey_and_premaster(
  509. uint8_t *pubkey, uint8_t *premaster,
  510. const uint8_t *peerkey32)
  511. {
  512. uint8_t privkey[CURVE25519_KEYSIZE]; //[32]
  513. /* Generate random private key, see RFC 7748 */
  514. tls_get_random(privkey, sizeof(privkey));
  515. privkey[0] &= 0xf8;
  516. privkey[CURVE25519_KEYSIZE-1] = ((privkey[CURVE25519_KEYSIZE-1] & 0x7f) | 0x40);
  517. /* Compute public key */
  518. curve25519(pubkey, privkey, NULL /* "use base point of x25519" */);
  519. /* Compute premaster using peer's public key */
  520. curve25519(premaster, privkey, peerkey32);
  521. }