tls_fe.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. /*
  2. * Copyright (C) 2018 Denys Vlasenko
  3. *
  4. * Licensed under GPLv2, see file LICENSE in this source tree.
  5. */
  6. #include "tls.h"
  7. typedef uint8_t byte;
  8. typedef uint16_t word16;
  9. typedef uint32_t word32;
  10. #define XMEMSET memset
  11. #define F25519_SIZE CURVE25519_KEYSIZE
  12. /* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
  13. * Header comment is kept intact:
  14. */
  15. /* fe_low_mem.c
  16. *
  17. * Copyright (C) 2006-2017 wolfSSL Inc.
  18. *
  19. * This file is part of wolfSSL.
  20. *
  21. * wolfSSL is free software; you can redistribute it and/or modify
  22. * it under the terms of the GNU General Public License as published by
  23. * the Free Software Foundation; either version 2 of the License, or
  24. * (at your option) any later version.
  25. *
  26. * wolfSSL is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU General Public License
  32. * along with this program; if not, write to the Free Software
  33. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  34. */
  35. /* Based from Daniel Beer's public domain work. */
  36. #if 0 //UNUSED
  37. static void fprime_copy(byte *x, const byte *a)
  38. {
  39. memcpy(x, a, F25519_SIZE);
  40. }
  41. #endif
  42. static void lm_copy(byte* x, const byte* a)
  43. {
  44. memcpy(x, a, F25519_SIZE);
  45. }
  46. #if 0 //UNUSED
  47. static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
  48. {
  49. const byte mask = -condition;
  50. int i;
  51. for (i = 0; i < F25519_SIZE; i++)
  52. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  53. }
  54. #endif
  55. static void fe_select(byte *dst,
  56. const byte *zero, const byte *one,
  57. byte condition)
  58. {
  59. const byte mask = -condition;
  60. int i;
  61. for (i = 0; i < F25519_SIZE; i++)
  62. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  63. }
  64. #if 0 //UNUSED
  65. static void raw_add(byte *x, const byte *p)
  66. {
  67. word16 c = 0;
  68. int i;
  69. for (i = 0; i < F25519_SIZE; i++) {
  70. c += ((word16)x[i]) + ((word16)p[i]);
  71. x[i] = (byte)c;
  72. c >>= 8;
  73. }
  74. }
  75. #endif
  76. #if 0 //UNUSED
  77. static void raw_try_sub(byte *x, const byte *p)
  78. {
  79. byte minusp[F25519_SIZE];
  80. word16 c = 0;
  81. int i;
  82. for (i = 0; i < F25519_SIZE; i++) {
  83. c = ((word16)x[i]) - ((word16)p[i]) - c;
  84. minusp[i] = (byte)c;
  85. c = (c >> 8) & 1;
  86. }
  87. fprime_select(x, minusp, x, (byte)c);
  88. }
  89. #endif
  90. #if 0 //UNUSED
  91. static int prime_msb(const byte *p)
  92. {
  93. int i;
  94. byte x;
  95. int shift = 1;
  96. int z = F25519_SIZE - 1;
  97. /*
  98. Test for any hot bits.
  99. As soon as one instance is encountered set shift to 0.
  100. */
  101. for (i = F25519_SIZE - 1; i >= 0; i--) {
  102. shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
  103. z -= shift;
  104. }
  105. x = p[z];
  106. z <<= 3;
  107. shift = 1;
  108. for (i = 0; i < 8; i++) {
  109. shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
  110. z += shift;
  111. }
  112. return z - 1;
  113. }
  114. #endif
  115. #if 0 //UNUSED
  116. static void fprime_add(byte *r, const byte *a, const byte *modulus)
  117. {
  118. raw_add(r, a);
  119. raw_try_sub(r, modulus);
  120. }
  121. #endif
  122. #if 0 //UNUSED
  123. static void fprime_sub(byte *r, const byte *a, const byte *modulus)
  124. {
  125. raw_add(r, modulus);
  126. raw_try_sub(r, a);
  127. raw_try_sub(r, modulus);
  128. }
  129. #endif
  130. #if 0 //UNUSED
  131. static void fprime_mul(byte *r, const byte *a, const byte *b,
  132. const byte *modulus)
  133. {
  134. word16 c = 0;
  135. int i,j;
  136. XMEMSET(r, 0, F25519_SIZE);
  137. for (i = prime_msb(modulus); i >= 0; i--) {
  138. const byte bit = (b[i >> 3] >> (i & 7)) & 1;
  139. byte plusa[F25519_SIZE];
  140. for (j = 0; j < F25519_SIZE; j++) {
  141. c |= ((word16)r[j]) << 1;
  142. r[j] = (byte)c;
  143. c >>= 8;
  144. }
  145. raw_try_sub(r, modulus);
  146. fprime_copy(plusa, r);
  147. fprime_add(plusa, a, modulus);
  148. fprime_select(r, r, plusa, bit);
  149. }
  150. }
  151. #endif
  152. #if 0 //UNUSED
  153. static void fe_load(byte *x, word32 c)
  154. {
  155. word32 i;
  156. for (i = 0; i < sizeof(c); i++) {
  157. x[i] = c;
  158. c >>= 8;
  159. }
  160. for (; i < F25519_SIZE; i++)
  161. x[i] = 0;
  162. }
  163. #endif
  164. static void fe_normalize(byte *x)
  165. {
  166. byte minusp[F25519_SIZE];
  167. unsigned c;
  168. int i;
  169. /* Reduce using 2^255 = 19 mod p */
  170. c = (x[31] >> 7) * 19;
  171. x[31] &= 127;
  172. for (i = 0; i < F25519_SIZE; i++) {
  173. c += x[i];
  174. x[i] = (byte)c;
  175. c >>= 8;
  176. }
  177. /* The number is now less than 2^255 + 18, and therefore less than
  178. * 2p. Try subtracting p, and conditionally load the subtracted
  179. * value if underflow did not occur.
  180. */
  181. c = 19;
  182. for (i = 0; i < F25519_SIZE - 1; i++) {
  183. c += x[i];
  184. minusp[i] = (byte)c;
  185. c >>= 8;
  186. }
  187. c += ((unsigned)x[i]) - 128;
  188. minusp[31] = (byte)c;
  189. /* Load x-p if no underflow */
  190. fe_select(x, minusp, x, (c >> 15) & 1);
  191. }
  192. static void lm_add(byte* r, const byte* a, const byte* b)
  193. {
  194. unsigned c = 0;
  195. int i;
  196. /* Add */
  197. for (i = 0; i < F25519_SIZE; i++) {
  198. c >>= 8;
  199. c += ((unsigned)a[i]) + ((unsigned)b[i]);
  200. r[i] = (byte)c;
  201. }
  202. /* Reduce with 2^255 = 19 mod p */
  203. r[31] &= 127;
  204. c = (c >> 7) * 19;
  205. for (i = 0; i < F25519_SIZE; i++) {
  206. c += r[i];
  207. r[i] = (byte)c;
  208. c >>= 8;
  209. }
  210. }
  211. static void lm_sub(byte* r, const byte* a, const byte* b)
  212. {
  213. word32 c = 0;
  214. int i;
  215. /* Calculate a + 2p - b, to avoid underflow */
  216. c = 218;
  217. for (i = 0; i + 1 < F25519_SIZE; i++) {
  218. c += 65280 + ((word32)a[i]) - ((word32)b[i]);
  219. r[i] = c;
  220. c >>= 8;
  221. }
  222. c += ((word32)a[31]) - ((word32)b[31]);
  223. r[31] = c & 127;
  224. c = (c >> 7) * 19;
  225. for (i = 0; i < F25519_SIZE; i++) {
  226. c += r[i];
  227. r[i] = c;
  228. c >>= 8;
  229. }
  230. }
  231. #if 0 //UNUSED
  232. static void lm_neg(byte* r, const byte* a)
  233. {
  234. word32 c = 0;
  235. int i;
  236. /* Calculate 2p - a, to avoid underflow */
  237. c = 218;
  238. for (i = 0; i + 1 < F25519_SIZE; i++) {
  239. c += 65280 - ((word32)a[i]);
  240. r[i] = c;
  241. c >>= 8;
  242. }
  243. c -= ((word32)a[31]);
  244. r[31] = c & 127;
  245. c = (c >> 7) * 19;
  246. for (i = 0; i < F25519_SIZE; i++) {
  247. c += r[i];
  248. r[i] = c;
  249. c >>= 8;
  250. }
  251. }
  252. #endif
  253. static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
  254. {
  255. word32 c = 0;
  256. int i;
  257. for (i = 0; i < F25519_SIZE; i++) {
  258. int j;
  259. c >>= 8;
  260. for (j = 0; j <= i; j++)
  261. c += ((word32)a[j]) * ((word32)b[i - j]);
  262. for (; j < F25519_SIZE; j++)
  263. c += ((word32)a[j]) *
  264. ((word32)b[i + F25519_SIZE - j]) * 38;
  265. r[i] = c;
  266. }
  267. r[31] &= 127;
  268. c = (c >> 7) * 19;
  269. for (i = 0; i < F25519_SIZE; i++) {
  270. c += r[i];
  271. r[i] = c;
  272. c >>= 8;
  273. }
  274. }
  275. #if 0 //UNUSED
  276. static void lm_mul(byte *r, const byte* a, const byte *b)
  277. {
  278. byte tmp[F25519_SIZE];
  279. fe_mul__distinct(tmp, a, b);
  280. lm_copy(r, tmp);
  281. }
  282. #endif
  283. static void fe_mul_c(byte *r, const byte *a, word32 b)
  284. {
  285. word32 c = 0;
  286. int i;
  287. for (i = 0; i < F25519_SIZE; i++) {
  288. c >>= 8;
  289. c += b * ((word32)a[i]);
  290. r[i] = c;
  291. }
  292. r[31] &= 127;
  293. c >>= 7;
  294. c *= 19;
  295. for (i = 0; i < F25519_SIZE; i++) {
  296. c += r[i];
  297. r[i] = c;
  298. c >>= 8;
  299. }
  300. }
  301. static void fe_inv__distinct(byte *r, const byte *x)
  302. {
  303. byte s[F25519_SIZE];
  304. int i;
  305. /* This is a prime field, so by Fermat's little theorem:
  306. *
  307. * x^(p-1) = 1 mod p
  308. *
  309. * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
  310. * inverse.
  311. *
  312. * This is a 255-bit binary number with the digits:
  313. *
  314. * 11111111... 01011
  315. *
  316. * We compute the result by the usual binary chain, but
  317. * alternate between keeping the accumulator in r and s, so as
  318. * to avoid copying temporaries.
  319. */
  320. /* 1 1 */
  321. fe_mul__distinct(s, x, x);
  322. fe_mul__distinct(r, s, x);
  323. /* 1 x 248 */
  324. for (i = 0; i < 248; i++) {
  325. fe_mul__distinct(s, r, r);
  326. fe_mul__distinct(r, s, x);
  327. }
  328. /* 0 */
  329. fe_mul__distinct(s, r, r);
  330. /* 1 */
  331. fe_mul__distinct(r, s, s);
  332. fe_mul__distinct(s, r, x);
  333. /* 0 */
  334. fe_mul__distinct(r, s, s);
  335. /* 1 */
  336. fe_mul__distinct(s, r, r);
  337. fe_mul__distinct(r, s, x);
  338. /* 1 */
  339. fe_mul__distinct(s, r, r);
  340. fe_mul__distinct(r, s, x);
  341. }
  342. #if 0 //UNUSED
  343. static void lm_invert(byte *r, const byte *x)
  344. {
  345. byte tmp[F25519_SIZE];
  346. fe_inv__distinct(tmp, x);
  347. lm_copy(r, tmp);
  348. }
  349. #endif
  350. #if 0 //UNUSED
  351. /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
  352. * storage.
  353. */
  354. static void exp2523(byte *r, const byte *x, byte *s)
  355. {
  356. int i;
  357. /* This number is a 252-bit number with the binary expansion:
  358. *
  359. * 111111... 01
  360. */
  361. /* 1 1 */
  362. fe_mul__distinct(r, x, x);
  363. fe_mul__distinct(s, r, x);
  364. /* 1 x 248 */
  365. for (i = 0; i < 248; i++) {
  366. fe_mul__distinct(r, s, s);
  367. fe_mul__distinct(s, r, x);
  368. }
  369. /* 0 */
  370. fe_mul__distinct(r, s, s);
  371. /* 1 */
  372. fe_mul__distinct(s, r, r);
  373. fe_mul__distinct(r, s, x);
  374. }
  375. #endif
  376. #if 0 //UNUSED
  377. static void fe_sqrt(byte *r, const byte *a)
  378. {
  379. byte v[F25519_SIZE];
  380. byte i[F25519_SIZE];
  381. byte x[F25519_SIZE];
  382. byte y[F25519_SIZE];
  383. /* v = (2a)^((p-5)/8) [x = 2a] */
  384. fe_mul_c(x, a, 2);
  385. exp2523(v, x, y);
  386. /* i = 2av^2 - 1 */
  387. fe_mul__distinct(y, v, v);
  388. fe_mul__distinct(i, x, y);
  389. fe_load(y, 1);
  390. lm_sub(i, i, y);
  391. /* r = avi */
  392. fe_mul__distinct(x, v, a);
  393. fe_mul__distinct(r, x, i);
  394. }
  395. #endif
  396. /* Differential addition */
  397. static void xc_diffadd(byte *x5, byte *z5,
  398. const byte *x1, const byte *z1,
  399. const byte *x2, const byte *z2,
  400. const byte *x3, const byte *z3)
  401. {
  402. /* Explicit formulas database: dbl-1987-m3
  403. *
  404. * source 1987 Montgomery "Speeding the Pollard and elliptic curve
  405. * methods of factorization", page 261, fifth display, plus
  406. * common-subexpression elimination
  407. * compute A = X2+Z2
  408. * compute B = X2-Z2
  409. * compute C = X3+Z3
  410. * compute D = X3-Z3
  411. * compute DA = D A
  412. * compute CB = C B
  413. * compute X5 = Z1(DA+CB)^2
  414. * compute Z5 = X1(DA-CB)^2
  415. */
  416. byte da[F25519_SIZE];
  417. byte cb[F25519_SIZE];
  418. byte a[F25519_SIZE];
  419. byte b[F25519_SIZE];
  420. lm_add(a, x2, z2);
  421. lm_sub(b, x3, z3); /* D */
  422. fe_mul__distinct(da, a, b);
  423. lm_sub(b, x2, z2);
  424. lm_add(a, x3, z3); /* C */
  425. fe_mul__distinct(cb, a, b);
  426. lm_add(a, da, cb);
  427. fe_mul__distinct(b, a, a);
  428. fe_mul__distinct(x5, z1, b);
  429. lm_sub(a, da, cb);
  430. fe_mul__distinct(b, a, a);
  431. fe_mul__distinct(z5, x1, b);
  432. }
  433. /* Double an X-coordinate */
  434. static void xc_double(byte *x3, byte *z3,
  435. const byte *x1, const byte *z1)
  436. {
  437. /* Explicit formulas database: dbl-1987-m
  438. *
  439. * source 1987 Montgomery "Speeding the Pollard and elliptic
  440. * curve methods of factorization", page 261, fourth display
  441. * compute X3 = (X1^2-Z1^2)^2
  442. * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
  443. */
  444. byte x1sq[F25519_SIZE];
  445. byte z1sq[F25519_SIZE];
  446. byte x1z1[F25519_SIZE];
  447. byte a[F25519_SIZE];
  448. fe_mul__distinct(x1sq, x1, x1);
  449. fe_mul__distinct(z1sq, z1, z1);
  450. fe_mul__distinct(x1z1, x1, z1);
  451. lm_sub(a, x1sq, z1sq);
  452. fe_mul__distinct(x3, a, a);
  453. fe_mul_c(a, x1z1, 486662);
  454. lm_add(a, x1sq, a);
  455. lm_add(a, z1sq, a);
  456. fe_mul__distinct(x1sq, x1z1, a);
  457. fe_mul_c(z3, x1sq, 4);
  458. }
  459. void FAST_FUNC curve25519(byte *result, const byte *e, const byte *q)
  460. {
  461. int i;
  462. struct {
  463. /* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
  464. /*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
  465. /* Current point: P_m */
  466. byte xm[F25519_SIZE];
  467. byte zm[F25519_SIZE]; // = {1};
  468. /* Predecessor: P_(m-1) */
  469. byte xm1[F25519_SIZE]; // = {1};
  470. byte zm1[F25519_SIZE]; // = {0};
  471. } z;
  472. #define f25519_one z.f25519_one
  473. #define xm z.xm
  474. #define zm z.zm
  475. #define xm1 z.xm1
  476. #define zm1 z.zm1
  477. memset(&z, 0, sizeof(z));
  478. f25519_one[0] = 1;
  479. zm[0] = 1;
  480. xm1[0] = 1;
  481. /* Note: bit 254 is assumed to be 1 */
  482. lm_copy(xm, q);
  483. for (i = 253; i >= 0; i--) {
  484. const int bit = (e[i >> 3] >> (i & 7)) & 1;
  485. byte xms[F25519_SIZE];
  486. byte zms[F25519_SIZE];
  487. /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
  488. xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
  489. xc_double(xm, zm, xm, zm);
  490. /* Compute P_(2m+1) */
  491. xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
  492. /* Select:
  493. * bit = 1 --> (P_(2m+1), P_(2m))
  494. * bit = 0 --> (P_(2m), P_(2m-1))
  495. */
  496. fe_select(xm1, xm1, xm, bit);
  497. fe_select(zm1, zm1, zm, bit);
  498. fe_select(xm, xm, xms, bit);
  499. fe_select(zm, zm, zms, bit);
  500. }
  501. /* Freeze out of projective coordinates */
  502. fe_inv__distinct(zm1, zm);
  503. fe_mul__distinct(result, zm1, xm);
  504. fe_normalize(result);
  505. }