  1. /* crypto/ec/ecp_nistp224.c */
  2. /*
  3. * Written by Emilia Kasper (Google) for the OpenSSL project.
  4. */
  5. /* ====================================================================
  6. * Copyright (c) 2000-2010 The OpenSSL Project. All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. All advertising materials mentioning features or use of this
  21. * software must display the following acknowledgment:
  22. * "This product includes software developed by the OpenSSL Project
  23. * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
  24. *
  25. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  26. * endorse or promote products derived from this software without
  27. * prior written permission. For written permission, please contact
  28. * licensing@OpenSSL.org.
  29. *
  30. * 5. Products derived from this software may not be called "OpenSSL"
  31. * nor may "OpenSSL" appear in their names without prior written
  32. * permission of the OpenSSL Project.
  33. *
  34. * 6. Redistributions of any form whatsoever must retain the following
  35. * acknowledgment:
  36. * "This product includes software developed by the OpenSSL Project
  37. * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
  38. *
  39. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  40. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  41. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  42. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  43. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  44. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  45. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  46. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  48. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  49. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  50. * OF THE POSSIBILITY OF SUCH DAMAGE.
  51. * ====================================================================
  52. *
  53. * This product includes cryptographic software written by Eric Young
  54. * (eay@cryptsoft.com). This product includes software written by Tim
  55. * Hudson (tjh@cryptsoft.com).
  56. *
  57. */
  58. /*
  59. * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  60. *
  61. * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  62. * and Adam Langley's public domain 64-bit C implementation of curve25519
  63. */
  64. #include <openssl/opensslconf.h>
  65. #ifndef OPENSSL_NO_EC_NISTP224_64_GCC_128
  66. #include <stdint.h>
  67. #include <string.h>
  68. #include <openssl/err.h>
  69. #include "ec_lcl.h"
  70. #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  71. /* even with gcc, the typedef won't work for 32-bit platforms */
  72. typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
  73. #else
  74. #error "Need GCC 3.1 or later to define type uint128_t"
  75. #endif
  76. typedef uint8_t u8;
  77. /******************************************************************************/
  78. /* INTERNAL REPRESENTATION OF FIELD ELEMENTS
  79. *
  80. * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  81. * where each slice a_i is a 64-bit word, i.e., a field element is an fslice
  82. * array a with 4 elements, where a[i] = a_i.
  83. * Outputs from multiplications are represented as unreduced polynomials
  84. * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  85. * where each b_i is a 128-bit word. We ensure that inputs to each field
  86. * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  87. * and fit into a 128-bit word without overflow. The coefficients are then
  88. * again partially reduced to a_i < 2^57. We only reduce to the unique minimal
  89. * representation at the end of the computation.
  90. *
  91. */
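/* For example, the field element 1 has slices {1, 0, 0, 0} and 2^56 has
 * slices {0, 1, 0, 0}. The multiplication bound works out as
 * 4*2^60*2^60 = 2^122 < 2^128, so the 128-bit output words cannot overflow. */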
  92. typedef uint64_t fslice;
  93. /* Field element represented as a byte array.
  94. * 28*8 = 224 bits is also the group order size for the elliptic curve. */
  95. typedef u8 felem_bytearray[28];
  96. static const felem_bytearray nistp224_curve_params[5] = {
  97. {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
  98. 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
  99. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
  100. {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
  101. 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
  102. 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
  103. {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
  104. 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
  105. 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
  106. {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
  107. 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
  108. 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
  109. {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
  110. 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
  111. 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
  112. };
  113. /* Precomputed multiples of the standard generator
  114. * b_0*G + b_1*2^56*G + b_2*2^112*G + b_3*2^168*G for
  115. * (b_3, b_2, b_1, b_0) in [0,15], i.e., gmul[0] = point_at_infinity,
  116. * gmul[1] = G, gmul[2] = 2^56*G, gmul[3] = 2^56*G + G, etc.
  117. * Points are given in Jacobian projective coordinates: words 0-3 represent the
  118. * X-coordinate (slice a_0 is word 0, etc.), words 4-7 represent the
  119. * Y-coordinate and words 8-11 represent the Z-coordinate. */
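/* For example, gmul[5] (5 = 4 + 1, i.e., b_2 = 1 and b_0 = 1) holds
 * 2^112*G + G, and gmul[1] = G is stored with Z = {1, 0, 0, 0}. */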
  120. static const fslice gmul[16][3][4] = {
  121. {{0x00000000000000, 0x00000000000000, 0x00000000000000, 0x00000000000000},
  122. {0x00000000000000, 0x00000000000000, 0x00000000000000, 0x00000000000000},
  123. {0x00000000000000, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  124. {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
  125. {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
  126. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  127. {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
  128. {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
  129. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  130. {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
  131. {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
  132. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  133. {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
  134. {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
  135. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  136. {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
  137. {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
  138. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  139. {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
  140. {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
  141. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  142. {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
  143. {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
  144. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  145. {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
  146. {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
  147. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  148. {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
  149. {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
  150. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  151. {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
  152. {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
  153. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  154. {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
  155. {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
  156. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  157. {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
  158. {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
  159. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  160. {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
  161. {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
  162. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  163. {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
  164. {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
  165. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}},
  166. {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
  167. {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
  168. {0x00000000000001, 0x00000000000000, 0x00000000000000, 0x00000000000000}}
  169. };
  170. /* Precomputation for the group generator. */
  171. typedef struct {
  172. fslice g_pre_comp[16][3][4];
  173. int references;
  174. } NISTP224_PRE_COMP;
  175. const EC_METHOD *EC_GFp_nistp224_method(void)
  176. {
  177. static const EC_METHOD ret = {
  178. NID_X9_62_prime_field,
  179. ec_GFp_nistp224_group_init,
  180. ec_GFp_simple_group_finish,
  181. ec_GFp_simple_group_clear_finish,
  182. ec_GFp_nist_group_copy,
  183. ec_GFp_nistp224_group_set_curve,
  184. ec_GFp_simple_group_get_curve,
  185. ec_GFp_simple_group_get_degree,
  186. ec_GFp_simple_group_check_discriminant,
  187. ec_GFp_simple_point_init,
  188. ec_GFp_simple_point_finish,
  189. ec_GFp_simple_point_clear_finish,
  190. ec_GFp_simple_point_copy,
  191. ec_GFp_simple_point_set_to_infinity,
  192. ec_GFp_simple_set_Jprojective_coordinates_GFp,
  193. ec_GFp_simple_get_Jprojective_coordinates_GFp,
  194. ec_GFp_simple_point_set_affine_coordinates,
  195. ec_GFp_nistp224_point_get_affine_coordinates,
  196. ec_GFp_simple_set_compressed_coordinates,
  197. ec_GFp_simple_point2oct,
  198. ec_GFp_simple_oct2point,
  199. ec_GFp_simple_add,
  200. ec_GFp_simple_dbl,
  201. ec_GFp_simple_invert,
  202. ec_GFp_simple_is_at_infinity,
  203. ec_GFp_simple_is_on_curve,
  204. ec_GFp_simple_cmp,
  205. ec_GFp_simple_make_affine,
  206. ec_GFp_simple_points_make_affine,
  207. ec_GFp_nistp224_points_mul,
  208. ec_GFp_nistp224_precompute_mult,
  209. ec_GFp_nistp224_have_precompute_mult,
  210. ec_GFp_nist_field_mul,
  211. ec_GFp_nist_field_sqr,
  212. 0 /* field_div */,
  213. 0 /* field_encode */,
  214. 0 /* field_decode */,
  215. 0 /* field_set_to_one */ };
  216. return &ret;
  217. }
  218. /* Helper functions to convert field elements to/from internal representation */
  219. static void bin28_to_felem(fslice out[4], const u8 in[28])
  220. {
  221. out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
  222. out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
  223. out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
  224. out[3] = (*((const uint64_t *)(in+20))) >> 8; /* bytes 21..27; avoids reading past the end of the 28-byte array */
  225. }
  226. static void felem_to_bin28(u8 out[28], const fslice in[4])
  227. {
  228. unsigned i;
  229. for (i = 0; i < 7; ++i)
  230. {
  231. out[i] = in[0]>>(8*i);
  232. out[i+7] = in[1]>>(8*i);
  233. out[i+14] = in[2]>>(8*i);
  234. out[i+21] = in[3]>>(8*i);
  235. }
  236. }
  237. /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
  238. static void flip_endian(u8 *out, const u8 *in, unsigned len)
  239. {
  240. unsigned i;
  241. for (i = 0; i < len; ++i)
  242. out[i] = in[len-1-i];
  243. }
  244. /* From OpenSSL BIGNUM to internal representation */
  245. static int BN_to_felem(fslice out[4], const BIGNUM *bn)
  246. {
  247. felem_bytearray b_in;
  248. felem_bytearray b_out;
  249. unsigned num_bytes;
  250. /* BN_bn2bin eats leading zeroes */
  251. memset(b_out, 0, sizeof b_out);
  252. num_bytes = BN_num_bytes(bn);
  253. if (num_bytes > sizeof b_out)
  254. {
  255. ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
  256. return 0;
  257. }
  258. if (BN_is_negative(bn))
  259. {
  260. ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
  261. return 0;
  262. }
  263. num_bytes = BN_bn2bin(bn, b_in);
  264. flip_endian(b_out, b_in, num_bytes);
  265. bin28_to_felem(out, b_out);
  266. return 1;
  267. }
  268. /* From internal representation to OpenSSL BIGNUM */
  269. static BIGNUM *felem_to_BN(BIGNUM *out, const fslice in[4])
  270. {
  271. felem_bytearray b_in, b_out;
  272. felem_to_bin28(b_in, in);
  273. flip_endian(b_out, b_in, sizeof b_out);
  274. return BN_bin2bn(b_out, sizeof b_out, out);
  275. }
  276. /******************************************************************************/
  277. /* FIELD OPERATIONS
  278. *
  279. * Field operations, using the internal representation of field elements.
  280. * NB! These operations are specific to our point multiplication and cannot be
  281. * expected to be correct in general - e.g., multiplication with a large scalar
  282. * will cause an overflow.
  283. *
  284. */
  285. /* Sum two field elements: out += in */
  286. static void felem_sum64(fslice out[4], const fslice in[4])
  287. {
  288. out[0] += in[0];
  289. out[1] += in[1];
  290. out[2] += in[2];
  291. out[3] += in[3];
  292. }
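/* felem_sum64 propagates no carries: callers keep track of slice bounds
 * (see the annotations in point_double and point_add) so that slices never
 * overflow 64 bits before the next felem_reduce. */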
  293. /* Subtract field elements: out -= in */
  294. /* Assumes in[i] < 2^57 */
  295. static void felem_diff64(fslice out[4], const fslice in[4])
  296. {
  297. static const uint64_t two58p2 = (((uint64_t) 1) << 58) + (((uint64_t) 1) << 2);
  298. static const uint64_t two58m2 = (((uint64_t) 1) << 58) - (((uint64_t) 1) << 2);
  299. static const uint64_t two58m42m2 = (((uint64_t) 1) << 58) -
  300. (((uint64_t) 1) << 42) - (((uint64_t) 1) << 2);
  301. /* Add 0 mod 2^224-2^96+1 to ensure out > in */
  302. out[0] += two58p2;
  303. out[1] += two58m42m2;
  304. out[2] += two58m2;
  305. out[3] += two58m2;
  306. out[0] -= in[0];
  307. out[1] -= in[1];
  308. out[2] -= in[2];
  309. out[3] -= in[3];
  310. }
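/* The value felem_diff64 adds before subtracting is
 * (2^58+2^2) + (2^58-2^42-2^2)*2^56 + (2^58-2^2)*2^112 + (2^58-2^2)*2^168
 * = 2^226 - 2^98 + 2^2 = 4*(2^224 - 2^96 + 1) = 4*p, so the result is
 * unchanged mod p, and each slice constant exceeds 2^57 > in[i], so no
 * slice underflows. */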
  311. /* Subtract in unreduced 128-bit mode: out128 -= in128 */
  312. /* Assumes in[i] < 2^119 */
  313. static void felem_diff128(uint128_t out[7], const uint128_t in[4])
  314. {
  315. static const uint128_t two120 = ((uint128_t) 1) << 120;
  316. static const uint128_t two120m64 = (((uint128_t) 1) << 120) -
  317. (((uint128_t) 1) << 64);
  318. static const uint128_t two120m104m64 = (((uint128_t) 1) << 120) -
  319. (((uint128_t) 1) << 104) - (((uint128_t) 1) << 64);
  320. /* Add 0 mod 2^224-2^96+1 to ensure out > in */
  321. out[0] += two120;
  322. out[1] += two120m64;
  323. out[2] += two120m64;
  324. out[3] += two120;
  325. out[4] += two120m104m64;
  326. out[5] += two120m64;
  327. out[6] += two120m64;
  328. out[0] -= in[0];
  329. out[1] -= in[1];
  330. out[2] -= in[2];
  331. out[3] -= in[3];
  332. out[4] -= in[4];
  333. out[5] -= in[5];
  334. out[6] -= in[6];
  335. }
  336. /* Subtract in mixed mode: out128 -= in64 */
  337. /* in[i] < 2^63 */
  338. static void felem_diff_128_64(uint128_t out[7], const fslice in[4])
  339. {
  340. static const uint128_t two64p8 = (((uint128_t) 1) << 64) +
  341. (((uint128_t) 1) << 8);
  342. static const uint128_t two64m8 = (((uint128_t) 1) << 64) -
  343. (((uint128_t) 1) << 8);
  344. static const uint128_t two64m48m8 = (((uint128_t) 1) << 64) -
  345. (((uint128_t) 1) << 48) - (((uint128_t) 1) << 8);
  346. /* Add 0 mod 2^224-2^96+1 to ensure out > in */
  347. out[0] += two64p8;
  348. out[1] += two64m48m8;
  349. out[2] += two64m8;
  350. out[3] += two64m8;
  351. out[0] -= in[0];
  352. out[1] -= in[1];
  353. out[2] -= in[2];
  354. out[3] -= in[3];
  355. }
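/* Here the constant added is
 * (2^64+2^8) + (2^64-2^48-2^8)*2^56 + (2^64-2^8)*2^112 + (2^64-2^8)*2^168
 * = 2^232 - 2^104 + 2^8 = 2^8*(2^224 - 2^96 + 1), again a multiple of p,
 * and each slice constant exceeds 2^63 > in[i]. */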
  356. /* Multiply a field element by a scalar: out64 = out64 * scalar
  357. * The scalars we actually use are small, so results fit without overflow */
  358. static void felem_scalar64(fslice out[4], const fslice scalar)
  359. {
  360. out[0] *= scalar;
  361. out[1] *= scalar;
  362. out[2] *= scalar;
  363. out[3] *= scalar;
  364. }
  365. /* Multiply an unreduced field element by a scalar: out128 = out128 * scalar
  366. * The scalars we actually use are small, so results fit without overflow */
  367. static void felem_scalar128(uint128_t out[7], const uint128_t scalar)
  368. {
  369. out[0] *= scalar;
  370. out[1] *= scalar;
  371. out[2] *= scalar;
  372. out[3] *= scalar;
  373. out[4] *= scalar;
  374. out[5] *= scalar;
  375. out[6] *= scalar;
  376. }
  377. /* Square a field element: out = in^2 */
  378. static void felem_square(uint128_t out[7], const fslice in[4])
  379. {
  380. out[0] = ((uint128_t) in[0]) * in[0];
  381. out[1] = ((uint128_t) in[0]) * in[1] * 2;
  382. out[2] = ((uint128_t) in[0]) * in[2] * 2 + ((uint128_t) in[1]) * in[1];
  383. out[3] = ((uint128_t) in[0]) * in[3] * 2 +
  384. ((uint128_t) in[1]) * in[2] * 2;
  385. out[4] = ((uint128_t) in[1]) * in[3] * 2 + ((uint128_t) in[2]) * in[2];
  386. out[5] = ((uint128_t) in[2]) * in[3] * 2;
  387. out[6] = ((uint128_t) in[3]) * in[3];
  388. }
  389. /* Multiply two field elements: out = in1 * in2 */
  390. static void felem_mul(uint128_t out[7], const fslice in1[4], const fslice in2[4])
  391. {
  392. out[0] = ((uint128_t) in1[0]) * in2[0];
  393. out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0];
  394. out[2] = ((uint128_t) in1[0]) * in2[2] + ((uint128_t) in1[1]) * in2[1] +
  395. ((uint128_t) in1[2]) * in2[0];
  396. out[3] = ((uint128_t) in1[0]) * in2[3] + ((uint128_t) in1[1]) * in2[2] +
  397. ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0];
  398. out[4] = ((uint128_t) in1[1]) * in2[3] + ((uint128_t) in1[2]) * in2[2] +
  399. ((uint128_t) in1[3]) * in2[1];
  400. out[5] = ((uint128_t) in1[2]) * in2[3] + ((uint128_t) in1[3]) * in2[2];
  401. out[6] = ((uint128_t) in1[3]) * in2[3];
  402. }
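/* felem_square and felem_mul are schoolbook products of degree-3 polynomials
 * in 2^56: out[k] = \sum_{i+j=k} in1[i]*in2[j]. Each out[k] is a sum of at
 * most four products of slices < 2^60, hence the b_i < 4*2^60*2^60 bound
 * quoted at the top of this file. */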
  403. /* Reduce 128-bit coefficients to 64-bit coefficients. Requires in[i] < 2^126,
  404. * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] < 2^57 */
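/* The reduction uses 2^224 == 2^96 - 1 (mod p): a coefficient c at position
 * 2^(224+56*k) folds down as c*2^(96+56*k) - c*2^(56*k). For example,
 * in[6]*2^336 == in[6]*2^208 - in[6]*2^112, which the in[6] lines below
 * implement as (in[6]&0xffff)<<40 into output[3], in[6]>>16 into output[4],
 * and -in[6] from output[2]. */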
  405. static void felem_reduce(fslice out[4], const uint128_t in[7])
  406. {
  407. static const uint128_t two127p15 = (((uint128_t) 1) << 127) +
  408. (((uint128_t) 1) << 15);
  409. static const uint128_t two127m71 = (((uint128_t) 1) << 127) -
  410. (((uint128_t) 1) << 71);
  411. static const uint128_t two127m71m55 = (((uint128_t) 1) << 127) -
  412. (((uint128_t) 1) << 71) - (((uint128_t) 1) << 55);
  413. uint128_t output[5];
  414. /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
  415. output[0] = in[0] + two127p15;
  416. output[1] = in[1] + two127m71m55;
  417. output[2] = in[2] + two127m71;
  418. output[3] = in[3];
  419. output[4] = in[4];
  420. /* Eliminate in[4], in[5], in[6] */
  421. output[4] += in[6] >> 16;
  422. output[3] += (in[6]&0xffff) << 40;
  423. output[2] -= in[6];
  424. output[3] += in[5] >> 16;
  425. output[2] += (in[5]&0xffff) << 40;
  426. output[1] -= in[5];
  427. output[2] += output[4] >> 16;
  428. output[1] += (output[4]&0xffff) << 40;
  429. output[0] -= output[4];
  430. output[4] = 0;
  431. /* Carry 2 -> 3 -> 4 */
  432. output[3] += output[2] >> 56;
  433. output[2] &= 0x00ffffffffffffff;
  434. output[4] += output[3] >> 56;
  435. output[3] &= 0x00ffffffffffffff;
  436. /* Now output[2] < 2^56, output[3] < 2^56 */
  437. /* Eliminate output[4] */
  438. output[2] += output[4] >> 16;
  439. output[1] += (output[4]&0xffff) << 40;
  440. output[0] -= output[4];
  441. /* Carry 0 -> 1 -> 2 -> 3 */
  442. output[1] += output[0] >> 56;
  443. out[0] = output[0] & 0x00ffffffffffffff;
  444. output[2] += output[1] >> 56;
  445. out[1] = output[1] & 0x00ffffffffffffff;
  446. output[3] += output[2] >> 56;
  447. out[2] = output[2] & 0x00ffffffffffffff;
  448. /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
  449. * out[3] < 2^57 (due to final carry) */
  450. out[3] = output[3];
  451. }
  452. /* Reduce to unique minimal representation */
  453. static void felem_contract(fslice out[4], const fslice in[4])
  454. {
  455. static const int64_t two56 = ((uint64_t) 1) << 56;
  456. /* 0 <= in < 2^225 */
  457. /* if in > 2^224 , reduce in = in - 2^224 + 2^96 - 1 */
  458. int64_t tmp[4], a;
  459. tmp[0] = (int64_t) in[0] - (in[3] >> 56);
  460. tmp[1] = (int64_t) in[1] + ((in[3] >> 16) & 0x0000010000000000);
  461. tmp[2] = (int64_t) in[2];
  462. tmp[3] = (int64_t) in[3] & 0x00ffffffffffffff;
  463. /* eliminate negative coefficients */
  464. a = tmp[0] >> 63;
  465. tmp[0] += two56 & a;
  466. tmp[1] -= 1 & a;
  467. a = tmp[1] >> 63;
  468. tmp[1] += two56 & a;
  469. tmp[2] -= 1 & a;
  470. a = tmp[2] >> 63;
  471. tmp[2] += two56 & a;
  472. tmp[3] -= 1 & a;
  473. a = tmp[3] >> 63;
  474. tmp[3] += two56 & a;
  475. tmp[0] += 1 & a;
  476. tmp[1] -= (1 & a) << 40;
  477. /* carry 1 -> 2 -> 3 */
  478. tmp[2] += tmp[1] >> 56;
  479. tmp[1] &= 0x00ffffffffffffff;
  480. tmp[3] += tmp[2] >> 56;
  481. tmp[2] &= 0x00ffffffffffffff;
  482. /* 0 <= in < 2^224 + 2^96 - 1 */
  483. /* if in > 2^224 , reduce in = in - 2^224 + 2^96 - 1 */
  484. tmp[0] -= (tmp[3] >> 56);
  485. tmp[1] += ((tmp[3] >> 16) & 0x0000010000000000);
  486. tmp[3] &= 0x00ffffffffffffff;
  487. /* eliminate negative coefficients */
  488. a = tmp[0] >> 63;
  489. tmp[0] += two56 & a;
  490. tmp[1] -= 1 & a;
  491. a = tmp[1] >> 63;
  492. tmp[1] += two56 & a;
  493. tmp[2] -= 1 & a;
  494. a = tmp[2] >> 63;
  495. tmp[2] += two56 & a;
  496. tmp[3] -= 1 & a;
  497. a = tmp[3] >> 63;
  498. tmp[3] += two56 & a;
  499. tmp[0] += 1 & a;
  500. tmp[1] -= (1 & a) << 40;
  501. /* carry 1 -> 2 -> 3 */
  502. tmp[2] += tmp[1] >> 56;
  503. tmp[1] &= 0x00ffffffffffffff;
  504. tmp[3] += tmp[2] >> 56;
  505. tmp[2] &= 0x00ffffffffffffff;
  506. /* Now 0 <= in < 2^224 */
  507. /* if in > 2^224 - 2^96, reduce */
  508. /* a = 0 iff in > 2^224 - 2^96, i.e.,
  509. * the high 128 bits are all 1 and the lower part is non-zero */
  510. a = (tmp[3] + 1) | (tmp[2] + 1) |
  511. ((tmp[1] | 0x000000ffffffffff) + 1) |
  512. ((((tmp[1] & 0xffff) - 1) >> 63) & ((tmp[0] - 1) >> 63));
  513. /* turn a into an all-one mask (if a = 0) or an all-zero mask */
  514. a = ((a & 0x00ffffffffffffff) - 1) >> 63;
  515. /* subtract 2^224 - 2^96 + 1 if a is all-one*/
  516. tmp[3] &= a ^ 0xffffffffffffffff;
  517. tmp[2] &= a ^ 0xffffffffffffffff;
  518. tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
  519. tmp[0] -= 1 & a;
  520. /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
  521. * non-zero, so we only need one step */
  522. a = tmp[0] >> 63;
  523. tmp[0] += two56 & a;
  524. tmp[1] -= 1 & a;
  525. out[0] = tmp[0];
  526. out[1] = tmp[1];
  527. out[2] = tmp[2];
  528. out[3] = tmp[3];
  529. }
  530. /* Zero-check: returns 1 if input is 0, and 0 otherwise.
  531. * We know that field elements are reduced to in < 2^225,
  532. * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
  533. * and 2^225 - 2^97 + 2 */
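/* These are precisely the representations of zero below 2^225:
 * 0, p = 2^224 - 2^96 + 1 and 2*p = 2^225 - 2^97 + 2 (3*p already exceeds
 * 2^225). */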
  534. static fslice felem_is_zero(const fslice in[4])
  535. {
  536. fslice zero, two224m96p1, two225m97p2;
  537. zero = in[0] | in[1] | in[2] | in[3];
  538. zero = (((int64_t)(zero) - 1) >> 63) & 1;
  539. two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
  540. | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
  541. two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
  542. two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
  543. | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
  544. two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
  545. return (zero | two224m96p1 | two225m97p2);
  546. }
  547. /* Invert a field element */
  548. /* Computation chain copied from djb's code */
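/* By Fermat's little theorem, in^-1 == in^(p-2) (mod p) with
 * p - 2 = 2^224 - 2^96 - 1; the chain below builds up this exponent,
 * tracked by the comment after each step. */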
  549. static void felem_inv(fslice out[4], const fslice in[4])
  550. {
  551. fslice ftmp[4], ftmp2[4], ftmp3[4], ftmp4[4];
  552. uint128_t tmp[7];
  553. unsigned i;
  554. felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
  555. felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
  556. felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
  557. felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
  558. felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
  559. felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
  560. felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
  561. felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
  562. felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
  563. for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
  564. {
  565. felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
  566. }
  567. felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
  568. felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
  569. for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
  570. {
  571. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
  572. }
  573. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
  574. felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
  575. for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
  576. {
  577. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
  578. }
  579. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
  580. felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
  581. for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
  582. {
  583. felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
  584. }
  585. felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
  586. felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
  587. for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
  588. {
  589. felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
  590. }
  591. felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
  592. for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
  593. {
  594. felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
  595. }
  596. felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
  597. felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
  598. felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
  599. for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
  600. {
  601. felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
  602. }
  603. felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
  604. }
  605. /* Copy in constant time:
  606. * if icopy == 1, copy in to out,
  607. * if icopy == 0, copy out to itself. */
  608. static void
  609. copy_conditional(fslice *out, const fslice *in, unsigned len, fslice icopy)
  610. {
  611. unsigned i;
  612. /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
  613. const fslice copy = -icopy;
  614. for (i = 0; i < len; ++i)
  615. {
  616. const fslice tmp = copy & (in[i] ^ out[i]);
  617. out[i] ^= tmp;
  618. }
  619. }
  620. /* Copy in constant time:
  621. * if isel == 1, copy in2 to out,
  622. * if isel == 0, copy in1 to out. */
  623. static void select_conditional(fslice *out, const fslice *in1, const fslice *in2,
  624. unsigned len, fslice isel)
  625. {
  626. unsigned i;
  627. /* isel is a (64-bit) 0 or 1, so sel is either all-zero or all-one */
  628. const fslice sel = -isel;
  629. for (i = 0; i < len; ++i)
  630. {
  631. const fslice tmp = sel & (in1[i] ^ in2[i]);
  632. out[i] = in1[i] ^ tmp;
  633. }
  634. }
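/* copy_conditional and select_conditional rely on the fact that, with
 * icopy/isel in {0, 1}, the negation -icopy is either all zeros or all ones,
 * so x ^ (mask & (x ^ y)) evaluates to x or y without any secret-dependent
 * branch or memory access. A minimal usage sketch (illustrative only, not
 * part of the original code): */
#if 0
static void example_select(fslice out[4], const fslice a[4],
const fslice b[4], fslice bit)
{
/* out = bit ? b : a, evaluated branch-free */
select_conditional(out, a, b, 4, bit);
}
#endif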
  635. /******************************************************************************/
  636. /* ELLIPTIC CURVE POINT OPERATIONS
  637. *
  638. * Points are represented in Jacobian projective coordinates:
  639. * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
  640. * or to the point at infinity if Z == 0.
  641. *
  642. */
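/* The affine point (x, y) corresponds to (x, y, 1); any triple with Z == 0
 * is the point at infinity. This is why the tables store the point at
 * infinity as all-zero words and store G with Z = {1, 0, 0, 0}. */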
  643. /* Double an elliptic curve point:
  644. * (X', Y', Z') = 2 * (X, Y, Z), where
  645. * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
  646. * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
  647. * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
  648. * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
  649. * while x_out == y_in is not (maybe this works, but it's not tested). */
  650. static void
  651. point_double(fslice x_out[4], fslice y_out[4], fslice z_out[4],
  652. const fslice x_in[4], const fslice y_in[4], const fslice z_in[4])
  653. {
  654. uint128_t tmp[7], tmp2[7];
  655. fslice delta[4];
  656. fslice gamma[4];
  657. fslice beta[4];
  658. fslice alpha[4];
  659. fslice ftmp[4], ftmp2[4];
  660. memcpy(ftmp, x_in, 4 * sizeof(fslice));
  661. memcpy(ftmp2, x_in, 4 * sizeof(fslice));
  662. /* delta = z^2 */
  663. felem_square(tmp, z_in);
  664. felem_reduce(delta, tmp);
  665. /* gamma = y^2 */
  666. felem_square(tmp, y_in);
  667. felem_reduce(gamma, tmp);
  668. /* beta = x*gamma */
  669. felem_mul(tmp, x_in, gamma);
  670. felem_reduce(beta, tmp);
  671. /* alpha = 3*(x-delta)*(x+delta) */
  672. felem_diff64(ftmp, delta);
  673. /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
  674. felem_sum64(ftmp2, delta);
  675. /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
  676. felem_scalar64(ftmp2, 3);
  677. /* ftmp2[i] < 3 * 2^58 < 2^60 */
  678. felem_mul(tmp, ftmp, ftmp2);
  679. /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
  680. felem_reduce(alpha, tmp);
  681. /* x' = alpha^2 - 8*beta */
  682. felem_square(tmp, alpha);
  683. /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
  684. memcpy(ftmp, beta, 4 * sizeof(fslice));
  685. felem_scalar64(ftmp, 8);
  686. /* ftmp[i] < 8 * 2^57 = 2^60 */
  687. felem_diff_128_64(tmp, ftmp);
  688. /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
  689. felem_reduce(x_out, tmp);
  690. /* z' = (y + z)^2 - gamma - delta */
  691. felem_sum64(delta, gamma);
  692. /* delta[i] < 2^57 + 2^57 = 2^58 */
  693. memcpy(ftmp, y_in, 4 * sizeof(fslice));
  694. felem_sum64(ftmp, z_in);
  695. /* ftmp[i] < 2^57 + 2^57 = 2^58 */
  696. felem_square(tmp, ftmp);
  697. /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
  698. felem_diff_128_64(tmp, delta);
  699. /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
  700. felem_reduce(z_out, tmp);
  701. /* y' = alpha*(4*beta - x') - 8*gamma^2 */
  702. felem_scalar64(beta, 4);
  703. /* beta[i] < 4 * 2^57 = 2^59 */
  704. felem_diff64(beta, x_out);
  705. /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
  706. felem_mul(tmp, alpha, beta);
  707. /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
  708. felem_square(tmp2, gamma);
  709. /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
  710. felem_scalar128(tmp2, 8);
  711. /* tmp2[i] < 8 * 2^116 = 2^119 */
  712. felem_diff128(tmp, tmp2);
  713. /* tmp[i] < 2^119 + 2^120 < 2^121 */
  714. felem_reduce(y_out, tmp);
  715. }
  716. /* Add two elliptic curve points:
  717. * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
  718. * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
  719. * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
  720. * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
  721. * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
  722. * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2) */
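/* In the usual shorthand, with U1 = X_1*Z_2^2, U2 = X_2*Z_1^2,
 * S1 = Y_1*Z_2^3, S2 = Y_2*Z_1^3, H = U2 - U1 and R = S2 - S1, these are
 * X_3 = R^2 - H^3 - 2*U1*H^2, Y_3 = R*(U1*H^2 - X_3) - S1*H^3 and
 * Z_3 = H*Z_1*Z_2. */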
  723. /* This function is not entirely constant-time:
  724. * it includes a branch for checking whether the two input points are equal
  725. * (while neither is the point at infinity).
  726. * This case never happens during single point multiplication,
  727. * so there is no timing leak for ECDH or ECDSA signing. */
  728. static void point_add(fslice x3[4], fslice y3[4], fslice z3[4],
  729. const fslice x1[4], const fslice y1[4], const fslice z1[4],
  730. const fslice x2[4], const fslice y2[4], const fslice z2[4])
  731. {
  732. fslice ftmp[4], ftmp2[4], ftmp3[4], ftmp4[4], ftmp5[4];
  733. uint128_t tmp[7], tmp2[7];
  734. fslice z1_is_zero, z2_is_zero, x_equal, y_equal;
  735. /* ftmp = z1^2 */
  736. felem_square(tmp, z1);
  737. felem_reduce(ftmp, tmp);
  738. /* ftmp2 = z2^2 */
  739. felem_square(tmp, z2);
  740. felem_reduce(ftmp2, tmp);
  741. /* ftmp3 = z1^3 */
  742. felem_mul(tmp, ftmp, z1);
  743. felem_reduce(ftmp3, tmp);
  744. /* ftmp4 = z2^3 */
  745. felem_mul(tmp, ftmp2, z2);
  746. felem_reduce(ftmp4, tmp);
  747. /* ftmp3 = z1^3*y2 */
  748. felem_mul(tmp, ftmp3, y2);
  749. /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
  750. /* ftmp4 = z2^3*y1 */
  751. felem_mul(tmp2, ftmp4, y1);
  752. felem_reduce(ftmp4, tmp2);
  753. /* ftmp3 = z1^3*y2 - z2^3*y1 */
  754. felem_diff_128_64(tmp, ftmp4);
  755. /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
  756. felem_reduce(ftmp3, tmp);
  757. /* ftmp = z1^2*x2 */
  758. felem_mul(tmp, ftmp, x2);
  759. /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
  760. /* ftmp2 =z2^2*x1 */
  761. felem_mul(tmp2, ftmp2, x1);
  762. felem_reduce(ftmp2, tmp2);
  763. /* ftmp = z1^2*x2 - z2^2*x1 */
  764. felem_diff128(tmp, tmp2);
  765. /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
  766. felem_reduce(ftmp, tmp);
  767. /* the formulae are incorrect if the points are equal
  768. * so we check for this and do doubling if this happens */
  769. x_equal = felem_is_zero(ftmp);
  770. y_equal = felem_is_zero(ftmp3);
  771. z1_is_zero = felem_is_zero(z1);
  772. z2_is_zero = felem_is_zero(z2);
  773. /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
  774. if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
  775. {
  776. point_double(x3, y3, z3, x1, y1, z1);
  777. return;
  778. }
  779. /* ftmp5 = z1*z2 */
  780. felem_mul(tmp, z1, z2);
  781. felem_reduce(ftmp5, tmp);
  782. /* z3 = (z1^2*x2 - z2^2*x1)*(z1*z2) */
  783. felem_mul(tmp, ftmp, ftmp5);
  784. felem_reduce(z3, tmp);
  785. /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
  786. memcpy(ftmp5, ftmp, 4 * sizeof(fslice));
  787. felem_square(tmp, ftmp);
  788. felem_reduce(ftmp, tmp);
  789. /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
  790. felem_mul(tmp, ftmp, ftmp5);
  791. felem_reduce(ftmp5, tmp);
  792. /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
  793. felem_mul(tmp, ftmp2, ftmp);
  794. felem_reduce(ftmp2, tmp);
  795. /* ftmp4 = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
  796. felem_mul(tmp, ftmp4, ftmp5);
  797. /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
  798. /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
  799. felem_square(tmp2, ftmp3);
  800. /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
  801. /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
  802. felem_diff_128_64(tmp2, ftmp5);
  803. /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
  804. /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
  805. memcpy(ftmp5, ftmp2, 4 * sizeof(fslice));
  806. felem_scalar64(ftmp5, 2);
  807. /* ftmp5[i] < 2 * 2^57 = 2^58 */
  808. /* x3 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
  809. 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
  810. felem_diff_128_64(tmp2, ftmp5);
  811. /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
  812. felem_reduce(x3, tmp2);
  813. /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x3 */
  814. felem_diff64(ftmp2, x3);
  815. /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
  816. /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x3) */
  817. felem_mul(tmp2, ftmp3, ftmp2);
  818. /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
  819. /* y3 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x3) -
  820. z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
  821. felem_diff128(tmp2, tmp);
  822. /* tmp2[i] < 2^118 + 2^120 < 2^121 */
  823. felem_reduce(y3, tmp2);
  824. /* the result (x3, y3, z3) is incorrect if one of the inputs is the
  825. * point at infinity, so we need to check for this separately */
  826. /* if point 1 is at infinity, copy point 2 to output, and vice versa */
  827. copy_conditional(x3, x2, 4, z1_is_zero);
  828. copy_conditional(x3, x1, 4, z2_is_zero);
  829. copy_conditional(y3, y2, 4, z1_is_zero);
  830. copy_conditional(y3, y1, 4, z2_is_zero);
  831. copy_conditional(z3, z2, 4, z1_is_zero);
  832. copy_conditional(z3, z1, 4, z2_is_zero);
  833. }
  834. /* Select a point from an array of 16 precomputed point multiples,
  835. * in constant time: for bits = {b_0, b_1, b_2, b_3}, return the point
  836. * pre_comp[8*b_3 + 4*b_2 + 2*b_1 + b_0] */
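/* For example, bits = {1, 0, 1, 0} (b_0 = 1, b_2 = 1) gives index
 * 4 + 1 = 5, so pre_comp[5] is returned. The cascade of select_conditional
 * calls below acts as a four-level branch-free multiplexer, so the memory
 * access pattern does not depend on the secret bits. */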
  837. static void select_point(const fslice bits[4], const fslice pre_comp[16][3][4],
  838. fslice out[12])
  839. {
  840. fslice tmp[5][12];
  841. select_conditional(tmp[0], pre_comp[7][0], pre_comp[15][0], 12, bits[3]);
  842. select_conditional(tmp[1], pre_comp[3][0], pre_comp[11][0], 12, bits[3]);
  843. select_conditional(tmp[2], tmp[1], tmp[0], 12, bits[2]);
  844. select_conditional(tmp[0], pre_comp[5][0], pre_comp[13][0], 12, bits[3]);
  845. select_conditional(tmp[1], pre_comp[1][0], pre_comp[9][0], 12, bits[3]);
  846. select_conditional(tmp[3], tmp[1], tmp[0], 12, bits[2]);
  847. select_conditional(tmp[4], tmp[3], tmp[2], 12, bits[1]);
  848. select_conditional(tmp[0], pre_comp[6][0], pre_comp[14][0], 12, bits[3]);
  849. select_conditional(tmp[1], pre_comp[2][0], pre_comp[10][0], 12, bits[3]);
  850. select_conditional(tmp[2], tmp[1], tmp[0], 12, bits[2]);
  851. select_conditional(tmp[0], pre_comp[4][0], pre_comp[12][0], 12, bits[3]);
  852. select_conditional(tmp[1], pre_comp[0][0], pre_comp[8][0], 12, bits[3]);
  853. select_conditional(tmp[3], tmp[1], tmp[0], 12, bits[2]);
  854. select_conditional(tmp[1], tmp[3], tmp[2], 12, bits[1]);
  855. select_conditional(out, tmp[1], tmp[4], 12, bits[0]);
  856. }
  857. /* Interleaved point multiplication using precomputed point multiples:
  858. * The small point multiples 0*P, 1*P, ..., 15*P are in pre_comp[],
  859. * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
  860. * of the generator, using certain (large) precomputed multiples in g_pre_comp.
  861. * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
  862. static void batch_mul(fslice x_out[4], fslice y_out[4], fslice z_out[4],
  863. const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
  864. const fslice pre_comp[][16][3][4], const fslice g_pre_comp[16][3][4])
  865. {
  866. unsigned i, j, num;
  867. unsigned gen_mul = (g_scalar != NULL);
  868. fslice nq[12], nqt[12], tmp[12];
  869. fslice bits[4];
  870. u8 byte;
  871. /* set nq to the point at infinity */
  872. memset(nq, 0, 12 * sizeof(fslice));
  873. /* Loop over all scalars msb-to-lsb, 4 bits at a time: for each nibble,
  874. * double 4 times, then add the precomputed point multiples.
  875. * If we are also adding multiples of the generator, then interleave
  876. * these additions with the last 56 doublings. */
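/* For the generator, the four bits read below come from the same bit
 * position of the four 56-bit blocks of g_scalar (bytes i-1, i+6, i+13 and
 * i+20 of the little-endian scalar), so the selected entry is
 * b_0*G + b_1*2^56*G + b_2*2^112*G + b_3*2^168*G, matching the layout of
 * gmul described above. */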
  877. for (i = (num_points ? 28 : 7); i > 0; --i)
  878. {
  879. for (j = 0; j < 8; ++j)
  880. {
  881. /* double once */
  882. point_double(nq, nq+4, nq+8, nq, nq+4, nq+8);
  883. /* add multiples of the generator */
  884. if ((gen_mul) && (i <= 7))
  885. {
  886. bits[3] = (g_scalar[i+20] >> (7-j)) & 1;
  887. bits[2] = (g_scalar[i+13] >> (7-j)) & 1;
  888. bits[1] = (g_scalar[i+6] >> (7-j)) & 1;
  889. bits[0] = (g_scalar[i-1] >> (7-j)) & 1;
  890. /* select the point to add, in constant time */
  891. select_point(bits, g_pre_comp, tmp);
  892. memcpy(nqt, nq, 12 * sizeof(fslice));
  893. point_add(nq, nq+4, nq+8, nqt, nqt+4, nqt+8,
  894. tmp, tmp+4, tmp+8);
  895. }
  896. /* do an addition after every 4 doublings */
  897. if (j % 4 == 3)
  898. {
  899. /* loop over all scalars */
  900. for (num = 0; num < num_points; ++num)
  901. {
  902. byte = scalars[num][i-1];
  903. bits[3] = (byte >> (10-j)) & 1;
  904. bits[2] = (byte >> (9-j)) & 1;
  905. bits[1] = (byte >> (8-j)) & 1;
  906. bits[0] = (byte >> (7-j)) & 1;
  907. /* select the point to add */
  908. select_point(bits,
  909. pre_comp[num], tmp);
  910. memcpy(nqt, nq, 12 * sizeof(fslice));
  911. point_add(nq, nq+4, nq+8, nqt, nqt+4,
  912. nqt+8, tmp, tmp+4, tmp+8);
  913. }
  914. }
  915. }
  916. }
  917. memcpy(x_out, nq, 4 * sizeof(fslice));
  918. memcpy(y_out, nq+4, 4 * sizeof(fslice));
  919. memcpy(z_out, nq+8, 4 * sizeof(fslice));
  920. }
  921. /******************************************************************************/
  922. /* FUNCTIONS TO MANAGE PRECOMPUTATION
  923. */
  924. static NISTP224_PRE_COMP *nistp224_pre_comp_new()
  925. {
  926. NISTP224_PRE_COMP *ret = NULL;
  927. ret = (NISTP224_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP224_PRE_COMP));
  928. if (!ret)
  929. {
  930. ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
  931. return ret;
  932. }
  933. memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
  934. ret->references = 1;
  935. return ret;
  936. }
  937. static void *nistp224_pre_comp_dup(void *src_)
  938. {
  939. NISTP224_PRE_COMP *src = src_;
  940. /* no need to actually copy, these objects never change! */
  941. CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
  942. return src_;
  943. }
  944. static void nistp224_pre_comp_free(void *pre_)
  945. {
  946. int i;
  947. NISTP224_PRE_COMP *pre = pre_;
  948. if (!pre)
  949. return;
  950. i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
  951. if (i > 0)
  952. return;
  953. OPENSSL_free(pre);
  954. }
  955. static void nistp224_pre_comp_clear_free(void *pre_)
  956. {
  957. int i;
  958. NISTP224_PRE_COMP *pre = pre_;
  959. if (!pre)
  960. return;
  961. i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
  962. if (i > 0)
  963. return;
  964. OPENSSL_cleanse(pre, sizeof *pre);
  965. OPENSSL_free(pre);
  966. }
  967. /******************************************************************************/
  968. /* OPENSSL EC_METHOD FUNCTIONS
  969. */
  970. int ec_GFp_nistp224_group_init(EC_GROUP *group)
  971. {
  972. int ret;
  973. ret = ec_GFp_simple_group_init(group);
  974. group->a_is_minus3 = 1;
  975. return ret;
  976. }
  977. int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
  978. const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
  979. {
  980. int ret = 0;
  981. BN_CTX *new_ctx = NULL;
  982. BIGNUM *curve_p, *curve_a, *curve_b;
  983. if (ctx == NULL)
  984. if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
  985. BN_CTX_start(ctx);
  986. if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
  987. ((curve_a = BN_CTX_get(ctx)) == NULL) ||
  988. ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
  989. BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
  990. BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
  991. BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
  992. if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
  993. (BN_cmp(curve_b, b)))
  994. {
  995. ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
  996. EC_R_WRONG_CURVE_PARAMETERS);
  997. goto err;
  998. }
  999. group->field_mod_func = BN_nist_mod_224;
  1000. ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
  1001. err:
  1002. BN_CTX_end(ctx);
  1003. if (new_ctx != NULL)
  1004. BN_CTX_free(new_ctx);
  1005. return ret;
  1006. }
  1007. /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
  1008. * (X', Y') = (X/Z^2, Y/Z^3) */
  1009. int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
  1010. const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
  1011. {
  1012. fslice z1[4], z2[4], x_in[4], y_in[4], x_out[4], y_out[4];
  1013. uint128_t tmp[7];
  1014. if (EC_POINT_is_at_infinity(group, point))
  1015. {
  1016. ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
  1017. EC_R_POINT_AT_INFINITY);
  1018. return 0;
  1019. }
  1020. if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
  1021. (!BN_to_felem(z1, &point->Z))) return 0;
  1022. felem_inv(z2, z1);
  1023. felem_square(tmp, z2); felem_reduce(z1, tmp);
  1024. felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
  1025. felem_contract(x_out, x_in);
  1026. if (x != NULL)
  1027. {
  1028. if (!felem_to_BN(x, x_out)) {
  1029. ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
  1030. ERR_R_BN_LIB);
  1031. return 0;
  1032. }
  1033. }
  1034. felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
  1035. felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
  1036. felem_contract(y_out, y_in);
  1037. if (y != NULL)
  1038. {
  1039. if (!felem_to_BN(y, y_out)) {
  1040. ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
  1041. ERR_R_BN_LIB);
  1042. return 0;
  1043. }
  1044. }
  1045. return 1;
  1046. }
  1047. /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
  1048. * Result is stored in r (r can equal one of the inputs). */
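/* For each input point P, the loop below fills pre_comp[i][j] = j*P for
 * j = 0..15 (j = 0 stays the all-zero point at infinity; even entries by
 * doubling, odd entries by adding P), and batch_mul then consumes each
 * scalar four bits at a time via select_point. */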
  1049. int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
  1050. const BIGNUM *scalar, size_t num, const EC_POINT *points[],
  1051. const BIGNUM *scalars[], BN_CTX *ctx)
  1052. {
  1053. int ret = 0;
  1054. int i, j;
  1055. BN_CTX *new_ctx = NULL;
  1056. BIGNUM *x, *y, *z, *tmp_scalar;
  1057. felem_bytearray g_secret;
  1058. felem_bytearray *secrets = NULL;
  1059. fslice (*pre_comp)[16][3][4] = NULL;
  1060. felem_bytearray tmp;
  1061. unsigned num_bytes;
  1062. int have_pre_comp = 0;
  1063. size_t num_points = num;
  1064. fslice x_in[4], y_in[4], z_in[4], x_out[4], y_out[4], z_out[4];
  1065. NISTP224_PRE_COMP *pre = NULL;
  1066. fslice (*g_pre_comp)[3][4] = NULL;
  1067. EC_POINT *generator = NULL;
  1068. const EC_POINT *p = NULL;
  1069. const BIGNUM *p_scalar = NULL;
  1070. if (ctx == NULL)
  1071. if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
  1072. BN_CTX_start(ctx);
  1073. if (((x = BN_CTX_get(ctx)) == NULL) ||
  1074. ((y = BN_CTX_get(ctx)) == NULL) ||
  1075. ((z = BN_CTX_get(ctx)) == NULL) ||
  1076. ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
  1077. goto err;
  1078. if (scalar != NULL)
  1079. {
  1080. pre = EC_EX_DATA_get_data(group->extra_data,
  1081. nistp224_pre_comp_dup, nistp224_pre_comp_free,
  1082. nistp224_pre_comp_clear_free);
  1083. if (pre)
  1084. /* we have precomputation, try to use it */
  1085. g_pre_comp = pre->g_pre_comp;
  1086. else
  1087. /* try to use the standard precomputation */
  1088. g_pre_comp = (fslice (*)[3][4]) gmul;
  1089. generator = EC_POINT_new(group);
  1090. if (generator == NULL)
  1091. goto err;
  1092. /* get the generator from precomputation */
  1093. if (!felem_to_BN(x, g_pre_comp[1][0]) ||
  1094. !felem_to_BN(y, g_pre_comp[1][1]) ||
  1095. !felem_to_BN(z, g_pre_comp[1][2]))
  1096. {
  1097. ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
  1098. goto err;
  1099. }
  1100. if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
  1101. generator, x, y, z, ctx))
  1102. goto err;
  1103. if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
  1104. /* precomputation matches generator */
  1105. have_pre_comp = 1;
  1106. else
  1107. /* we don't have valid precomputation:
  1108. * treat the generator as a random point */
  1109. num_points = num_points + 1;
  1110. }
  1111. secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
  1112. pre_comp = OPENSSL_malloc(num_points * 16 * 3 * 4 * sizeof(fslice));
  1113. if ((num_points) && ((secrets == NULL) || (pre_comp == NULL)))
  1114. {
  1115. ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
  1116. goto err;
  1117. }
  1118. /* we treat NULL scalars as 0, and NULL points as points at infinity,
  1119. * i.e., they contribute nothing to the linear combination */
  1120. memset(secrets, 0, num_points * sizeof(felem_bytearray));
  1121. memset(pre_comp, 0, num_points * 16 * 3 * 4 * sizeof(fslice));
  1122. for (i = 0; i < num_points; ++i)
  1123. {
  1124. if (i == num)
  1125. /* the generator */
  1126. {
  1127. p = EC_GROUP_get0_generator(group);
  1128. p_scalar = scalar;
  1129. }
  1130. else
  1131. /* the i^th point */
  1132. {
  1133. p = points[i];
  1134. p_scalar = scalars[i];
  1135. }
  1136. if ((p_scalar != NULL) && (p != NULL))
  1137. {
  1138. num_bytes = BN_num_bytes(p_scalar);
  1139. /* reduce scalar to 0 <= scalar < 2^224 */
  1140. if ((num_bytes > sizeof(felem_bytearray)) || (BN_is_negative(p_scalar)))
  1141. {
  1142. /* this is an unusual input, and we don't guarantee
  1143. * constant-timeness */
  1144. if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
  1145. {
  1146. ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
  1147. goto err;
  1148. }
  1149. num_bytes = BN_bn2bin(tmp_scalar, tmp);
  1150. }
  1151. else
  1152. BN_bn2bin(p_scalar, tmp);
  1153. flip_endian(secrets[i], tmp, num_bytes);
  1154. /* precompute multiples */
  1155. if ((!BN_to_felem(x_out, &p->X)) ||
  1156. (!BN_to_felem(y_out, &p->Y)) ||
  1157. (!BN_to_felem(z_out, &p->Z))) goto err;
  1158. memcpy(pre_comp[i][1][0], x_out, 4 * sizeof(fslice));
  1159. memcpy(pre_comp[i][1][1], y_out, 4 * sizeof(fslice));
  1160. memcpy(pre_comp[i][1][2], z_out, 4 * sizeof(fslice));
  1161. for (j = 1; j < 8; ++j)
  1162. {
  1163. point_double(pre_comp[i][2*j][0],
  1164. pre_comp[i][2*j][1],
  1165. pre_comp[i][2*j][2],
  1166. pre_comp[i][j][0],
  1167. pre_comp[i][j][1],
  1168. pre_comp[i][j][2]);
  1169. point_add(pre_comp[i][2*j+1][0],
  1170. pre_comp[i][2*j+1][1],
  1171. pre_comp[i][2*j+1][2],
  1172. pre_comp[i][1][0],
  1173. pre_comp[i][1][1],
  1174. pre_comp[i][1][2],
  1175. pre_comp[i][2*j][0],
  1176. pre_comp[i][2*j][1],
  1177. pre_comp[i][2*j][2]);
  1178. }
  1179. }
  1180. }
  1181. /* the scalar for the generator */
  1182. if ((scalar != NULL) && (have_pre_comp))
  1183. {
  1184. memset(g_secret, 0, sizeof g_secret);
  1185. num_bytes = BN_num_bytes(scalar);
  1186. /* reduce scalar to 0 <= scalar < 2^224 */
  1187. if ((num_bytes > sizeof(felem_bytearray)) || (BN_is_negative(scalar)))
  1188. {
  1189. /* this is an unusual input, and we don't guarantee
  1190. * constant-timeness */
  1191. if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
  1192. {
  1193. ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
  1194. goto err;
  1195. }
  1196. num_bytes = BN_bn2bin(tmp_scalar, tmp);
  1197. }
  1198. else
  1199. BN_bn2bin(scalar, tmp);
  1200. flip_endian(g_secret, tmp, num_bytes);
  1201. /* do the multiplication with generator precomputation */
  1202. batch_mul(x_out, y_out, z_out,
  1203. (const felem_bytearray (*)) secrets, num_points,
  1204. g_secret, (const fslice (*)[16][3][4]) pre_comp,
  1205. (const fslice (*)[3][4]) g_pre_comp);
  1206. }
  1207. else
  1208. /* do the multiplication without generator precomputation */
  1209. batch_mul(x_out, y_out, z_out,
  1210. (const felem_bytearray (*)) secrets, num_points,
  1211. NULL, (const fslice (*)[16][3][4]) pre_comp, NULL);
  1212. /* reduce the output to its unique minimal representation */
  1213. felem_contract(x_in, x_out);
  1214. felem_contract(y_in, y_out);
  1215. felem_contract(z_in, z_out);
  1216. if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
  1217. (!felem_to_BN(z, z_in)))
  1218. {
  1219. ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
  1220. goto err;
  1221. }
  1222. ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
  1223. err:
  1224. BN_CTX_end(ctx);
  1225. if (generator != NULL)
  1226. EC_POINT_free(generator);
  1227. if (new_ctx != NULL)
  1228. BN_CTX_free(new_ctx);
  1229. if (secrets != NULL)
  1230. OPENSSL_free(secrets);
  1231. if (pre_comp != NULL)
  1232. OPENSSL_free(pre_comp);
  1233. return ret;
  1234. }
  1235. int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
  1236. {
  1237. int ret = 0;
  1238. NISTP224_PRE_COMP *pre = NULL;
  1239. int i, j;
  1240. BN_CTX *new_ctx = NULL;
  1241. BIGNUM *x, *y;
  1242. EC_POINT *generator = NULL;
  1243. /* throw away old precomputation */
  1244. EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
  1245. nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
  1246. if (ctx == NULL)
  1247. if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
  1248. BN_CTX_start(ctx);
  1249. if (((x = BN_CTX_get(ctx)) == NULL) ||
  1250. ((y = BN_CTX_get(ctx)) == NULL))
  1251. goto err;
  1252. /* get the generator */
  1253. if (group->generator == NULL) goto err;
  1254. generator = EC_POINT_new(group);
  1255. if (generator == NULL)
  1256. goto err;
  1257. BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
  1258. BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
  1259. if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
  1260. goto err;
  1261. if ((pre = nistp224_pre_comp_new()) == NULL)
  1262. goto err;
  1263. /* if the generator is the standard one, use built-in precomputation */
  1264. if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
  1265. {
  1266. memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
  1267. ret = 1;
  1268. goto err;
  1269. }
  1270. if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
  1271. (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
  1272. (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
  1273. goto err;
  1274. /* compute 2^56*G, 2^112*G, 2^168*G */
  1275. for (i = 1; i < 5; ++i)
  1276. {
  1277. point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
  1278. pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
  1279. pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
  1280. for (j = 0; j < 55; ++j)
  1281. {
  1282. point_double(pre->g_pre_comp[2*i][0],
  1283. pre->g_pre_comp[2*i][1],
  1284. pre->g_pre_comp[2*i][2],
  1285. pre->g_pre_comp[2*i][0],
  1286. pre->g_pre_comp[2*i][1],
  1287. pre->g_pre_comp[2*i][2]);
  1288. }
  1289. }
  1290. /* g_pre_comp[0] is the point at infinity */
  1291. memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
  1292. /* the remaining multiples */
  1293. /* 2^56*G + 2^112*G */
  1294. point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
  1295. pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
  1296. pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
  1297. pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
  1298. pre->g_pre_comp[2][2]);
  1299. /* 2^56*G + 2^168*G */
  1300. point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
  1301. pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
  1302. pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
  1303. pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
  1304. pre->g_pre_comp[2][2]);
  1305. /* 2^112*G + 2^168*G */
  1306. point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
  1307. pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
  1308. pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
  1309. pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
  1310. pre->g_pre_comp[4][2]);
  1311. /* 2^56*G + 2^112*G + 2^168*G */
  1312. point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
  1313. pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
  1314. pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
  1315. pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
  1316. pre->g_pre_comp[2][2]);
  1317. for (i = 1; i < 8; ++i)
  1318. {
  1319. /* odd multiples: add G */
  1320. point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
  1321. pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
  1322. pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
  1323. pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
  1324. pre->g_pre_comp[1][2]);
  1325. }
  1326. if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
  1327. nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
  1328. goto err;
  1329. ret = 1;
  1330. pre = NULL;
  1331. err:
  1332. BN_CTX_end(ctx);
  1333. if (generator != NULL)
  1334. EC_POINT_free(generator);
  1335. if (new_ctx != NULL)
  1336. BN_CTX_free(new_ctx);
  1337. if (pre)
  1338. nistp224_pre_comp_free(pre);
  1339. return ret;
  1340. }
  1341. int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
  1342. {
  1343. if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
  1344. nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
  1345. != NULL)
  1346. return 1;
  1347. else
  1348. return 0;
  1349. }
  1350. #else
  1351. static void *dummy=&dummy;
  1352. #endif