
  1. /* crypto/ec/ecp_nistp521.c */
  2. /*
  3. * Written by Adam Langley (Google) for the OpenSSL project
  4. */
  5. /* Copyright 2011 Google Inc.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. *
  9. * you may not use this file except in compliance with the License.
  10. * You may obtain a copy of the License at
  11. *
  12. * http://www.apache.org/licenses/LICENSE-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing, software
  15. * distributed under the License is distributed on an "AS IS" BASIS,
  16. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. * See the License for the specific language governing permissions and
  18. * limitations under the License.
  19. */
  20. /*
  21. * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
  22. *
  23. * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
  24. * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
  25. * work which got its smarts from Daniel J. Bernstein's work on the same.
  26. */
  27. #include <openssl/opensslconf.h>
  28. #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
  29. #include <stdint.h>
  30. #include <string.h>
  31. #include <openssl/err.h>
  32. #include "ec_lcl.h"
  33. #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  34. /* even with gcc, the typedef won't work for 32-bit platforms */
  35. typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
  36. #else
  37. #error "Need GCC 3.1 or later to define type uint128_t"
  38. #endif
  39. typedef uint8_t u8;
  40. typedef uint64_t u64;
  41. typedef int64_t s64;
  42. /* The underlying field.
  43. *
  44. * P521 operates over GF(2^521-1). We can serialise an element of this field
  45. * into 66 bytes where the most significant byte contains only a single bit. We
  46. * call this an felem_bytearray. */
  47. typedef u8 felem_bytearray[66];
  48. /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
  49. * These values are big-endian. */
  50. static const felem_bytearray nistp521_curve_params[5] =
  51. {
  52. {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
  53. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  54. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  55. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  56. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  57. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  58. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  59. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  60. 0xff, 0xff},
  61. {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
  62. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  63. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  64. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  65. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  66. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  67. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  68. 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  69. 0xff, 0xfc},
  70. {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
  71. 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
  72. 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
  73. 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
  74. 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
  75. 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
  76. 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
  77. 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
  78. 0x3f, 0x00},
  79. {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
  80. 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
  81. 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
  82. 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
  83. 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
  84. 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
  85. 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
  86. 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
  87. 0xbd, 0x66},
  88. {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
  89. 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
  90. 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
  91. 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
  92. 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
  93. 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
  94. 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
  95. 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
  96. 0x66, 0x50}
  97. };
  98. /* The representation of field elements.
  99. * ------------------------------------
  100. *
  101. * We represent field elements with nine values. These values are either 64 or
  102. * 128 bits and the field element represented is:
  103. * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
  104. * Each of the nine values is called a 'limb'. Since the limbs are spaced only
  105. * 58 bits apart, but are greater than 58 bits in length, the most significant
  106. * bits of each limb overlap with the least significant bits of the next.
  107. *
  108. * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
  109. * 'largefelem' */
  110. #define NLIMBS 9
  111. typedef uint64_t limb;
  112. typedef limb felem[NLIMBS];
  113. typedef uint128_t largefelem[NLIMBS];
  114. static const limb bottom57bits = 0x1ffffffffffffff;
  115. static const limb bottom58bits = 0x3ffffffffffffff;
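/* Worked example of the limb layout (for illustration): limb i begins at bit
 * 58*i of the serialised value, i.e. at byte (58*i)/8 with a shift of
 * (58*i) % 8 bits:
 *   limb:  0   1   2   3   4   5   6   7   8
 *   byte:  0   7   14  21  29  36  43  50  58
 *   shift: 0   2   4   6   0   2   4   6   0
 * which is exactly the set of offsets and shifts used by bin66_to_felem and
 * felem_to_bin66 below. Limb 8 holds only 57 bits because 464 + 57 = 521. */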
  116. /* bin66_to_felem takes a little-endian byte array and converts it into felem
  117. * form. This assumes that the CPU is little-endian. */
  118. static void bin66_to_felem(felem out, const u8 in[66])
  119. {
  120. out[0] = (*((limb*) &in[0])) & bottom58bits;
  121. out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
  122. out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
  123. out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
  124. out[4] = (*((limb*) &in[29])) & bottom58bits;
  125. out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
  126. out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
  127. out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
  128. out[8] = (*((limb*) &in[58])) & bottom57bits;
  129. }
  130. /* felem_to_bin66 takes an felem and serialises it into a little-endian, 66 byte
  131. * array. This assumes that the CPU is little-endian. */
  132. static void felem_to_bin66(u8 out[66], const felem in)
  133. {
  134. memset(out, 0, 66);
  135. (*((limb*) &out[0])) = in[0];
  136. (*((limb*) &out[7])) |= in[1] << 2;
  137. (*((limb*) &out[14])) |= in[2] << 4;
  138. (*((limb*) &out[21])) |= in[3] << 6;
  139. (*((limb*) &out[29])) = in[4];
  140. (*((limb*) &out[36])) |= in[5] << 2;
  141. (*((limb*) &out[43])) |= in[6] << 4;
  142. (*((limb*) &out[50])) |= in[7] << 6;
  143. (*((limb*) &out[58])) = in[8];
  144. }
  145. /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
  146. static void flip_endian(u8 *out, const u8 *in, unsigned len)
  147. {
  148. unsigned i;
  149. for (i = 0; i < len; ++i)
  150. out[i] = in[len-1-i];
  151. }
  152. /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
  153. static int BN_to_felem(felem out, const BIGNUM *bn)
  154. {
  155. felem_bytearray b_in;
  156. felem_bytearray b_out;
  157. unsigned num_bytes;
  158. /* BN_bn2bin eats leading zeroes */
  159. memset(b_out, 0, sizeof b_out);
  160. num_bytes = BN_num_bytes(bn);
  161. if (num_bytes > sizeof b_out)
  162. {
  163. ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
  164. return 0;
  165. }
  166. if (BN_is_negative(bn))
  167. {
  168. ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
  169. return 0;
  170. }
  171. num_bytes = BN_bn2bin(bn, b_in);
  172. flip_endian(b_out, b_in, num_bytes);
  173. bin66_to_felem(out, b_out);
  174. return 1;
  175. }
  176. /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
  177. static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
  178. {
  179. felem_bytearray b_in, b_out;
  180. felem_to_bin66(b_in, in);
  181. flip_endian(b_out, b_in, sizeof b_out);
  182. return BN_bin2bn(b_out, sizeof b_out, out);
  183. }
  184. /* Field operations
  185. * ---------------- */
  186. static void felem_one(felem out)
  187. {
  188. out[0] = 1;
  189. out[1] = 0;
  190. out[2] = 0;
  191. out[3] = 0;
  192. out[4] = 0;
  193. out[5] = 0;
  194. out[6] = 0;
  195. out[7] = 0;
  196. out[8] = 0;
  197. }
  198. static void felem_assign(felem out, const felem in)
  199. {
  200. out[0] = in[0];
  201. out[1] = in[1];
  202. out[2] = in[2];
  203. out[3] = in[3];
  204. out[4] = in[4];
  205. out[5] = in[5];
  206. out[6] = in[6];
  207. out[7] = in[7];
  208. out[8] = in[8];
  209. }
  210. /* felem_sum64 sets out = out + in. */
  211. static void felem_sum64(felem out, const felem in)
  212. {
  213. out[0] += in[0];
  214. out[1] += in[1];
  215. out[2] += in[2];
  216. out[3] += in[3];
  217. out[4] += in[4];
  218. out[5] += in[5];
  219. out[6] += in[6];
  220. out[7] += in[7];
  221. out[8] += in[8];
  222. }
  223. /* felem_scalar sets out = in * scalar */
  224. static void felem_scalar(felem out, const felem in, limb scalar)
  225. {
  226. out[0] = in[0] * scalar;
  227. out[1] = in[1] * scalar;
  228. out[2] = in[2] * scalar;
  229. out[3] = in[3] * scalar;
  230. out[4] = in[4] * scalar;
  231. out[5] = in[5] * scalar;
  232. out[6] = in[6] * scalar;
  233. out[7] = in[7] * scalar;
  234. out[8] = in[8] * scalar;
  235. }
  236. /* felem_scalar64 sets out = out * scalar */
  237. static void felem_scalar64(felem out, limb scalar)
  238. {
  239. out[0] *= scalar;
  240. out[1] *= scalar;
  241. out[2] *= scalar;
  242. out[3] *= scalar;
  243. out[4] *= scalar;
  244. out[5] *= scalar;
  245. out[6] *= scalar;
  246. out[7] *= scalar;
  247. out[8] *= scalar;
  248. }
  249. /* felem_scalar128 sets out = out * scalar */
  250. static void felem_scalar128(largefelem out, limb scalar)
  251. {
  252. out[0] *= scalar;
  253. out[1] *= scalar;
  254. out[2] *= scalar;
  255. out[3] *= scalar;
  256. out[4] *= scalar;
  257. out[5] *= scalar;
  258. out[6] *= scalar;
  259. out[7] *= scalar;
  260. out[8] *= scalar;
  261. }
  262. /* felem_neg sets |out| to |-in|
  263. * On entry:
  264. * in[i] < 2^59 + 2^14
  265. * On exit:
  266. * out[i] < 2^62
  267. */
  268. static void felem_neg(felem out, const felem in)
  269. {
  270. /* In order to prevent underflow, we subtract from 0 mod p. */
  271. static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
  272. static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
  273. out[0] = two62m3 - in[0];
  274. out[1] = two62m2 - in[1];
  275. out[2] = two62m2 - in[2];
  276. out[3] = two62m2 - in[3];
  277. out[4] = two62m2 - in[4];
  278. out[5] = two62m2 - in[5];
  279. out[6] = two62m2 - in[6];
  280. out[7] = two62m2 - in[7];
  281. out[8] = two62m2 - in[8];
  282. }
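/* For illustration: the constants added above are a multiple of p, so the
 * subtraction leaves the value unchanged mod p. Writing them out:
 *   (2^62 - 2^5) + sum_{i=1..8} (2^62 - 2^4)*2^(58*i)
 *     = (2^62 - 2^5) + 2^4*(2^(58*9) - 2^58)        (telescoping sum)
 *     = 2^526 - 2^5 = 2^5*(2^521 - 1) = 32*p,
 * and since in[i] < 2^59 + 2^14, no per-limb subtraction can underflow.
 * felem_diff64 uses the same constants; felem_diff_128_64 and felem_diff128
 * below add larger multiples of p in the same way. */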
  283. /* felem_diff64 subtracts |in| from |out|
  284. * On entry:
  285. * in[i] < 2^59 + 2^14
  286. * On exit:
  287. * out[i] < out[i] + 2^62
  288. */
  289. static void felem_diff64(felem out, const felem in)
  290. {
  291. /* In order to prevent underflow, we add 0 mod p before subtracting. */
  292. static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
  293. static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
  294. out[0] += two62m3 - in[0];
  295. out[1] += two62m2 - in[1];
  296. out[2] += two62m2 - in[2];
  297. out[3] += two62m2 - in[3];
  298. out[4] += two62m2 - in[4];
  299. out[5] += two62m2 - in[5];
  300. out[6] += two62m2 - in[6];
  301. out[7] += two62m2 - in[7];
  302. out[8] += two62m2 - in[8];
  303. }
  304. /* felem_diff_128_64 subtracts |in| from |out|
  305. * On entry:
  306. * in[i] < 2^62 + 2^17
  307. * On exit:
  308. * out[i] < out[i] + 2^63
  309. */
  310. static void felem_diff_128_64(largefelem out, const felem in)
  311. {
  312. /* In order to prevent underflow, we add 64p (= 0 mod p) before subtracting. */
  313. static const limb two63m6 = (((limb)1) << 63) - (((limb)1) << 6);
  314. static const limb two63m5 = (((limb)1) << 63) - (((limb)1) << 5);
  315. out[0] += two63m6 - in[0];
  316. out[1] += two63m5 - in[1];
  317. out[2] += two63m5 - in[2];
  318. out[3] += two63m5 - in[3];
  319. out[4] += two63m5 - in[4];
  320. out[5] += two63m5 - in[5];
  321. out[6] += two63m5 - in[6];
  322. out[7] += two63m5 - in[7];
  323. out[8] += two63m5 - in[8];
  324. }
  325. /* felem_diff128 subtracts |in| from |out|
  326. * On entry:
  327. * in[i] < 2^126
  328. * On exit:
  329. * out[i] < out[i] + 2^127 - 2^69
  330. */
  331. static void felem_diff128(largefelem out, const largefelem in)
  332. {
  333. /* In order to prevent underflow, we add 0 mod p before subtracting. */
  334. static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
  335. static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
  336. out[0] += (two127m70 - in[0]);
  337. out[1] += (two127m69 - in[1]);
  338. out[2] += (two127m69 - in[2]);
  339. out[3] += (two127m69 - in[3]);
  340. out[4] += (two127m69 - in[4]);
  341. out[5] += (two127m69 - in[5]);
  342. out[6] += (two127m69 - in[6]);
  343. out[7] += (two127m69 - in[7]);
  344. out[8] += (two127m69 - in[8]);
  345. }
  346. /* felem_square sets |out| = |in|^2
  347. * On entry:
  348. * in[i] < 2^62
  349. * On exit:
  350. * out[i] < 17 * max(in[i]) * max(in[i])
  351. */
  352. static void felem_square(largefelem out, const felem in)
  353. {
  354. felem inx2, inx4;
  355. felem_scalar(inx2, in, 2);
  356. felem_scalar(inx4, in, 4);
  357. /* We have many cases where we want to do
  358. * in[x] * in[y] +
  359. * in[y] * in[x]
  360. * This is obviously just
  361. * 2 * in[x] * in[y]
  362. * However, rather than do the doubling on the 128 bit result, we
  363. * double one of the inputs to the multiplication by reading from
  364. * |inx2| */
  365. out[0] = ((uint128_t) in[0]) * in[0];
  366. out[1] = ((uint128_t) in[0]) * inx2[1];
  367. out[2] = ((uint128_t) in[0]) * inx2[2] +
  368. ((uint128_t) in[1]) * in[1];
  369. out[3] = ((uint128_t) in[0]) * inx2[3] +
  370. ((uint128_t) in[1]) * inx2[2];
  371. out[4] = ((uint128_t) in[0]) * inx2[4] +
  372. ((uint128_t) in[1]) * inx2[3] +
  373. ((uint128_t) in[2]) * in[2];
  374. out[5] = ((uint128_t) in[0]) * inx2[5] +
  375. ((uint128_t) in[1]) * inx2[4] +
  376. ((uint128_t) in[2]) * inx2[3];
  377. out[6] = ((uint128_t) in[0]) * inx2[6] +
  378. ((uint128_t) in[1]) * inx2[5] +
  379. ((uint128_t) in[2]) * inx2[4] +
  380. ((uint128_t) in[3]) * in[3];
  381. out[7] = ((uint128_t) in[0]) * inx2[7] +
  382. ((uint128_t) in[1]) * inx2[6] +
  383. ((uint128_t) in[2]) * inx2[5] +
  384. ((uint128_t) in[3]) * inx2[4];
  385. out[8] = ((uint128_t) in[0]) * inx2[8] +
  386. ((uint128_t) in[1]) * inx2[7] +
  387. ((uint128_t) in[2]) * inx2[6] +
  388. ((uint128_t) in[3]) * inx2[5] +
  389. ((uint128_t) in[4]) * in[4];
  390. /* The remaining limbs fall above 2^521, with the first falling at
  391. * 2^522. They correspond to locations one bit up from the limbs
  392. * produced above so we would have to multiply by two to align them.
  393. * Again, rather than operate on the 128-bit result, we double one of
  394. * the inputs to the multiplication. If we want to double for both this
  395. * reason, and the reason above, then we end up multiplying by four. */
  396. /* 9 */
  397. out[0] += ((uint128_t) in[1]) * inx4[8] +
  398. ((uint128_t) in[2]) * inx4[7] +
  399. ((uint128_t) in[3]) * inx4[6] +
  400. ((uint128_t) in[4]) * inx4[5];
  401. /* 10 */
  402. out[1] += ((uint128_t) in[2]) * inx4[8] +
  403. ((uint128_t) in[3]) * inx4[7] +
  404. ((uint128_t) in[4]) * inx4[6] +
  405. ((uint128_t) in[5]) * inx2[5];
  406. /* 11 */
  407. out[2] += ((uint128_t) in[3]) * inx4[8] +
  408. ((uint128_t) in[4]) * inx4[7] +
  409. ((uint128_t) in[5]) * inx4[6];
  410. /* 12 */
  411. out[3] += ((uint128_t) in[4]) * inx4[8] +
  412. ((uint128_t) in[5]) * inx4[7] +
  413. ((uint128_t) in[6]) * inx2[6];
  414. /* 13 */
  415. out[4] += ((uint128_t) in[5]) * inx4[8] +
  416. ((uint128_t) in[6]) * inx4[7];
  417. /* 14 */
  418. out[5] += ((uint128_t) in[6]) * inx4[8] +
  419. ((uint128_t) in[7]) * inx2[7];
  420. /* 15 */
  421. out[6] += ((uint128_t) in[7]) * inx4[8];
  422. /* 16 */
  423. out[7] += ((uint128_t) in[8]) * inx2[8];
  424. }
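/* For illustration: the extra doubling for the high limbs comes from the
 * modulus. A product that lands at limb position j >= 9 is a multiple of
 * 2^(58*j) = 2^(58*(j-9)) * 2^522, and 2^522 = 2 * 2^521 == 2 (mod 2^521 - 1),
 * so it folds down to limb j-9 with an extra factor of two. Cross terms are
 * therefore scaled by 4 (2 for the symmetric term, 2 for the fold, hence
 * |inx4|), while the squared terms in[5]*in[5], ..., in[8]*in[8] only pick up
 * the factor from the fold (hence |inx2|). */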
  425. /* felem_mul sets |out| = |in1| * |in2|
  426. * On entry:
  427. * in1[i] < 2^64
  428. * in2[i] < 2^63
  429. * On exit:
  430. * out[i] < 17 * max(in1[i]) * max(in2[i])
  431. */
  432. static void felem_mul(largefelem out, const felem in1, const felem in2)
  433. {
  434. felem in2x2;
  435. felem_scalar(in2x2, in2, 2);
  436. out[0] = ((uint128_t) in1[0]) * in2[0];
  437. out[1] = ((uint128_t) in1[0]) * in2[1] +
  438. ((uint128_t) in1[1]) * in2[0];
  439. out[2] = ((uint128_t) in1[0]) * in2[2] +
  440. ((uint128_t) in1[1]) * in2[1] +
  441. ((uint128_t) in1[2]) * in2[0];
  442. out[3] = ((uint128_t) in1[0]) * in2[3] +
  443. ((uint128_t) in1[1]) * in2[2] +
  444. ((uint128_t) in1[2]) * in2[1] +
  445. ((uint128_t) in1[3]) * in2[0];
  446. out[4] = ((uint128_t) in1[0]) * in2[4] +
  447. ((uint128_t) in1[1]) * in2[3] +
  448. ((uint128_t) in1[2]) * in2[2] +
  449. ((uint128_t) in1[3]) * in2[1] +
  450. ((uint128_t) in1[4]) * in2[0];
  451. out[5] = ((uint128_t) in1[0]) * in2[5] +
  452. ((uint128_t) in1[1]) * in2[4] +
  453. ((uint128_t) in1[2]) * in2[3] +
  454. ((uint128_t) in1[3]) * in2[2] +
  455. ((uint128_t) in1[4]) * in2[1] +
  456. ((uint128_t) in1[5]) * in2[0];
  457. out[6] = ((uint128_t) in1[0]) * in2[6] +
  458. ((uint128_t) in1[1]) * in2[5] +
  459. ((uint128_t) in1[2]) * in2[4] +
  460. ((uint128_t) in1[3]) * in2[3] +
  461. ((uint128_t) in1[4]) * in2[2] +
  462. ((uint128_t) in1[5]) * in2[1] +
  463. ((uint128_t) in1[6]) * in2[0];
  464. out[7] = ((uint128_t) in1[0]) * in2[7] +
  465. ((uint128_t) in1[1]) * in2[6] +
  466. ((uint128_t) in1[2]) * in2[5] +
  467. ((uint128_t) in1[3]) * in2[4] +
  468. ((uint128_t) in1[4]) * in2[3] +
  469. ((uint128_t) in1[5]) * in2[2] +
  470. ((uint128_t) in1[6]) * in2[1] +
  471. ((uint128_t) in1[7]) * in2[0];
  472. out[8] = ((uint128_t) in1[0]) * in2[8] +
  473. ((uint128_t) in1[1]) * in2[7] +
  474. ((uint128_t) in1[2]) * in2[6] +
  475. ((uint128_t) in1[3]) * in2[5] +
  476. ((uint128_t) in1[4]) * in2[4] +
  477. ((uint128_t) in1[5]) * in2[3] +
  478. ((uint128_t) in1[6]) * in2[2] +
  479. ((uint128_t) in1[7]) * in2[1] +
  480. ((uint128_t) in1[8]) * in2[0];
  481. /* See comment in felem_square about the use of in2x2 here */
  482. out[0] += ((uint128_t) in1[1]) * in2x2[8] +
  483. ((uint128_t) in1[2]) * in2x2[7] +
  484. ((uint128_t) in1[3]) * in2x2[6] +
  485. ((uint128_t) in1[4]) * in2x2[5] +
  486. ((uint128_t) in1[5]) * in2x2[4] +
  487. ((uint128_t) in1[6]) * in2x2[3] +
  488. ((uint128_t) in1[7]) * in2x2[2] +
  489. ((uint128_t) in1[8]) * in2x2[1];
  490. out[1] += ((uint128_t) in1[2]) * in2x2[8] +
  491. ((uint128_t) in1[3]) * in2x2[7] +
  492. ((uint128_t) in1[4]) * in2x2[6] +
  493. ((uint128_t) in1[5]) * in2x2[5] +
  494. ((uint128_t) in1[6]) * in2x2[4] +
  495. ((uint128_t) in1[7]) * in2x2[3] +
  496. ((uint128_t) in1[8]) * in2x2[2];
  497. out[2] += ((uint128_t) in1[3]) * in2x2[8] +
  498. ((uint128_t) in1[4]) * in2x2[7] +
  499. ((uint128_t) in1[5]) * in2x2[6] +
  500. ((uint128_t) in1[6]) * in2x2[5] +
  501. ((uint128_t) in1[7]) * in2x2[4] +
  502. ((uint128_t) in1[8]) * in2x2[3];
  503. out[3] += ((uint128_t) in1[4]) * in2x2[8] +
  504. ((uint128_t) in1[5]) * in2x2[7] +
  505. ((uint128_t) in1[6]) * in2x2[6] +
  506. ((uint128_t) in1[7]) * in2x2[5] +
  507. ((uint128_t) in1[8]) * in2x2[4];
  508. out[4] += ((uint128_t) in1[5]) * in2x2[8] +
  509. ((uint128_t) in1[6]) * in2x2[7] +
  510. ((uint128_t) in1[7]) * in2x2[6] +
  511. ((uint128_t) in1[8]) * in2x2[5];
  512. out[5] += ((uint128_t) in1[6]) * in2x2[8] +
  513. ((uint128_t) in1[7]) * in2x2[7] +
  514. ((uint128_t) in1[8]) * in2x2[6];
  515. out[6] += ((uint128_t) in1[7]) * in2x2[8] +
  516. ((uint128_t) in1[8]) * in2x2[7];
  517. out[7] += ((uint128_t) in1[8]) * in2x2[8];
  518. }
  519. static const limb bottom52bits = 0xfffffffffffff;
  520. /* felem_reduce converts a largefelem to an felem.
  521. * On entry:
  522. * in[i] < 2^128
  523. * On exit:
  524. * out[i] < 2^59 + 2^14
  525. */
  526. static void felem_reduce(felem out, const largefelem in)
  527. {
  528. u64 overflow1, overflow2;
  529. out[0] = ((limb) in[0]) & bottom58bits;
  530. out[1] = ((limb) in[1]) & bottom58bits;
  531. out[2] = ((limb) in[2]) & bottom58bits;
  532. out[3] = ((limb) in[3]) & bottom58bits;
  533. out[4] = ((limb) in[4]) & bottom58bits;
  534. out[5] = ((limb) in[5]) & bottom58bits;
  535. out[6] = ((limb) in[6]) & bottom58bits;
  536. out[7] = ((limb) in[7]) & bottom58bits;
  537. out[8] = ((limb) in[8]) & bottom58bits;
  538. /* out[i] < 2^58 */
  539. out[1] += ((limb) in[0]) >> 58;
  540. out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
  541. /* out[1] < 2^58 + 2^6 + 2^58
  542. * = 2^59 + 2^6 */
  543. out[2] += ((limb) (in[0] >> 64)) >> 52;
  544. out[2] += ((limb) in[1]) >> 58;
  545. out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
  546. out[3] += ((limb) (in[1] >> 64)) >> 52;
  547. out[3] += ((limb) in[2]) >> 58;
  548. out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
  549. out[4] += ((limb) (in[2] >> 64)) >> 52;
  550. out[4] += ((limb) in[3]) >> 58;
  551. out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
  552. out[5] += ((limb) (in[3] >> 64)) >> 52;
  553. out[5] += ((limb) in[4]) >> 58;
  554. out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
  555. out[6] += ((limb) (in[4] >> 64)) >> 52;
  556. out[6] += ((limb) in[5]) >> 58;
  557. out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
  558. out[7] += ((limb) (in[5] >> 64)) >> 52;
  559. out[7] += ((limb) in[6]) >> 58;
  560. out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
  561. out[8] += ((limb) (in[6] >> 64)) >> 52;
  562. out[8] += ((limb) in[7]) >> 58;
  563. out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
  564. /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
  565. * < 2^59 + 2^13 */
  566. overflow1 = ((limb) (in[7] >> 64)) >> 52;
  567. overflow1 += ((limb) in[8]) >> 58;
  568. overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
  569. overflow2 = ((limb) (in[8] >> 64)) >> 52;
  570. overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
  571. overflow2 <<= 1; /* overflow2 < 2^13 */
  572. out[0] += overflow1; /* out[0] < 2^60 */
  573. out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
  574. out[1] += out[0] >> 58; out[0] &= bottom58bits;
  575. /* out[0] < 2^58
  576. * out[1] < 2^59 + 2^6 + 2^13 + 2^2
  577. * < 2^59 + 2^14 */
  578. }
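/* For illustration: overflow1 and overflow2 are the parts of the product
 * that land at limb positions 9 and 10, i.e. they are multiples of
 * 2^(58*9) = 2^522 and 2^(58*10) = 2^580. Since 2^521 == 1 (mod p),
 *   2^522 == 2          (so overflow1 folds into out[0] doubled), and
 *   2^580 == 2 * 2^58   (so overflow2 folds into out[1] doubled),
 * which is why both are shifted left by one before being added. */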
  579. static void felem_square_reduce(felem out, const felem in)
  580. {
  581. largefelem tmp;
  582. felem_square(tmp, in);
  583. felem_reduce(out, tmp);
  584. }
  585. static void felem_mul_reduce(felem out, const felem in1, const felem in2)
  586. {
  587. largefelem tmp;
  588. felem_mul(tmp, in1, in2);
  589. felem_reduce(out, tmp);
  590. }
  591. /* felem_inv calculates |out| = |in|^{-1}
  592. *
  593. * Based on Fermat's Little Theorem:
  594. * a^p = a (mod p)
  595. * a^{p-1} = 1 (mod p)
  596. * a^{p-2} = a^{-1} (mod p)
  597. */
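/* Sketch of the addition chain below: p - 2 = 2^521 - 3. The code first
 * builds in^(2^k - 1) for k = 2, 4, 8, 16, 32, 64, 128, 256 and 512 by
 * squaring the previous result k/2 times and multiplying the two halves
 * together, saving in^(2^9 - 2^2) in ftmp4 along the way. Nine further
 * squarings give in^(2^521 - 2^9); multiplying by ftmp4 and then by in gives
 * in^(2^521 - 2^2 + 1) = in^(2^521 - 3) = in^(p-2). */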
  598. static void felem_inv(felem out, const felem in)
  599. {
  600. felem ftmp, ftmp2, ftmp3, ftmp4;
  601. largefelem tmp;
  602. unsigned i;
  603. felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
  604. felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
  605. felem_assign(ftmp2, ftmp);
  606. felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
  607. felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
  608. felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
  609. felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
  610. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
  611. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
  612. felem_assign(ftmp2, ftmp3);
  613. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
  614. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
  615. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
  616. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
  617. felem_assign(ftmp4, ftmp3);
  618. felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
  619. felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
  620. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
  621. felem_assign(ftmp2, ftmp3);
  622. for (i = 0; i < 8; i++)
  623. {
  624. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
  625. }
  626. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
  627. felem_assign(ftmp2, ftmp3);
  628. for (i = 0; i < 16; i++)
  629. {
  630. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
  631. }
  632. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
  633. felem_assign(ftmp2, ftmp3);
  634. for (i = 0; i < 32; i++)
  635. {
  636. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
  637. }
  638. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
  639. felem_assign(ftmp2, ftmp3);
  640. for (i = 0; i < 64; i++)
  641. {
  642. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
  643. }
  644. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
  645. felem_assign(ftmp2, ftmp3);
  646. for (i = 0; i < 128; i++)
  647. {
  648. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
  649. }
  650. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
  651. felem_assign(ftmp2, ftmp3);
  652. for (i = 0; i < 256; i++)
  653. {
  654. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
  655. }
  656. felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
  657. for (i = 0; i < 9; i++)
  658. {
  659. felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
  660. }
  661. felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^521 - 2^2 */
  662. felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^521 - 3 */
  663. }
  664. /* This is 2^521-1, expressed as an felem */
  665. static const felem kPrime =
  666. {
  667. 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
  668. 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
  669. 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
  670. };
  671. /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
  672. * otherwise.
  673. * On entry:
  674. * in[i] < 2^59 + 2^14
  675. */
  676. static limb felem_is_zero(const felem in)
  677. {
  678. felem ftmp;
  679. limb is_zero, is_p;
  680. felem_assign(ftmp, in);
  681. ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
  682. /* ftmp[8] < 2^57 */
  683. ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
  684. ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
  685. ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
  686. ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
  687. ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
  688. ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
  689. ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
  690. ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
  691. /* ftmp[8] < 2^57 + 4 */
  692. /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
  693. * greater than our bound for ftmp[8]. Therefore we only have to check
  694. * whether the value is zero or 2^521-1. */
  695. is_zero = 0;
  696. is_zero |= ftmp[0];
  697. is_zero |= ftmp[1];
  698. is_zero |= ftmp[2];
  699. is_zero |= ftmp[3];
  700. is_zero |= ftmp[4];
  701. is_zero |= ftmp[5];
  702. is_zero |= ftmp[6];
  703. is_zero |= ftmp[7];
  704. is_zero |= ftmp[8];
  705. is_zero--;
  706. /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
  707. * can be set is if is_zero was 0 before the decrement. */
  708. is_zero = ((s64) is_zero) >> 63;
  709. is_p = ftmp[0] ^ kPrime[0];
  710. is_p |= ftmp[1] ^ kPrime[1];
  711. is_p |= ftmp[2] ^ kPrime[2];
  712. is_p |= ftmp[3] ^ kPrime[3];
  713. is_p |= ftmp[4] ^ kPrime[4];
  714. is_p |= ftmp[5] ^ kPrime[5];
  715. is_p |= ftmp[6] ^ kPrime[6];
  716. is_p |= ftmp[7] ^ kPrime[7];
  717. is_p |= ftmp[8] ^ kPrime[8];
  718. is_p--;
  719. is_p = ((s64) is_p) >> 63;
  720. is_zero |= is_p;
  721. return is_zero;
  722. }
  723. static int felem_is_zero_int(const felem in)
  724. {
  725. return (int) (felem_is_zero(in) & ((limb)1));
  726. }
  727. /* felem_contract converts |in| to its unique, minimal representation.
  728. * On entry:
  729. * in[i] < 2^59 + 2^14
  730. */
  731. static void felem_contract(felem out, const felem in)
  732. {
  733. limb is_p, is_greater, sign;
  734. static const limb two58 = ((limb)1) << 58;
  735. felem_assign(out, in);
  736. out[0] += out[8] >> 57; out[8] &= bottom57bits;
  737. /* out[8] < 2^57 */
  738. out[1] += out[0] >> 58; out[0] &= bottom58bits;
  739. out[2] += out[1] >> 58; out[1] &= bottom58bits;
  740. out[3] += out[2] >> 58; out[2] &= bottom58bits;
  741. out[4] += out[3] >> 58; out[3] &= bottom58bits;
  742. out[5] += out[4] >> 58; out[4] &= bottom58bits;
  743. out[6] += out[5] >> 58; out[5] &= bottom58bits;
  744. out[7] += out[6] >> 58; out[6] &= bottom58bits;
  745. out[8] += out[7] >> 58; out[7] &= bottom58bits;
  746. /* out[8] < 2^57 + 4 */
  747. /* If the value is greater than 2^521-1 then we have to subtract
  748. * 2^521-1 out. See the comments in felem_is_zero regarding why we
  749. * don't test for other multiples of the prime. */
  750. /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
  751. is_p = out[0] ^ kPrime[0];
  752. is_p |= out[1] ^ kPrime[1];
  753. is_p |= out[2] ^ kPrime[2];
  754. is_p |= out[3] ^ kPrime[3];
  755. is_p |= out[4] ^ kPrime[4];
  756. is_p |= out[5] ^ kPrime[5];
  757. is_p |= out[6] ^ kPrime[6];
  758. is_p |= out[7] ^ kPrime[7];
  759. is_p |= out[8] ^ kPrime[8];
  760. is_p--;
  761. is_p &= is_p << 32;
  762. is_p &= is_p << 16;
  763. is_p &= is_p << 8;
  764. is_p &= is_p << 4;
  765. is_p &= is_p << 2;
  766. is_p &= is_p << 1;
  767. is_p = ((s64) is_p) >> 63;
  768. is_p = ~is_p;
  769. /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
  770. out[0] &= is_p;
  771. out[1] &= is_p;
  772. out[2] &= is_p;
  773. out[3] &= is_p;
  774. out[4] &= is_p;
  775. out[5] &= is_p;
  776. out[6] &= is_p;
  777. out[7] &= is_p;
  778. out[8] &= is_p;
  779. /* In order to test that |out| >= 2^521-1 we need only test if out[8]
  780. * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
  781. is_greater = out[8] >> 57;
  782. is_greater |= is_greater << 32;
  783. is_greater |= is_greater << 16;
  784. is_greater |= is_greater << 8;
  785. is_greater |= is_greater << 4;
  786. is_greater |= is_greater << 2;
  787. is_greater |= is_greater << 1;
  788. is_greater = ((s64) is_greater) >> 63;
  789. out[0] -= kPrime[0] & is_greater;
  790. out[1] -= kPrime[1] & is_greater;
  791. out[2] -= kPrime[2] & is_greater;
  792. out[3] -= kPrime[3] & is_greater;
  793. out[4] -= kPrime[4] & is_greater;
  794. out[5] -= kPrime[5] & is_greater;
  795. out[6] -= kPrime[6] & is_greater;
  796. out[7] -= kPrime[7] & is_greater;
  797. out[8] -= kPrime[8] & is_greater;
  798. /* Eliminate negative coefficients */
  799. sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
  800. sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
  801. sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
  802. sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
  803. sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
  804. sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
  805. sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
  806. sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
  807. sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
  808. sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
  809. sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
  810. }
  811. /* Group operations
  812. * ----------------
  813. *
  814. * Building on top of the field operations we have the operations on the
  815. * elliptic curve group itself. Points on the curve are represented in Jacobian
  816. * coordinates */
  817. /* point_double calculates 2*(x_in, y_in, z_in)
  818. *
  819. * The method is taken from:
  820. * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
  821. *
  822. * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
  823. * while x_out == y_in is not (maybe this works, but it's not tested). */
  824. static void
  825. point_double(felem x_out, felem y_out, felem z_out,
  826. const felem x_in, const felem y_in, const felem z_in)
  827. {
  828. largefelem tmp, tmp2;
  829. felem delta, gamma, beta, alpha, ftmp, ftmp2;
  830. felem_assign(ftmp, x_in);
  831. felem_assign(ftmp2, x_in);
  832. /* delta = z^2 */
  833. felem_square(tmp, z_in);
  834. felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
  835. /* gamma = y^2 */
  836. felem_square(tmp, y_in);
  837. felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
  838. /* beta = x*gamma */
  839. felem_mul(tmp, x_in, gamma);
  840. felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
  841. /* alpha = 3*(x-delta)*(x+delta) */
  842. felem_diff64(ftmp, delta);
  843. /* ftmp[i] < 2^61 */
  844. felem_sum64(ftmp2, delta);
  845. /* ftmp2[i] < 2^60 + 2^15 */
  846. felem_scalar64(ftmp2, 3);
  847. /* ftmp2[i] < 3*2^60 + 3*2^15 */
  848. felem_mul(tmp, ftmp, ftmp2);
  849. /* tmp[i] < 17(3*2^121 + 3*2^76)
  850. * = 61*2^121 + 61*2^76
  851. * < 64*2^121 + 64*2^76
  852. * = 2^127 + 2^82
  853. * < 2^128 */
  854. felem_reduce(alpha, tmp);
  855. /* x' = alpha^2 - 8*beta */
  856. felem_square(tmp, alpha);
  857. /* tmp[i] < 17*2^120
  858. * < 2^125 */
  859. felem_assign(ftmp, beta);
  860. felem_scalar64(ftmp, 8);
  861. /* ftmp[i] < 2^62 + 2^17 */
  862. felem_diff_128_64(tmp, ftmp);
  863. /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
  864. felem_reduce(x_out, tmp);
  865. /* z' = (y + z)^2 - gamma - delta */
  866. felem_sum64(delta, gamma);
  867. /* delta[i] < 2^60 + 2^15 */
  868. felem_assign(ftmp, y_in);
  869. felem_sum64(ftmp, z_in);
  870. /* ftmp[i] < 2^60 + 2^15 */
  871. felem_square(tmp, ftmp);
  872. /* tmp[i] < 17(2^122)
  873. * < 2^127 */
  874. felem_diff_128_64(tmp, delta);
  875. /* tmp[i] < 2^127 + 2^63 */
  876. felem_reduce(z_out, tmp);
  877. /* y' = alpha*(4*beta - x') - 8*gamma^2 */
  878. felem_scalar64(beta, 4);
  879. /* beta[i] < 2^61 + 2^16 */
  880. felem_diff64(beta, x_out);
  881. /* beta[i] < 2^61 + 2^60 + 2^16 */
  882. felem_mul(tmp, alpha, beta);
  883. /* tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
  884. * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
  885. * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
  886. * < 2^128 */
  887. felem_square(tmp2, gamma);
  888. /* tmp2[i] < 17*(2^59 + 2^14)^2
  889. * = 17*(2^118 + 2^74 + 2^28) */
  890. felem_scalar128(tmp2, 8);
  891. /* tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
  892. * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
  893. * < 2^126 */
  894. felem_diff128(tmp, tmp2);
  895. /* tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
  896. * = 2^127 + 2^124 + 2^123 + 2^120 + 2^119 + 2^80 + 2^78 + 2^76 +
  897. * 2^74 - 2^69 + 2^34 + 2^30
  898. * < 2^128 */
  899. felem_reduce(y_out, tmp);
  900. }
  901. /* copy_conditional copies in to out iff mask is all ones. */
  902. static void
  903. copy_conditional(felem out, const felem in, limb mask)
  904. {
  905. unsigned i;
  906. for (i = 0; i < NLIMBS; ++i)
  907. {
  908. const limb tmp = mask & (in[i] ^ out[i]);
  909. out[i] ^= tmp;
  910. }
  911. }
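/* For illustration: |mask| is always either 0 or all ones, so
 * mask & (in[i] ^ out[i]) is either 0 or in[i] ^ out[i]; XOR-ing it into
 * out[i] therefore either leaves out[i] unchanged or replaces it with in[i],
 * without any data-dependent branch or memory access pattern. */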
  912. /* point_add calculates (x1, y1, z1) + (x2, y2, z2)
  913. *
  914. * The method is taken from
  915. * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
  916. * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
  917. *
  918. * This function includes a branch for checking whether the two input points
  919. * are equal (while not equal to the point at infinity). This case never
  920. * happens during single point multiplication, so there is no timing leak for
  921. * ECDH or ECDSA signing. */
  922. static void point_add(felem x3, felem y3, felem z3,
  923. const felem x1, const felem y1, const felem z1,
  924. const int mixed, const felem x2, const felem y2, const felem z2)
  925. {
  926. felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
  927. largefelem tmp, tmp2;
  928. limb x_equal, y_equal, z1_is_zero, z2_is_zero;
  929. z1_is_zero = felem_is_zero(z1);
  930. z2_is_zero = felem_is_zero(z2);
  931. /* ftmp = z1z1 = z1**2 */
  932. felem_square(tmp, z1);
  933. felem_reduce(ftmp, tmp);
  934. if (!mixed)
  935. {
  936. /* ftmp2 = z2z2 = z2**2 */
  937. felem_square(tmp, z2);
  938. felem_reduce(ftmp2, tmp);
  939. /* u1 = ftmp3 = x1*z2z2 */
  940. felem_mul(tmp, x1, ftmp2);
  941. felem_reduce(ftmp3, tmp);
  942. /* ftmp5 = z1 + z2 */
  943. felem_assign(ftmp5, z1);
  944. felem_sum64(ftmp5, z2);
  945. /* ftmp5[i] < 2^61 */
  946. /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
  947. felem_square(tmp, ftmp5);
  948. /* tmp[i] < 17*2^122 */
  949. felem_diff_128_64(tmp, ftmp);
  950. /* tmp[i] < 17*2^122 + 2^63 */
  951. felem_diff_128_64(tmp, ftmp2);
  952. /* tmp[i] < 17*2^122 + 2^64 */
  953. felem_reduce(ftmp5, tmp);
  954. /* ftmp2 = z2 * z2z2 */
  955. felem_mul(tmp, ftmp2, z2);
  956. felem_reduce(ftmp2, tmp);
  957. /* s1 = ftmp6 = y1 * z2**3 */
  958. felem_mul(tmp, y1, ftmp2);
  959. felem_reduce(ftmp6, tmp);
  960. }
  961. else
  962. {
  963. /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
  964. /* u1 = ftmp3 = x1*z2z2 */
  965. felem_assign(ftmp3, x1);
  966. /* ftmp5 = 2*z1z2 */
  967. felem_scalar(ftmp5, z1, 2);
  968. /* s1 = ftmp6 = y1 * z2**3 */
  969. felem_assign(ftmp6, y1);
  970. }
  971. /* u2 = x2*z1z1 */
  972. felem_mul(tmp, x2, ftmp);
  973. /* tmp[i] < 17*2^120 */
  974. /* h = ftmp4 = u2 - u1 */
  975. felem_diff_128_64(tmp, ftmp3);
  976. /* tmp[i] < 17*2^120 + 2^63 */
  977. felem_reduce(ftmp4, tmp);
  978. x_equal = felem_is_zero(ftmp4);
  979. /* z_out = ftmp5 * h */
  980. felem_mul(tmp, ftmp5, ftmp4);
  981. felem_reduce(z_out, tmp);
  982. /* ftmp = z1 * z1z1 */
  983. felem_mul(tmp, ftmp, z1);
  984. felem_reduce(ftmp, tmp);
  985. /* s2 = tmp = y2 * z1**3 */
  986. felem_mul(tmp, y2, ftmp);
  987. /* tmp[i] < 17*2^120 */
  988. /* r = ftmp5 = (s2 - s1)*2 */
  989. felem_diff_128_64(tmp, ftmp6);
  990. /* tmp[i] < 17*2^120 + 2^63 */
  991. felem_reduce(ftmp5, tmp);
  992. y_equal = felem_is_zero(ftmp5);
  993. felem_scalar64(ftmp5, 2);
  994. /* ftmp5[i] < 2^61 */
  995. if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
  996. {
  997. point_double(x3, y3, z3, x1, y1, z1);
  998. return;
  999. }
  1000. /* I = ftmp = (2h)**2 */
  1001. felem_assign(ftmp, ftmp4);
  1002. felem_scalar64(ftmp, 2);
  1003. /* ftmp[i] < 2^61 */
  1004. felem_square(tmp, ftmp);
  1005. /* tmp[i] < 17*2^122 */
  1006. felem_reduce(ftmp, tmp);
  1007. /* J = ftmp2 = h * I */
  1008. felem_mul(tmp, ftmp4, ftmp);
  1009. felem_reduce(ftmp2, tmp);
  1010. /* V = ftmp4 = U1 * I */
  1011. felem_mul(tmp, ftmp3, ftmp);
  1012. felem_reduce(ftmp4, tmp);
  1013. /* x_out = r**2 - J - 2V */
  1014. felem_square(tmp, ftmp5);
  1015. /* tmp[i] < 17*2^122 */
  1016. felem_diff_128_64(tmp, ftmp2);
  1017. /* tmp[i] < 17*2^122 + 2^63 */
  1018. felem_assign(ftmp3, ftmp4);
  1019. felem_scalar64(ftmp4, 2);
  1020. /* ftmp4[i] < 2^61 */
  1021. felem_diff_128_64(tmp, ftmp4);
  1022. /* tmp[i] < 17*2^122 + 2^64 */
  1023. felem_reduce(x_out, tmp);
  1024. /* y_out = r(V-x_out) - 2 * s1 * J */
  1025. felem_diff64(ftmp3, x_out);
  1026. /* ftmp3[i] < 2^60 + 2^60
  1027. * = 2^61 */
  1028. felem_mul(tmp, ftmp5, ftmp3);
  1029. /* tmp[i] < 17*2^122 */
  1030. felem_mul(tmp2, ftmp6, ftmp2);
  1031. /* tmp2[i] < 17*2^120 */
  1032. felem_scalar128(tmp2, 2);
  1033. /* tmp2[i] < 17*2^121 */
  1034. felem_diff128(tmp, tmp2);
  1035. /* tmp[i] < 2^127 - 2^69 + 17*2^122
  1036. * = 2^127 + 2^126 + 2^122 - 2^69
  1037. * < 2^128 */
  1038. felem_reduce(y_out, tmp);
  1039. copy_conditional(x_out, x2, z1_is_zero);
  1040. copy_conditional(x_out, x1, z2_is_zero);
  1041. copy_conditional(y_out, y2, z1_is_zero);
  1042. copy_conditional(y_out, y1, z2_is_zero);
  1043. copy_conditional(z_out, z2, z1_is_zero);
  1044. copy_conditional(z_out, z1, z2_is_zero);
  1045. felem_assign(x3, x_out);
  1046. felem_assign(y3, y_out);
  1047. felem_assign(z3, z_out);
  1048. }
  1049. /* Base point pre computation
  1050. * --------------------------
  1051. *
  1052. * Two different sorts of precomputed tables are used in the following code.
  1053. * Each contains various points on the curve, where each point is three field
  1054. * elements (x, y, z).
  1055. *
  1056. * For the base point table, z is usually 1 (0 for the point at infinity).
  1057. * This table has 16 elements:
  1058. * index | bits | point
  1059. * ------+---------+------------------------------
  1060. * 0 | 0 0 0 0 | 0G
  1061. * 1 | 0 0 0 1 | 1G
  1062. * 2 | 0 0 1 0 | 2^130G
  1063. * 3 | 0 0 1 1 | (2^130 + 1)G
  1064. * 4 | 0 1 0 0 | 2^260G
  1065. * 5 | 0 1 0 1 | (2^260 + 1)G
  1066. * 6 | 0 1 1 0 | (2^260 + 2^130)G
  1067. * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
  1068. * 8 | 1 0 0 0 | 2^390G
  1069. * 9 | 1 0 0 1 | (2^390 + 1)G
  1070. * 10 | 1 0 1 0 | (2^390 + 2^130)G
  1071. * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
  1072. * 12 | 1 1 0 0 | (2^390 + 2^260)G
  1073. * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
  1074. * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
  1075. * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
  1076. *
  1077. * The reason for this is so that we can clock bits into four different
  1078. * locations when doing simple scalar multiplies against the base point.
  1079. *
  1080. * Tables for other points have table[i] = iG for i in 0 .. 16. */
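/* For illustration: in batch_mul below, a round of the generator part picks
 * up the four scalar bits i, i+130, i+260 and i+390 (the very first round,
 * i == 130, uses only bit 520) and forms the index
 *   bits = bit(i+390)<<3 | bit(i+260)<<2 | bit(i+130)<<1 | bit(i),
 * so one lookup in this table adds (b3*2^390 + b2*2^260 + b1*2^130 + b0)*G
 * in a single point addition. When only the generator is being multiplied,
 * the whole 521-bit scalar is consumed in 131 doubling rounds. */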
  1081. /* gmul is the table of precomputed base points */
  1082. static const felem gmul[16][3] =
  1083. {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
  1084. {0, 0, 0, 0, 0, 0, 0, 0, 0},
  1085. {0, 0, 0, 0, 0, 0, 0, 0, 0}},
  1086. {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
  1087. 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
  1088. 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
  1089. {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
  1090. 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
  1091. 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
  1092. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1093. {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
  1094. 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
  1095. 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
  1096. {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
  1097. 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
  1098. 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
  1099. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1100. {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
  1101. 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
  1102. 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
  1103. {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
  1104. 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
  1105. 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
  1106. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1107. {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
  1108. 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
  1109. 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
  1110. {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
  1111. 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
  1112. 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
  1113. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1114. {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
  1115. 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
  1116. 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
  1117. {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
  1118. 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
  1119. 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
  1120. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1121. {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
  1122. 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
  1123. 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
  1124. {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
  1125. 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
  1126. 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
  1127. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1128. {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
  1129. 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
  1130. 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
  1131. {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
  1132. 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
  1133. 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
  1134. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1135. {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
  1136. 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
  1137. 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
  1138. {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
  1139. 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
  1140. 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
  1141. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1142. {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
  1143. 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
  1144. 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
  1145. {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
  1146. 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
  1147. 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
  1148. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1149. {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
  1150. 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
  1151. 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
  1152. {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
  1153. 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
  1154. 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
  1155. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1156. {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
  1157. 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
  1158. 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
  1159. {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
  1160. 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
  1161. 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
  1162. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1163. {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
  1164. 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
  1165. 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
  1166. {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
  1167. 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
  1168. 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
  1169. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1170. {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
  1171. 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
  1172. 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
  1173. {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
  1174. 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
  1175. 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
  1176. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1177. {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
  1178. 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
  1179. 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
  1180. {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
  1181. 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
  1182. 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
  1183. {1, 0, 0, 0, 0, 0, 0, 0, 0}},
  1184. {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
  1185. 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
  1186. 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
  1187. {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
  1188. 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
  1189. 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
  1190. {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
/* select_point selects the |idx|th point from a precomputation table and
 * copies it to out. */
static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
    felem out[3])
{
    unsigned i, j;
    limb *outlimbs = &out[0][0];
    memset(outlimbs, 0, 3 * sizeof(felem));

    for (i = 0; i < size; i++)
    {
        const limb *inlimbs = &pre_comp[i][0][0];
        limb mask = i ^ idx;
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;
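        /* mask is now all-ones exactly when i == idx and all-zeros otherwise,
         * so the OR below copies only the wanted entry; every table entry is
         * still read, keeping the memory access pattern independent of idx. */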
        for (j = 0; j < NLIMBS * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}
/* get_bit returns the |i|th bit in |in| */
static char get_bit(const felem_bytearray in, int i)
{
    if (i < 0)
        return 0;
    return (in[i >> 3] >> (i & 7)) & 1;
}
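/* The scalar byte arrays handed to batch_mul below are little-endian (they
 * are produced with flip_endian), so bit |i| here is the i-th least
 * significant bit of the scalar. */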
/* Interleaved point multiplication using precomputed point multiples:
 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out. */
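/* Implementation note: the generator scalar is handled with a fixed 4-bit
 * comb over bit positions i, i+130, i+260 and i+390 (hence the 16-entry
 * g_pre_comp table), while each additional point uses a signed 5-bit window
 * that is consumed every fifth doubling via ec_GFp_nistp_recode_scalar_bits
 * and the per-point 17-entry pre_comp tables. */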
static void batch_mul(felem x_out, felem y_out, felem z_out,
    const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
    const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
{
    int i, skip;
    unsigned num, gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];
    limb bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));

    /* Loop over all scalars msb-to-lsb, interleaving additions
     * of multiples of the generator (last quarter of rounds)
     * and additions of the other points' multiples (every 5th round).
     */
    skip = 1; /* save two point operations in the first round */
    for (i = (num_points ? 520 : 130); i >= 0; --i)
    {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 130))
        {
            bits = get_bit(g_scalar, i + 390) << 3;
            if (i < 130)
            {
                bits |= get_bit(g_scalar, i + 260) << 2;
                bits |= get_bit(g_scalar, i + 130) << 1;
                bits |= get_bit(g_scalar, i);
            }
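            /* bits now indexes the comb table: bit 0 selects G, bit 1 selects
             * 2^130*G, bit 2 selects 2^260*G and bit 3 selects 2^390*G, so the
             * entry added below is the matching combination of those points. */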
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp, tmp);
            if (!skip)
            {
                point_add(nq[0], nq[1], nq[2],
                    nq[0], nq[1], nq[2],
                    1 /* mixed */, tmp[0], tmp[1], tmp[2]);
            }
            else
            {
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0))
        {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num)
            {
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
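                /* Recode the 6-bit window (5 fresh bits plus the top bit of
                 * the previous window) into a sign and a magnitude of at most
                 * 16, so a single 17-entry table serves both a digit and its
                 * negation. */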
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /* select the point to add or subtract, in constant time */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
                copy_conditional(tmp[1], tmp[3], (-(limb) sign));

                if (!skip)
                {
                    point_add(nq[0], nq[1], nq[2],
                        nq[0], nq[1], nq[2],
                        mixed, tmp[0], tmp[1], tmp[2]);
                }
                else
                {
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}
/* Precomputation for the group generator. */
typedef struct {
    felem g_pre_comp[16][3];
    int references;
} NISTP521_PRE_COMP;

const EC_METHOD *EC_GFp_nistp521_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp521_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp521_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp521_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */,
        0 /* point2oct */,
        0 /* oct2point */,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp521_points_mul,
        ec_GFp_nistp521_precompute_mult,
        ec_GFp_nistp521_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */,
        0 /* field_encode */,
        0 /* field_decode */,
        0 /* field_set_to_one */ };
    return &ret;
}
/******************************************************************************/
/* FUNCTIONS TO MANAGE PRECOMPUTATION
 */

static NISTP521_PRE_COMP *nistp521_pre_comp_new()
{
    NISTP521_PRE_COMP *ret = NULL;
    ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
    if (!ret)
    {
        ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
        return ret;
    }
    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
    ret->references = 1;
    return ret;
}

static void *nistp521_pre_comp_dup(void *src_)
{
    NISTP521_PRE_COMP *src = src_;

    /* no need to actually copy, these objects never change! */
    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);

    return src_;
}

static void nistp521_pre_comp_free(void *pre_)
{
    int i;
    NISTP521_PRE_COMP *pre = pre_;

    if (!pre)
        return;

    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
    if (i > 0)
        return;

    OPENSSL_free(pre);
}

static void nistp521_pre_comp_clear_free(void *pre_)
{
    int i;
    NISTP521_PRE_COMP *pre = pre_;

    if (!pre)
        return;

    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
    if (i > 0)
        return;

    OPENSSL_cleanse(pre, sizeof(*pre));
    OPENSSL_free(pre);
}
/******************************************************************************/
/* OPENSSL EC_METHOD FUNCTIONS
 */

int ec_GFp_nistp521_group_init(EC_GROUP *group)
{
    int ret;
    ret = ec_GFp_simple_group_init(group);
    group->a_is_minus3 = 1;
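    /* P-521 has a = -3 (mod p); recording this lets generic code and the
     * a = -3 doubling formula used in this file take the fast path. */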
    return ret;
}

int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
    const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
{
    int ret = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *curve_p, *curve_a, *curve_b;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    BN_CTX_start(ctx);
    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
        ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
    BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
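    /* nistp521_curve_params[0..2] are the big-endian encodings of p, a and b;
     * entries [3] and [4] (used in precompute_mult below) are the affine x
     * and y coordinates of the standard generator. */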
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
        (BN_cmp(curve_b, b)))
    {
        ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
            EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_521;
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
err:
    BN_CTX_end(ctx);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    return ret;
}
/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
 * (X', Y') = (X/Z^2, Y/Z^3) */
int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
    const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    largefelem tmp;

    if (EC_POINT_is_at_infinity(group, point))
    {
        ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
            EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
        (!BN_to_felem(z1, &point->Z))) return 0;
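    /* Invert Z once and reuse the result: z2 = Z^-1, then z1 = Z^-2 for the
     * x coordinate, and later z1 = Z^-3 for the y coordinate. */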
    felem_inv(z2, z1);
    felem_square(tmp, z2); felem_reduce(z1, tmp);
    felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
    felem_contract(x_out, x_in);
    if (x != NULL)
    {
        if (!felem_to_BN(x, x_out))
        {
            ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
            return 0;
        }
    }
    felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
    felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
    felem_contract(y_out, y_in);
    if (y != NULL)
    {
        if (!felem_to_BN(y, y_out))
        {
            ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}
static void make_points_affine(size_t num, felem points[/* num */][3],
    felem tmp_felems[/* num+1 */])
{
    /* Runs in constant time, unless an input is the point at infinity
     * (which normally shouldn't happen). */
    ec_GFp_nistp_points_make_affine_internal(
        num,
        points,
        sizeof(felem),
        tmp_felems,
        (void (*)(void *)) felem_one,
        (int (*)(const void *)) felem_is_zero_int,
        (void (*)(void *, const void *)) felem_assign,
        (void (*)(void *, const void *)) felem_square_reduce,
        (void (*)(void *, const void *, const void *)) felem_mul_reduce,
        (void (*)(void *, const void *)) felem_inv,
        (void (*)(void *, const void *)) felem_contract);
}
/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values.
 * Result is stored in r (r can equal one of the inputs). */
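/* Strategy: build a little-endian scalar byte array and a table of the
 * multiples 0*P .. 16*P for every input point; use the comb table for the
 * generator (either the group's stored precomputation or the built-in gmul)
 * when it matches group->generator, otherwise treat the generator as one more
 * arbitrary point; convert the tables to affine (mixed addition) when more
 * than one point is involved, then hand everything to batch_mul. */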
int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
    const BIGNUM *scalar, size_t num, const EC_POINT *points[],
    const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    int mixed = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem (*pre_comp)[17][3] = NULL;
    felem *tmp_felems = NULL;
    felem_bytearray tmp;
    unsigned i, num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP521_PRE_COMP *pre = NULL;
    felem (*g_pre_comp)[3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL)
    {
        pre = EC_EX_DATA_get_data(group->extra_data,
            nistp521_pre_comp_dup, nistp521_pre_comp_free,
            nistp521_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = &pre->g_pre_comp[0];
        else
            /* try to use the standard precomputation */
            g_pre_comp = (felem (*)[3]) gmul;
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[1][0]) ||
            !felem_to_BN(y, g_pre_comp[1][1]) ||
            !felem_to_BN(z, g_pre_comp[1][2]))
        {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
            generator, x, y, z, ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /* we don't have valid precomputation:
             * treat the generator as a random point */
            num_points++;
    }

    if (num_points > 0)
    {
        if (num_points >= 2)
        {
            /* unless we precompute multiples for just one point,
             * converting those into affine form is time well spent */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
        {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /* we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
        for (i = 0; i < num_points; ++i)
        {
            if (i == num)
                /* we didn't have a valid precomputation, so we pick
                 * the generator */
            {
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            }
            else
                /* the i^th point */
            {
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL))
            {
                /* reduce scalar to 0 <= scalar < 2^521 */
                if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
                {
                    /* this is an unusual input, and we don't guarantee
                     * constant-timeness */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
                    {
                        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                }
                else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z))) goto err;
                memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
                memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
                memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
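                /* fill in the multiples 2*P .. 16*P: even entries by doubling
                 * entry j/2, odd entries by adding P (entry 1) to entry j-1 */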
                for (j = 2; j <= 16; ++j)
                {
                    if (j & 1)
                    {
                        point_add(
                            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
                            pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
                            0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
                    }
                    else
                    {
                        point_double(
                            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
                            pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp))
    {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^521 */
        if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
        {
            /* this is an unusual input, and we don't guarantee
             * constant-timeness */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
            {
                ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        }
        else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
            (const felem_bytearray (*)) secrets, num_points,
            g_secret,
            mixed, (const felem (*)[17][3]) pre_comp,
            (const felem (*)[3]) g_pre_comp);
    }
    else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
            (const felem_bytearray (*)) secrets, num_points,
            NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);

    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in)))
    {
        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
}
int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP521_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[16];

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
        nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL) goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp521_pre_comp_new()) == NULL)
        goto err;
    /* if the generator is the standard one, use built-in precomputation */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
    {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        ret = 1;
        goto err;
    }
    if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
        goto err;
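    /* Build the comb table: g_pre_comp[k] ends up holding the sum over the
     * set bits b of k of 2^(130*b)*G, i.e. bit 0 of k selects G, bit 1 selects
     * 2^130*G, bit 2 selects 2^260*G and bit 3 selects 2^390*G, matching the
     * indexing batch_mul uses. */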
    /* compute 2^130*G, 2^260*G, 2^390*G */
    for (i = 1; i <= 4; i <<= 1)
    {
        point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
            pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
            pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
        for (j = 0; j < 129; ++j)
        {
            point_double(pre->g_pre_comp[2*i][0],
                pre->g_pre_comp[2*i][1],
                pre->g_pre_comp[2*i][2],
                pre->g_pre_comp[2*i][0],
                pre->g_pre_comp[2*i][1],
                pre->g_pre_comp[2*i][2]);
        }
    }
    /* g_pre_comp[0] is the point at infinity */
    memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
    /* the remaining multiples */
    /* 2^130*G + 2^260*G */
    point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
        pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
        pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    /* 2^130*G + 2^390*G */
    point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
        pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
        pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    /* 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
        pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
        pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
        0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
        pre->g_pre_comp[4][2]);
    /* 2^130*G + 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
        pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
        pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    for (i = 1; i < 8; ++i)
    {
        /* odd multiples: add G */
        point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
            pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
            pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
            0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
            pre->g_pre_comp[1][2]);
    }
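    /* convert the 15 finite entries (indices 1..15) to affine form; index 0
     * stays all-zero, the representation of the point at infinity */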
    make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);

    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
        nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
        goto err;
    ret = 1;
    pre = NULL;
err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp521_pre_comp_free(pre);
    return ret;
}
int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
{
    if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
        nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
        != NULL)
        return 1;
    else
        return 0;
}
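/* Illustrative use (a sketch, not part of this file): applications normally
 * reach the routines above through the generic EC API, e.g.
 *
 *     EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp521r1);
 *     EC_GROUP_precompute_mult(group, NULL);             (builds NISTP521_PRE_COMP)
 *     EC_POINT_mul(group, r, scalar, NULL, NULL, NULL);  (scalar * generator)
 *
 * where r and scalar are caller-provided, assuming the build enables
 * ec_nistp_64_gcc_128 support so that EC_GFp_nistp521_method() is selected
 * for P-521. */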
#else
static void *dummy = &dummy;
#endif