tfm.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429
  1. /* tfm.c
  2. *
  3. * Copyright (C) 2006-2011 Sawtooth Consulting Ltd.
  4. *
  5. * This file is part of CyaSSL.
  6. *
  7. * CyaSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * CyaSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  20. */
  21. /*
  22. * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
  23. * http://math.libtomcrypt.com
  24. */
  25. /**
  26. * Edited by Moisés Guimarães (moises.guimaraes@phoebus.com.br)
  27. * to fit CyaSSL's needs.
  28. */
  29. #ifdef USE_FAST_MATH
  30. #include "tfm.h"
  31. #include "asm.c" /* will define asm MACROS or C ones */
  32. /* Functions */
  33. void fp_add(fp_int *a, fp_int *b, fp_int *c)
  34. {
  35. int sa, sb;
  36. /* get sign of both inputs */
  37. sa = a->sign;
  38. sb = b->sign;
  39. /* handle two cases, not four */
  40. if (sa == sb) {
  41. /* both positive or both negative */
  42. /* add their magnitudes, copy the sign */
  43. c->sign = sa;
  44. s_fp_add (a, b, c);
  45. } else {
  46. /* one positive, the other negative */
  47. /* subtract the one with the greater magnitude from */
  48. /* the one of the lesser magnitude. The result gets */
  49. /* the sign of the one with the greater magnitude. */
  50. if (fp_cmp_mag (a, b) == FP_LT) {
  51. c->sign = sb;
  52. s_fp_sub (b, a, c);
  53. } else {
  54. c->sign = sa;
  55. s_fp_sub (a, b, c);
  56. }
  57. }
  58. }
  59. /* unsigned addition */
  60. void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
  61. {
  62. int x, y, oldused;
  63. register fp_word t;
  64. y = MAX(a->used, b->used);
  65. oldused = c->used;
  66. c->used = y;
  67. t = 0;
  68. for (x = 0; x < y; x++) {
  69. t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
  70. c->dp[x] = (fp_digit)t;
  71. t >>= DIGIT_BIT;
  72. }
  73. if (t != 0 && x < FP_SIZE) {
  74. c->dp[c->used++] = (fp_digit)t;
  75. ++x;
  76. }
  77. c->used = x;
  78. for (; x < oldused; x++) {
  79. c->dp[x] = 0;
  80. }
  81. fp_clamp(c);
  82. }
  83. /* c = a - b */
  84. void fp_sub(fp_int *a, fp_int *b, fp_int *c)
  85. {
  86. int sa, sb;
  87. sa = a->sign;
  88. sb = b->sign;
  89. if (sa != sb) {
  90. /* subtract a negative from a positive, OR */
  91. /* subtract a positive from a negative. */
  92. /* In either case, ADD their magnitudes, */
  93. /* and use the sign of the first number. */
  94. c->sign = sa;
  95. s_fp_add (a, b, c);
  96. } else {
  97. /* subtract a positive from a positive, OR */
  98. /* subtract a negative from a negative. */
  99. /* First, take the difference between their */
  100. /* magnitudes, then... */
  101. if (fp_cmp_mag (a, b) != FP_LT) {
  102. /* Copy the sign from the first */
  103. c->sign = sa;
  104. /* The first has a larger or equal magnitude */
  105. s_fp_sub (a, b, c);
  106. } else {
  107. /* The result has the *opposite* sign from */
  108. /* the first number. */
  109. c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
  110. /* The second has a larger magnitude */
  111. s_fp_sub (b, a, c);
  112. }
  113. }
  114. }
  115. /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
  116. void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
  117. {
  118. int x, oldbused, oldused;
  119. fp_word t;
  120. oldused = c->used;
  121. oldbused = b->used;
  122. c->used = a->used;
  123. t = 0;
  124. for (x = 0; x < oldbused; x++) {
  125. t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
  126. c->dp[x] = (fp_digit)t;
  127. t = (t >> DIGIT_BIT)&1;
  128. }
  129. for (; x < a->used; x++) {
  130. t = ((fp_word)a->dp[x]) - t;
  131. c->dp[x] = (fp_digit)t;
  132. t = (t >> DIGIT_BIT);
  133. }
  134. for (; x < oldused; x++) {
  135. c->dp[x] = 0;
  136. }
  137. fp_clamp(c);
  138. }
  139. /* c = a * b */
  140. void fp_mul(fp_int *A, fp_int *B, fp_int *C)
  141. {
  142. int y, yy;
  143. y = MAX(A->used, B->used);
  144. yy = MIN(A->used, B->used);
  145. /* call generic if we're out of range */
  146. if (y + yy > FP_SIZE) {
  147. fp_mul_comba(A, B, C);
  148. return ;
  149. }
  150. /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
  151. of the largest input. We also want to avoid doing excess mults if the
  152. inputs are not close to the next power of two. That is, for example,
  153. if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
  154. */
  155. #ifdef TFM_MUL3
  156. if (y <= 3) {
  157. fp_mul_comba3(A,B,C);
  158. return;
  159. }
  160. #endif
  161. #ifdef TFM_MUL4
  162. if (y == 4) {
  163. fp_mul_comba4(A,B,C);
  164. return;
  165. }
  166. #endif
  167. #ifdef TFM_MUL6
  168. if (y <= 6) {
  169. fp_mul_comba6(A,B,C);
  170. return;
  171. }
  172. #endif
  173. #ifdef TFM_MUL7
  174. if (y == 7) {
  175. fp_mul_comba7(A,B,C);
  176. return;
  177. }
  178. #endif
  179. #ifdef TFM_MUL8
  180. if (y == 8) {
  181. fp_mul_comba8(A,B,C);
  182. return;
  183. }
  184. #endif
  185. #ifdef TFM_MUL9
  186. if (y == 9) {
  187. fp_mul_comba9(A,B,C);
  188. return;
  189. }
  190. #endif
  191. #ifdef TFM_MUL12
  192. if (y <= 12) {
  193. fp_mul_comba12(A,B,C);
  194. return;
  195. }
  196. #endif
  197. #ifdef TFM_MUL17
  198. if (y <= 17) {
  199. fp_mul_comba17(A,B,C);
  200. return;
  201. }
  202. #endif
  203. #ifdef TFM_SMALL_SET
  204. if (y <= 16) {
  205. fp_mul_comba_small(A,B,C);
  206. return;
  207. }
  208. #endif
  209. #if defined(TFM_MUL20)
  210. if (y <= 20) {
  211. fp_mul_comba20(A,B,C);
  212. return;
  213. }
  214. #endif
  215. #if defined(TFM_MUL24)
  216. if (yy >= 16 && y <= 24) {
  217. fp_mul_comba24(A,B,C);
  218. return;
  219. }
  220. #endif
  221. #if defined(TFM_MUL28)
  222. if (yy >= 20 && y <= 28) {
  223. fp_mul_comba28(A,B,C);
  224. return;
  225. }
  226. #endif
  227. #if defined(TFM_MUL32)
  228. if (yy >= 24 && y <= 32) {
  229. fp_mul_comba32(A,B,C);
  230. return;
  231. }
  232. #endif
  233. #if defined(TFM_MUL48)
  234. if (yy >= 40 && y <= 48) {
  235. fp_mul_comba48(A,B,C);
  236. return;
  237. }
  238. #endif
  239. #if defined(TFM_MUL64)
  240. if (yy >= 56 && y <= 64) {
  241. fp_mul_comba64(A,B,C);
  242. return;
  243. }
  244. #endif
  245. fp_mul_comba(A,B,C);
  246. }
  247. void fp_mul_2(fp_int * a, fp_int * b)
  248. {
  249. int x, oldused;
  250. oldused = b->used;
  251. b->used = a->used;
  252. {
  253. register fp_digit r, rr, *tmpa, *tmpb;
  254. /* alias for source */
  255. tmpa = a->dp;
  256. /* alias for dest */
  257. tmpb = b->dp;
  258. /* carry */
  259. r = 0;
  260. for (x = 0; x < a->used; x++) {
  261. /* get what will be the *next* carry bit from the
  262. * MSB of the current digit
  263. */
  264. rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
  265. /* now shift up this digit, add in the carry [from the previous] */
  266. *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
  267. /* copy the carry that would be from the source
  268. * digit into the next iteration
  269. */
  270. r = rr;
  271. }
  272. /* new leading digit? */
  273. if (r != 0 && b->used != (FP_SIZE-1)) {
  274. /* add a MSB which is always 1 at this point */
  275. *tmpb = 1;
  276. ++(b->used);
  277. }
  278. /* now zero any excess digits on the destination
  279. * that we didn't write to
  280. */
  281. tmpb = b->dp + b->used;
  282. for (x = b->used; x < oldused; x++) {
  283. *tmpb++ = 0;
  284. }
  285. }
  286. b->sign = a->sign;
  287. }
  288. /* c = a * b */
  289. void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
  290. {
  291. fp_word w;
  292. int x, oldused;
  293. oldused = c->used;
  294. c->used = a->used;
  295. c->sign = a->sign;
  296. w = 0;
  297. for (x = 0; x < a->used; x++) {
  298. w = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
  299. c->dp[x] = (fp_digit)w;
  300. w = w >> DIGIT_BIT;
  301. }
  302. if (w != 0 && (a->used != FP_SIZE)) {
  303. c->dp[c->used++] = (fp_digit) w;
  304. ++x;
  305. }
  306. for (; x < oldused; x++) {
  307. c->dp[x] = 0;
  308. }
  309. fp_clamp(c);
  310. }
  311. /* c = a * 2**d */
  312. void fp_mul_2d(fp_int *a, int b, fp_int *c)
  313. {
  314. fp_digit carry, carrytmp, shift;
  315. int x;
  316. /* copy it */
  317. fp_copy(a, c);
  318. /* handle whole digits */
  319. if (b >= DIGIT_BIT) {
  320. fp_lshd(c, b/DIGIT_BIT);
  321. }
  322. b %= DIGIT_BIT;
  323. /* shift the digits */
  324. if (b != 0) {
  325. carry = 0;
  326. shift = DIGIT_BIT - b;
  327. for (x = 0; x < c->used; x++) {
  328. carrytmp = c->dp[x] >> shift;
  329. c->dp[x] = (c->dp[x] << b) + carry;
  330. carry = carrytmp;
  331. }
  332. /* store last carry if room */
  333. if (carry && x < FP_SIZE) {
  334. c->dp[c->used++] = carry;
  335. }
  336. }
  337. fp_clamp(c);
  338. }
  339. /* generic PxQ multiplier */
  340. void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
  341. {
  342. int ix, iy, iz, tx, ty, pa;
  343. fp_digit c0, c1, c2, *tmpx, *tmpy;
  344. fp_int tmp, *dst;
  345. COMBA_START;
  346. COMBA_CLEAR;
  347. /* get size of output and trim */
  348. pa = A->used + B->used;
  349. if (pa >= FP_SIZE) {
  350. pa = FP_SIZE-1;
  351. }
  352. if (A == C || B == C) {
  353. fp_zero(&tmp);
  354. dst = &tmp;
  355. } else {
  356. fp_zero(C);
  357. dst = C;
  358. }
  359. for (ix = 0; ix < pa; ix++) {
  360. /* get offsets into the two bignums */
  361. ty = MIN(ix, B->used-1);
  362. tx = ix - ty;
  363. /* setup temp aliases */
  364. tmpx = A->dp + tx;
  365. tmpy = B->dp + ty;
  366. /* this is the number of times the loop will iterrate, essentially its
  367. while (tx++ < a->used && ty-- >= 0) { ... }
  368. */
  369. iy = MIN(A->used-tx, ty+1);
  370. /* execute loop */
  371. COMBA_FORWARD;
  372. for (iz = 0; iz < iy; ++iz) {
  373. /* TAO change COMBA_ADD back to MULADD */
  374. MULADD(*tmpx++, *tmpy--);
  375. }
  376. /* store term */
  377. COMBA_STORE(dst->dp[ix]);
  378. }
  379. COMBA_FINI;
  380. dst->used = pa;
  381. dst->sign = A->sign ^ B->sign;
  382. fp_clamp(dst);
  383. fp_copy(dst, C);
  384. }
  385. /* a/b => cb + d == a */
  386. int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
  387. {
  388. fp_int q, x, y, t1, t2;
  389. int n, t, i, norm, neg;
  390. /* is divisor zero ? */
  391. if (fp_iszero (b) == 1) {
  392. return FP_VAL;
  393. }
  394. /* if a < b then q=0, r = a */
  395. if (fp_cmp_mag (a, b) == FP_LT) {
  396. if (d != NULL) {
  397. fp_copy (a, d);
  398. }
  399. if (c != NULL) {
  400. fp_zero (c);
  401. }
  402. return FP_OKAY;
  403. }
  404. fp_init(&q);
  405. q.used = a->used + 2;
  406. fp_init(&t1);
  407. fp_init(&t2);
  408. fp_init_copy(&x, a);
  409. fp_init_copy(&y, b);
  410. /* fix the sign */
  411. neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
  412. x.sign = y.sign = FP_ZPOS;
  413. /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
  414. norm = fp_count_bits(&y) % DIGIT_BIT;
  415. if (norm < (int)(DIGIT_BIT-1)) {
  416. norm = (DIGIT_BIT-1) - norm;
  417. fp_mul_2d (&x, norm, &x);
  418. fp_mul_2d (&y, norm, &y);
  419. } else {
  420. norm = 0;
  421. }
  422. /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
  423. n = x.used - 1;
  424. t = y.used - 1;
  425. /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
  426. fp_lshd (&y, n - t); /* y = y*b**{n-t} */
  427. while (fp_cmp (&x, &y) != FP_LT) {
  428. ++(q.dp[n - t]);
  429. fp_sub (&x, &y, &x);
  430. }
  431. /* reset y by shifting it back down */
  432. fp_rshd (&y, n - t);
  433. /* step 3. for i from n down to (t + 1) */
  434. for (i = n; i >= (t + 1); i--) {
  435. if (i > x.used) {
  436. continue;
  437. }
  438. /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
  439. * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
  440. if (x.dp[i] == y.dp[t]) {
  441. q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
  442. } else {
  443. fp_word tmp;
  444. tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
  445. tmp |= ((fp_word) x.dp[i - 1]);
  446. tmp /= ((fp_word)y.dp[t]);
  447. q.dp[i - t - 1] = (fp_digit) (tmp);
  448. }
  449. /* while (q{i-t-1} * (yt * b + y{t-1})) >
  450. xi * b**2 + xi-1 * b + xi-2
  451. do q{i-t-1} -= 1;
  452. */
  453. q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
  454. do {
  455. q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
  456. /* find left hand */
  457. fp_zero (&t1);
  458. t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
  459. t1.dp[1] = y.dp[t];
  460. t1.used = 2;
  461. fp_mul_d (&t1, q.dp[i - t - 1], &t1);
  462. /* find right hand */
  463. t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
  464. t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
  465. t2.dp[2] = x.dp[i];
  466. t2.used = 3;
  467. } while (fp_cmp_mag(&t1, &t2) == FP_GT);
  468. /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
  469. fp_mul_d (&y, q.dp[i - t - 1], &t1);
  470. fp_lshd (&t1, i - t - 1);
  471. fp_sub (&x, &t1, &x);
  472. /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
  473. if (x.sign == FP_NEG) {
  474. fp_copy (&y, &t1);
  475. fp_lshd (&t1, i - t - 1);
  476. fp_add (&x, &t1, &x);
  477. q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
  478. }
  479. }
  480. /* now q is the quotient and x is the remainder
  481. * [which we have to normalize]
  482. */
  483. /* get sign before writing to c */
  484. x.sign = x.used == 0 ? FP_ZPOS : a->sign;
  485. if (c != NULL) {
  486. fp_clamp (&q);
  487. fp_copy (&q, c);
  488. c->sign = neg;
  489. }
  490. if (d != NULL) {
  491. fp_div_2d (&x, norm, &x, NULL);
  492. /* the following is a kludge, essentially we were seeing the right remainder but
  493. with excess digits that should have been zero
  494. */
  495. for (i = b->used; i < x.used; i++) {
  496. x.dp[i] = 0;
  497. }
  498. fp_clamp(&x);
  499. fp_copy (&x, d);
  500. }
  501. return FP_OKAY;
  502. }
  503. /* b = a/2 */
  504. void fp_div_2(fp_int * a, fp_int * b)
  505. {
  506. int x, oldused;
  507. oldused = b->used;
  508. b->used = a->used;
  509. {
  510. register fp_digit r, rr, *tmpa, *tmpb;
  511. /* source alias */
  512. tmpa = a->dp + b->used - 1;
  513. /* dest alias */
  514. tmpb = b->dp + b->used - 1;
  515. /* carry */
  516. r = 0;
  517. for (x = b->used - 1; x >= 0; x--) {
  518. /* get the carry for the next iteration */
  519. rr = *tmpa & 1;
  520. /* shift the current digit, add in carry and store */
  521. *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
  522. /* forward carry to next iteration */
  523. r = rr;
  524. }
  525. /* zero excess digits */
  526. tmpb = b->dp + b->used;
  527. for (x = b->used; x < oldused; x++) {
  528. *tmpb++ = 0;
  529. }
  530. }
  531. b->sign = a->sign;
  532. fp_clamp (b);
  533. }
  534. /* c = a / 2**b */
  535. void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
  536. {
  537. fp_digit D, r, rr;
  538. int x;
  539. fp_int t;
  540. /* if the shift count is <= 0 then we do no work */
  541. if (b <= 0) {
  542. fp_copy (a, c);
  543. if (d != NULL) {
  544. fp_zero (d);
  545. }
  546. return;
  547. }
  548. fp_init(&t);
  549. /* get the remainder */
  550. if (d != NULL) {
  551. fp_mod_2d (a, b, &t);
  552. }
  553. /* copy */
  554. fp_copy(a, c);
  555. /* shift by as many digits in the bit count */
  556. if (b >= (int)DIGIT_BIT) {
  557. fp_rshd (c, b / DIGIT_BIT);
  558. }
  559. /* shift any bit count < DIGIT_BIT */
  560. D = (fp_digit) (b % DIGIT_BIT);
  561. if (D != 0) {
  562. register fp_digit *tmpc, mask, shift;
  563. /* mask */
  564. mask = (((fp_digit)1) << D) - 1;
  565. /* shift for lsb */
  566. shift = DIGIT_BIT - D;
  567. /* alias */
  568. tmpc = c->dp + (c->used - 1);
  569. /* carry */
  570. r = 0;
  571. for (x = c->used - 1; x >= 0; x--) {
  572. /* get the lower bits of this word in a temp */
  573. rr = *tmpc & mask;
  574. /* shift the current word and mix in the carry bits from the previous word */
  575. *tmpc = (*tmpc >> D) | (r << shift);
  576. --tmpc;
  577. /* set the carry to the carry bits of the current word found above */
  578. r = rr;
  579. }
  580. }
  581. fp_clamp (c);
  582. if (d != NULL) {
  583. fp_copy (&t, d);
  584. }
  585. }
  586. /* c = a mod b, 0 <= c < b */
  587. int fp_mod(fp_int *a, fp_int *b, fp_int *c)
  588. {
  589. fp_int t;
  590. int err;
  591. fp_zero(&t);
  592. if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
  593. return err;
  594. }
  595. if (t.sign != b->sign) {
  596. fp_add(&t, b, c);
  597. } else {
  598. fp_copy(&t, c);
  599. }
  600. return FP_OKAY;
  601. }
  602. /* c = a mod 2**d */
  603. void fp_mod_2d(fp_int *a, int b, fp_int *c)
  604. {
  605. int x;
  606. /* zero if count less than or equal to zero */
  607. if (b <= 0) {
  608. fp_zero(c);
  609. return;
  610. }
  611. /* get copy of input */
  612. fp_copy(a, c);
  613. /* if 2**d is larger than we just return */
  614. if (b >= (DIGIT_BIT * a->used)) {
  615. return;
  616. }
  617. /* zero digits above the last digit of the modulus */
  618. for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
  619. c->dp[x] = 0;
  620. }
  621. /* clear the digit that is not completely outside/inside the modulus */
  622. c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
  623. fp_clamp (c);
  624. }
  625. static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
  626. {
  627. fp_int x, y, u, v, A, B, C, D;
  628. int res;
  629. /* b cannot be negative */
  630. if (b->sign == FP_NEG || fp_iszero(b) == 1) {
  631. return FP_VAL;
  632. }
  633. /* init temps */
  634. fp_init(&x); fp_init(&y);
  635. fp_init(&u); fp_init(&v);
  636. fp_init(&A); fp_init(&B);
  637. fp_init(&C); fp_init(&D);
  638. /* x = a, y = b */
  639. if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
  640. return res;
  641. }
  642. fp_copy(b, &y);
  643. /* 2. [modified] if x,y are both even then return an error! */
  644. if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
  645. return FP_VAL;
  646. }
  647. /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
  648. fp_copy (&x, &u);
  649. fp_copy (&y, &v);
  650. fp_set (&A, 1);
  651. fp_set (&D, 1);
  652. top:
  653. /* 4. while u is even do */
  654. while (fp_iseven (&u) == 1) {
  655. /* 4.1 u = u/2 */
  656. fp_div_2 (&u, &u);
  657. /* 4.2 if A or B is odd then */
  658. if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
  659. /* A = (A+y)/2, B = (B-x)/2 */
  660. fp_add (&A, &y, &A);
  661. fp_sub (&B, &x, &B);
  662. }
  663. /* A = A/2, B = B/2 */
  664. fp_div_2 (&A, &A);
  665. fp_div_2 (&B, &B);
  666. }
  667. /* 5. while v is even do */
  668. while (fp_iseven (&v) == 1) {
  669. /* 5.1 v = v/2 */
  670. fp_div_2 (&v, &v);
  671. /* 5.2 if C or D is odd then */
  672. if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
  673. /* C = (C+y)/2, D = (D-x)/2 */
  674. fp_add (&C, &y, &C);
  675. fp_sub (&D, &x, &D);
  676. }
  677. /* C = C/2, D = D/2 */
  678. fp_div_2 (&C, &C);
  679. fp_div_2 (&D, &D);
  680. }
  681. /* 6. if u >= v then */
  682. if (fp_cmp (&u, &v) != FP_LT) {
  683. /* u = u - v, A = A - C, B = B - D */
  684. fp_sub (&u, &v, &u);
  685. fp_sub (&A, &C, &A);
  686. fp_sub (&B, &D, &B);
  687. } else {
  688. /* v - v - u, C = C - A, D = D - B */
  689. fp_sub (&v, &u, &v);
  690. fp_sub (&C, &A, &C);
  691. fp_sub (&D, &B, &D);
  692. }
  693. /* if not zero goto step 4 */
  694. if (fp_iszero (&u) == 0)
  695. goto top;
  696. /* now a = C, b = D, gcd == g*v */
  697. /* if v != 1 then there is no inverse */
  698. if (fp_cmp_d (&v, 1) != FP_EQ) {
  699. return FP_VAL;
  700. }
  701. /* if its too low */
  702. while (fp_cmp_d(&C, 0) == FP_LT) {
  703. fp_add(&C, b, &C);
  704. }
  705. /* too big */
  706. while (fp_cmp_mag(&C, b) != FP_LT) {
  707. fp_sub(&C, b, &C);
  708. }
  709. /* C is now the inverse */
  710. fp_copy(&C, c);
  711. return FP_OKAY;
  712. }
  713. /* c = 1/a (mod b) for odd b only */
  714. int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
  715. {
  716. fp_int x, y, u, v, B, D;
  717. int neg;
  718. /* 2. [modified] b must be odd */
  719. if (fp_iseven (b) == FP_YES) {
  720. return fp_invmod_slow(a,b,c);
  721. }
  722. /* init all our temps */
  723. fp_init(&x); fp_init(&y);
  724. fp_init(&u); fp_init(&v);
  725. fp_init(&B); fp_init(&D);
  726. /* x == modulus, y == value to invert */
  727. fp_copy(b, &x);
  728. /* we need y = |a| */
  729. fp_abs(a, &y);
  730. /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
  731. fp_copy(&x, &u);
  732. fp_copy(&y, &v);
  733. fp_set (&D, 1);
  734. top:
  735. /* 4. while u is even do */
  736. while (fp_iseven (&u) == FP_YES) {
  737. /* 4.1 u = u/2 */
  738. fp_div_2 (&u, &u);
  739. /* 4.2 if B is odd then */
  740. if (fp_isodd (&B) == FP_YES) {
  741. fp_sub (&B, &x, &B);
  742. }
  743. /* B = B/2 */
  744. fp_div_2 (&B, &B);
  745. }
  746. /* 5. while v is even do */
  747. while (fp_iseven (&v) == FP_YES) {
  748. /* 5.1 v = v/2 */
  749. fp_div_2 (&v, &v);
  750. /* 5.2 if D is odd then */
  751. if (fp_isodd (&D) == FP_YES) {
  752. /* D = (D-x)/2 */
  753. fp_sub (&D, &x, &D);
  754. }
  755. /* D = D/2 */
  756. fp_div_2 (&D, &D);
  757. }
  758. /* 6. if u >= v then */
  759. if (fp_cmp (&u, &v) != FP_LT) {
  760. /* u = u - v, B = B - D */
  761. fp_sub (&u, &v, &u);
  762. fp_sub (&B, &D, &B);
  763. } else {
  764. /* v - v - u, D = D - B */
  765. fp_sub (&v, &u, &v);
  766. fp_sub (&D, &B, &D);
  767. }
  768. /* if not zero goto step 4 */
  769. if (fp_iszero (&u) == FP_NO) {
  770. goto top;
  771. }
  772. /* now a = C, b = D, gcd == g*v */
  773. /* if v != 1 then there is no inverse */
  774. if (fp_cmp_d (&v, 1) != FP_EQ) {
  775. return FP_VAL;
  776. }
  777. /* b is now the inverse */
  778. neg = a->sign;
  779. while (D.sign == FP_NEG) {
  780. fp_add (&D, b, &D);
  781. }
  782. fp_copy (&D, c);
  783. c->sign = neg;
  784. return FP_OKAY;
  785. }
  786. /* d = a * b (mod c) */
  787. int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
  788. {
  789. fp_int tmp;
  790. fp_zero(&tmp);
  791. fp_mul(a, b, &tmp);
  792. return fp_mod(&tmp, c, d);
  793. }
  794. #ifdef TFM_TIMING_RESISTANT
  795. /* timing resistant montgomery ladder based exptmod
  796. Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
  797. */
  798. static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
  799. {
  800. fp_int R[2];
  801. fp_digit buf, mp;
  802. int err, bitcnt, digidx, y;
  803. /* now setup montgomery */
  804. if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
  805. return err;
  806. }
  807. fp_init(&R[0]);
  808. fp_init(&R[1]);
  809. /* now we need R mod m */
  810. fp_montgomery_calc_normalization (&R[0], P);
  811. /* now set R[0][1] to G * R mod m */
  812. if (fp_cmp_mag(P, G) != FP_GT) {
  813. /* G > P so we reduce it first */
  814. fp_mod(G, P, &R[1]);
  815. } else {
  816. fp_copy(G, &R[1]);
  817. }
  818. fp_mulmod (&R[1], &R[0], P, &R[1]);
  819. /* for j = t-1 downto 0 do
  820. r_!k = R0*R1; r_k = r_k^2
  821. */
  822. /* set initial mode and bit cnt */
  823. bitcnt = 1;
  824. buf = 0;
  825. digidx = X->used - 1;
  826. for (;;) {
  827. /* grab next digit as required */
  828. if (--bitcnt == 0) {
  829. /* if digidx == -1 we are out of digits so break */
  830. if (digidx == -1) {
  831. break;
  832. }
  833. /* read next digit and reset bitcnt */
  834. buf = X->dp[digidx--];
  835. bitcnt = (int)DIGIT_BIT;
  836. }
  837. /* grab the next msb from the exponent */
  838. y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
  839. buf <<= (fp_digit)1;
  840. /* do ops */
  841. fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
  842. fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
  843. }
  844. fp_montgomery_reduce(&R[0], P, mp);
  845. fp_copy(&R[0], Y);
  846. return FP_OKAY;
  847. }
  848. #else
  849. /* y = g**x (mod b)
  850. * Some restrictions... x must be positive and < b
  851. */
  852. static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
  853. {
  854. fp_int M[64], res;
  855. fp_digit buf, mp;
  856. int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
  857. /* find window size */
  858. x = fp_count_bits (X);
  859. if (x <= 21) {
  860. winsize = 1;
  861. } else if (x <= 36) {
  862. winsize = 3;
  863. } else if (x <= 140) {
  864. winsize = 4;
  865. } else if (x <= 450) {
  866. winsize = 5;
  867. } else {
  868. winsize = 6;
  869. }
  870. /* init M array */
  871. XMEMSET(M, 0, sizeof(M));
  872. /* now setup montgomery */
  873. if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
  874. return err;
  875. }
  876. /* setup result */
  877. fp_init(&res);
  878. /* create M table
  879. *
  880. * The M table contains powers of the input base, e.g. M[x] = G^x mod P
  881. *
  882. * The first half of the table is not computed though accept for M[0] and M[1]
  883. */
  884. /* now we need R mod m */
  885. fp_montgomery_calc_normalization (&res, P);
  886. /* now set M[1] to G * R mod m */
  887. if (fp_cmp_mag(P, G) != FP_GT) {
  888. /* G > P so we reduce it first */
  889. fp_mod(G, P, &M[1]);
  890. } else {
  891. fp_copy(G, &M[1]);
  892. }
  893. fp_mulmod (&M[1], &res, P, &M[1]);
  894. /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
  895. fp_copy (&M[1], &M[1 << (winsize - 1)]);
  896. for (x = 0; x < (winsize - 1); x++) {
  897. fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
  898. fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
  899. }
  900. /* create upper table */
  901. for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
  902. fp_mul(&M[x - 1], &M[1], &M[x]);
  903. fp_montgomery_reduce(&M[x], P, mp);
  904. }
  905. /* set initial mode and bit cnt */
  906. mode = 0;
  907. bitcnt = 1;
  908. buf = 0;
  909. digidx = X->used - 1;
  910. bitcpy = 0;
  911. bitbuf = 0;
  912. for (;;) {
  913. /* grab next digit as required */
  914. if (--bitcnt == 0) {
  915. /* if digidx == -1 we are out of digits so break */
  916. if (digidx == -1) {
  917. break;
  918. }
  919. /* read next digit and reset bitcnt */
  920. buf = X->dp[digidx--];
  921. bitcnt = (int)DIGIT_BIT;
  922. }
  923. /* grab the next msb from the exponent */
  924. y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
  925. buf <<= (fp_digit)1;
  926. /* if the bit is zero and mode == 0 then we ignore it
  927. * These represent the leading zero bits before the first 1 bit
  928. * in the exponent. Technically this opt is not required but it
  929. * does lower the # of trivial squaring/reductions used
  930. */
  931. if (mode == 0 && y == 0) {
  932. continue;
  933. }
  934. /* if the bit is zero and mode == 1 then we square */
  935. if (mode == 1 && y == 0) {
  936. fp_sqr(&res, &res);
  937. fp_montgomery_reduce(&res, P, mp);
  938. continue;
  939. }
  940. /* else we add it to the window */
  941. bitbuf |= (y << (winsize - ++bitcpy));
  942. mode = 2;
  943. if (bitcpy == winsize) {
  944. /* ok window is filled so square as required and multiply */
  945. /* square first */
  946. for (x = 0; x < winsize; x++) {
  947. fp_sqr(&res, &res);
  948. fp_montgomery_reduce(&res, P, mp);
  949. }
  950. /* then multiply */
  951. fp_mul(&res, &M[bitbuf], &res);
  952. fp_montgomery_reduce(&res, P, mp);
  953. /* empty window and reset */
  954. bitcpy = 0;
  955. bitbuf = 0;
  956. mode = 1;
  957. }
  958. }
  959. /* if bits remain then square/multiply */
  960. if (mode == 2 && bitcpy > 0) {
  961. /* square then multiply if the bit is set */
  962. for (x = 0; x < bitcpy; x++) {
  963. fp_sqr(&res, &res);
  964. fp_montgomery_reduce(&res, P, mp);
  965. /* get next bit of the window */
  966. bitbuf <<= 1;
  967. if ((bitbuf & (1 << winsize)) != 0) {
  968. /* then multiply */
  969. fp_mul(&res, &M[1], &res);
  970. fp_montgomery_reduce(&res, P, mp);
  971. }
  972. }
  973. }
  974. /* fixup result if Montgomery reduction is used
  975. * recall that any value in a Montgomery system is
  976. * actually multiplied by R mod n. So we have
  977. * to reduce one more time to cancel out the factor
  978. * of R.
  979. */
  980. fp_montgomery_reduce(&res, P, mp);
  981. /* swap res with Y */
  982. fp_copy (&res, Y);
  983. return FP_OKAY;
  984. }
  985. #endif
  986. int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
  987. {
  988. fp_int tmp;
  989. int err;
  990. /* prevent overflows */
  991. if (P->used > (FP_SIZE/2)) {
  992. return FP_VAL;
  993. }
  994. /* is X negative? */
  995. if (X->sign == FP_NEG) {
  996. /* yes, copy G and invmod it */
  997. fp_copy(G, &tmp);
  998. if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
  999. return err;
  1000. }
  1001. X->sign = FP_ZPOS;
  1002. err = _fp_exptmod(&tmp, X, P, Y);
  1003. if (X != Y) {
  1004. X->sign = FP_NEG;
  1005. }
  1006. return err;
  1007. } else {
  1008. /* Positive exponent so just exptmod */
  1009. return _fp_exptmod(G, X, P, Y);
  1010. }
  1011. }
  1012. /* computes a = 2**b */
  1013. void fp_2expt(fp_int *a, int b)
  1014. {
  1015. int z;
  1016. /* zero a as per default */
  1017. fp_zero (a);
  1018. if (b < 0) {
  1019. return;
  1020. }
  1021. z = b / DIGIT_BIT;
  1022. if (z >= FP_SIZE) {
  1023. return;
  1024. }
  1025. /* set the used count of where the bit will go */
  1026. a->used = z + 1;
  1027. /* put the single bit in its place */
  1028. a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
  1029. }
  1030. /* b = a*a */
  1031. void fp_sqr(fp_int *A, fp_int *B)
  1032. {
  1033. int y = A->used;
  1034. /* call generic if we're out of range */
  1035. if (y + y > FP_SIZE) {
  1036. fp_sqr_comba(A, B);
  1037. return ;
  1038. }
  1039. #if defined(TFM_SQR3)
  1040. if (y <= 3) {
  1041. fp_sqr_comba3(A,B);
  1042. return;
  1043. }
  1044. #endif
  1045. #if defined(TFM_SQR4)
  1046. if (y == 4) {
  1047. fp_sqr_comba4(A,B);
  1048. return;
  1049. }
  1050. #endif
  1051. #if defined(TFM_SQR6)
  1052. if (y <= 6) {
  1053. fp_sqr_comba6(A,B);
  1054. return;
  1055. }
  1056. #endif
  1057. #if defined(TFM_SQR7)
  1058. if (y == 7) {
  1059. fp_sqr_comba7(A,B);
  1060. return;
  1061. }
  1062. #endif
  1063. #if defined(TFM_SQR8)
  1064. if (y == 8) {
  1065. fp_sqr_comba8(A,B);
  1066. return;
  1067. }
  1068. #endif
  1069. #if defined(TFM_SQR9)
  1070. if (y == 9) {
  1071. fp_sqr_comba9(A,B);
  1072. return;
  1073. }
  1074. #endif
  1075. #if defined(TFM_SQR12)
  1076. if (y <= 12) {
  1077. fp_sqr_comba12(A,B);
  1078. return;
  1079. }
  1080. #endif
  1081. #if defined(TFM_SQR17)
  1082. if (y <= 17) {
  1083. fp_sqr_comba17(A,B);
  1084. return;
  1085. }
  1086. #endif
  1087. #if defined(TFM_SMALL_SET)
  1088. if (y <= 16) {
  1089. fp_sqr_comba_small(A,B);
  1090. return;
  1091. }
  1092. #endif
  1093. #if defined(TFM_SQR20)
  1094. if (y <= 20) {
  1095. fp_sqr_comba20(A,B);
  1096. return;
  1097. }
  1098. #endif
  1099. #if defined(TFM_SQR24)
  1100. if (y <= 24) {
  1101. fp_sqr_comba24(A,B);
  1102. return;
  1103. }
  1104. #endif
  1105. #if defined(TFM_SQR28)
  1106. if (y <= 28) {
  1107. fp_sqr_comba28(A,B);
  1108. return;
  1109. }
  1110. #endif
  1111. #if defined(TFM_SQR32)
  1112. if (y <= 32) {
  1113. fp_sqr_comba32(A,B);
  1114. return;
  1115. }
  1116. #endif
  1117. #if defined(TFM_SQR48)
  1118. if (y <= 48) {
  1119. fp_sqr_comba48(A,B);
  1120. return;
  1121. }
  1122. #endif
  1123. #if defined(TFM_SQR64)
  1124. if (y <= 64) {
  1125. fp_sqr_comba64(A,B);
  1126. return;
  1127. }
  1128. #endif
  1129. fp_sqr_comba(A, B);
  1130. }
  1131. /* generic comba squarer */
  1132. void fp_sqr_comba(fp_int *A, fp_int *B)
  1133. {
  1134. int pa, ix, iz;
  1135. fp_digit c0, c1, c2;
  1136. fp_int tmp, *dst;
  1137. #ifdef TFM_ISO
  1138. fp_word tt;
  1139. #endif
  1140. /* get size of output and trim */
  1141. pa = A->used + A->used;
  1142. if (pa >= FP_SIZE) {
  1143. pa = FP_SIZE-1;
  1144. }
  1145. /* number of output digits to produce */
  1146. COMBA_START;
  1147. COMBA_CLEAR;
  1148. if (A == B) {
  1149. fp_zero(&tmp);
  1150. dst = &tmp;
  1151. } else {
  1152. fp_zero(B);
  1153. dst = B;
  1154. }
  1155. for (ix = 0; ix < pa; ix++) {
  1156. int tx, ty, iy;
  1157. fp_digit *tmpy, *tmpx;
  1158. /* get offsets into the two bignums */
  1159. ty = MIN(A->used-1, ix);
  1160. tx = ix - ty;
  1161. /* setup temp aliases */
  1162. tmpx = A->dp + tx;
  1163. tmpy = A->dp + ty;
  1164. /* this is the number of times the loop will iterrate,
  1165. while (tx++ < a->used && ty-- >= 0) { ... }
  1166. */
  1167. iy = MIN(A->used-tx, ty+1);
  1168. /* now for squaring tx can never equal ty
  1169. * we halve the distance since they approach
  1170. * at a rate of 2x and we have to round because
  1171. * odd cases need to be executed
  1172. */
  1173. iy = MIN(iy, (ty-tx+1)>>1);
  1174. /* forward carries */
  1175. COMBA_FORWARD;
  1176. /* execute loop */
  1177. for (iz = 0; iz < iy; iz++) {
  1178. SQRADD2(*tmpx++, *tmpy--);
  1179. }
  1180. /* even columns have the square term in them */
  1181. if ((ix&1) == 0) {
  1182. /* TAO change COMBA_ADD back to SQRADD */
  1183. SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
  1184. }
  1185. /* store it */
  1186. COMBA_STORE(dst->dp[ix]);
  1187. }
  1188. COMBA_FINI;
  1189. /* setup dest */
  1190. dst->used = pa;
  1191. fp_clamp (dst);
  1192. if (dst != B) {
  1193. fp_copy(dst, B);
  1194. }
  1195. }
  1196. int fp_cmp(fp_int *a, fp_int *b)
  1197. {
  1198. if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
  1199. return FP_LT;
  1200. } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
  1201. return FP_GT;
  1202. } else {
  1203. /* compare digits */
  1204. if (a->sign == FP_NEG) {
  1205. /* if negative compare opposite direction */
  1206. return fp_cmp_mag(b, a);
  1207. } else {
  1208. return fp_cmp_mag(a, b);
  1209. }
  1210. }
  1211. }
  1212. /* compare against a single digit */
  1213. int fp_cmp_d(fp_int *a, fp_digit b)
  1214. {
  1215. /* compare based on sign */
  1216. if ((b && a->used == 0) || a->sign == FP_NEG) {
  1217. return FP_LT;
  1218. }
  1219. /* compare based on magnitude */
  1220. if (a->used > 1) {
  1221. return FP_GT;
  1222. }
  1223. /* compare the only digit of a to b */
  1224. if (a->dp[0] > b) {
  1225. return FP_GT;
  1226. } else if (a->dp[0] < b) {
  1227. return FP_LT;
  1228. } else {
  1229. return FP_EQ;
  1230. }
  1231. }
  1232. int fp_cmp_mag(fp_int *a, fp_int *b)
  1233. {
  1234. int x;
  1235. if (a->used > b->used) {
  1236. return FP_GT;
  1237. } else if (a->used < b->used) {
  1238. return FP_LT;
  1239. } else {
  1240. for (x = a->used - 1; x >= 0; x--) {
  1241. if (a->dp[x] > b->dp[x]) {
  1242. return FP_GT;
  1243. } else if (a->dp[x] < b->dp[x]) {
  1244. return FP_LT;
  1245. }
  1246. }
  1247. }
  1248. return FP_EQ;
  1249. }
  1250. /* setups the montgomery reduction */
  1251. int fp_montgomery_setup(fp_int *a, fp_digit *rho)
  1252. {
  1253. fp_digit x, b;
  1254. /* fast inversion mod 2**k
  1255. *
  1256. * Based on the fact that
  1257. *
  1258. * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
  1259. * => 2*X*A - X*X*A*A = 1
  1260. * => 2*(1) - (1) = 1
  1261. */
  1262. b = a->dp[0];
  1263. if ((b & 1) == 0) {
  1264. return FP_VAL;
  1265. }
  1266. x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  1267. x *= 2 - b * x; /* here x*a==1 mod 2**8 */
  1268. x *= 2 - b * x; /* here x*a==1 mod 2**16 */
  1269. x *= 2 - b * x; /* here x*a==1 mod 2**32 */
  1270. #ifdef FP_64BIT
  1271. x *= 2 - b * x; /* here x*a==1 mod 2**64 */
  1272. #endif
  1273. /* rho = -1/m mod b */
  1274. *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
  1275. return FP_OKAY;
  1276. }
  1277. /* computes a = B**n mod b without division or multiplication useful for
  1278. * normalizing numbers in a Montgomery system.
  1279. */
  1280. void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
  1281. {
  1282. int x, bits;
  1283. /* how many bits of last digit does b use */
  1284. bits = fp_count_bits (b) % DIGIT_BIT;
  1285. if (!bits) bits = DIGIT_BIT;
  1286. /* compute A = B^(n-1) * 2^(bits-1) */
  1287. if (b->used > 1) {
  1288. fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
  1289. } else {
  1290. fp_set(a, 1);
  1291. bits = 1;
  1292. }
  1293. /* now compute C = A * B mod b */
  1294. for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
  1295. fp_mul_2 (a, a);
  1296. if (fp_cmp_mag (a, b) != FP_LT) {
  1297. s_fp_sub (a, b, a);
  1298. }
  1299. }
  1300. }
  1301. #ifdef TFM_SMALL_MONT_SET
  1302. #include "fp_mont_small.i"
  1303. #endif
  1304. /* computes x/R == x (mod N) via Montgomery Reduction */
  1305. void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
  1306. {
  1307. fp_digit c[FP_SIZE], *_c, *tmpm, mu;
  1308. int oldused, x, y, pa;
  1309. /* bail if too large */
  1310. if (m->used > (FP_SIZE/2)) {
  1311. (void)mu; /* shut up compiler */
  1312. return;
  1313. }
  1314. #ifdef TFM_SMALL_MONT_SET
  1315. if (m->used <= 16) {
  1316. fp_montgomery_reduce_small(a, m, mp);
  1317. return;
  1318. }
  1319. #endif
  1320. #if defined(USE_MEMSET)
  1321. /* now zero the buff */
  1322. XMEMSET(c, 0, sizeof c);
  1323. #endif
  1324. pa = m->used;
  1325. /* copy the input */
  1326. oldused = a->used;
  1327. for (x = 0; x < oldused; x++) {
  1328. c[x] = a->dp[x];
  1329. }
  1330. #if !defined(USE_MEMSET)
  1331. for (; x < 2*pa+1; x++) {
  1332. c[x] = 0;
  1333. }
  1334. #endif
  1335. MONT_START;
  1336. for (x = 0; x < pa; x++) {
  1337. fp_digit cy = 0;
  1338. /* get Mu for this round */
  1339. LOOP_START;
  1340. _c = c + x;
  1341. tmpm = m->dp;
  1342. y = 0;
  1343. #if (defined(TFM_SSE2) || defined(TFM_X86_64))
  1344. for (; y < (pa & ~7); y += 8) {
  1345. INNERMUL8;
  1346. _c += 8;
  1347. tmpm += 8;
  1348. }
  1349. #endif
  1350. for (; y < pa; y++) {
  1351. INNERMUL;
  1352. ++_c;
  1353. }
  1354. LOOP_END;
  1355. while (cy) {
  1356. PROPCARRY;
  1357. ++_c;
  1358. }
  1359. }
  1360. /* now copy out */
  1361. _c = c + pa;
  1362. tmpm = a->dp;
  1363. for (x = 0; x < pa+1; x++) {
  1364. *tmpm++ = *_c++;
  1365. }
  1366. for (; x < oldused; x++) {
  1367. *tmpm++ = 0;
  1368. }
  1369. MONT_FINI;
  1370. a->used = pa+1;
  1371. fp_clamp(a);
  1372. /* if A >= m then A = A - m */
  1373. if (fp_cmp_mag (a, m) != FP_LT) {
  1374. s_fp_sub (a, m, a);
  1375. }
  1376. }
  1377. void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
  1378. {
  1379. /* zero the int */
  1380. fp_zero (a);
  1381. /* If we know the endianness of this architecture, and we're using
  1382. 32-bit fp_digits, we can optimize this */
  1383. #if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
  1384. /* But not for both simultaneously */
  1385. #if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
  1386. #error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
  1387. #endif
  1388. {
  1389. unsigned char *pd = (unsigned char *)a->dp;
  1390. if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
  1391. int excess = c - (FP_SIZE * sizeof(fp_digit));
  1392. c -= excess;
  1393. b += excess;
  1394. }
  1395. a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
  1396. /* read the bytes in */
  1397. #ifdef ENDIAN_BIG
  1398. {
  1399. /* Use Duff's device to unroll the loop. */
  1400. int idx = (c - 1) & ~3;
  1401. switch (c % 4) {
  1402. case 0: do { pd[idx+0] = *b++;
  1403. case 3: pd[idx+1] = *b++;
  1404. case 2: pd[idx+2] = *b++;
  1405. case 1: pd[idx+3] = *b++;
  1406. idx -= 4;
  1407. } while ((c -= 4) > 0);
  1408. }
  1409. }
  1410. #else
  1411. for (c -= 1; c >= 0; c -= 1) {
  1412. pd[c] = *b++;
  1413. }
  1414. #endif
  1415. }
  1416. #else
  1417. /* read the bytes in */
  1418. for (; c > 0; c--) {
  1419. fp_mul_2d (a, 8, a);
  1420. a->dp[0] |= *b++;
  1421. a->used += 1;
  1422. }
  1423. #endif
  1424. fp_clamp (a);
  1425. }
  1426. void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
  1427. {
  1428. int x;
  1429. fp_int t;
  1430. fp_init_copy(&t, a);
  1431. x = 0;
  1432. while (fp_iszero (&t) == FP_NO) {
  1433. b[x++] = (unsigned char) (t.dp[0] & 255);
  1434. fp_div_2d (&t, 8, &t, NULL);
  1435. }
  1436. fp_reverse (b, x);
  1437. }
  1438. int fp_unsigned_bin_size(fp_int *a)
  1439. {
  1440. int size = fp_count_bits (a);
  1441. return (size / 8 + ((size & 7) != 0 ? 1 : 0));
  1442. }
  1443. void fp_set(fp_int *a, fp_digit b)
  1444. {
  1445. fp_zero(a);
  1446. a->dp[0] = b;
  1447. a->used = a->dp[0] ? 1 : 0;
  1448. }
  1449. int fp_count_bits (fp_int * a)
  1450. {
  1451. int r;
  1452. fp_digit q;
  1453. /* shortcut */
  1454. if (a->used == 0) {
  1455. return 0;
  1456. }
  1457. /* get number of digits and add that */
  1458. r = (a->used - 1) * DIGIT_BIT;
  1459. /* take the last digit and count the bits in it */
  1460. q = a->dp[a->used - 1];
  1461. while (q > ((fp_digit) 0)) {
  1462. ++r;
  1463. q >>= ((fp_digit) 1);
  1464. }
  1465. return r;
  1466. }
  1467. void fp_lshd(fp_int *a, int x)
  1468. {
  1469. int y;
  1470. /* move up and truncate as required */
  1471. y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
  1472. /* store new size */
  1473. a->used = y + 1;
  1474. /* move digits */
  1475. for (; y >= x; y--) {
  1476. a->dp[y] = a->dp[y-x];
  1477. }
  1478. /* zero lower digits */
  1479. for (; y >= 0; y--) {
  1480. a->dp[y] = 0;
  1481. }
  1482. /* clamp digits */
  1483. fp_clamp(a);
  1484. }
  1485. void fp_rshd(fp_int *a, int x)
  1486. {
  1487. int y;
  1488. /* too many digits just zero and return */
  1489. if (x >= a->used) {
  1490. fp_zero(a);
  1491. return;
  1492. }
  1493. /* shift */
  1494. for (y = 0; y < a->used - x; y++) {
  1495. a->dp[y] = a->dp[y+x];
  1496. }
  1497. /* zero rest */
  1498. for (; y < a->used; y++) {
  1499. a->dp[y] = 0;
  1500. }
  1501. /* decrement count */
  1502. a->used -= x;
  1503. fp_clamp(a);
  1504. }
  1505. /* reverse an array, used for radix code */
  1506. void fp_reverse (unsigned char *s, int len)
  1507. {
  1508. int ix, iy;
  1509. unsigned char t;
  1510. ix = 0;
  1511. iy = len - 1;
  1512. while (ix < iy) {
  1513. t = s[ix];
  1514. s[ix] = s[iy];
  1515. s[iy] = t;
  1516. ++ix;
  1517. --iy;
  1518. }
  1519. }
  1520. /* CyaSSL callers from normal lib */
  1521. /* init a new mp_int */
  1522. int mp_init (mp_int * a)
  1523. {
  1524. if (a)
  1525. fp_init(a);
  1526. return MP_OKAY;
  1527. }
  1528. /* clear one (frees) */
  1529. void mp_clear (mp_int * a)
  1530. {
  1531. fp_zero(a);
  1532. }
  1533. /* handle up to 6 inits */
  1534. int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, mp_int* f)
  1535. {
  1536. if (a)
  1537. fp_init(a);
  1538. if (b)
  1539. fp_init(b);
  1540. if (c)
  1541. fp_init(c);
  1542. if (d)
  1543. fp_init(d);
  1544. if (e)
  1545. fp_init(e);
  1546. if (f)
  1547. fp_init(f);
  1548. return MP_OKAY;
  1549. }
  1550. /* high level addition (handles signs) */
  1551. int mp_add (mp_int * a, mp_int * b, mp_int * c)
  1552. {
  1553. fp_add(a, b, c);
  1554. return MP_OKAY;
  1555. }
  1556. /* high level subtraction (handles signs) */
  1557. int mp_sub (mp_int * a, mp_int * b, mp_int * c)
  1558. {
  1559. fp_sub(a, b, c);
  1560. return MP_OKAY;
  1561. }
  1562. /* high level multiplication (handles sign) */
  1563. int mp_mul (mp_int * a, mp_int * b, mp_int * c)
  1564. {
  1565. fp_mul(a, b, c);
  1566. return MP_OKAY;
  1567. }
  1568. /* d = a * b (mod c) */
  1569. int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
  1570. {
  1571. return fp_mulmod(a, b, c, d);
  1572. }
  1573. /* c = a mod b, 0 <= c < b */
  1574. int mp_mod (mp_int * a, mp_int * b, mp_int * c)
  1575. {
  1576. return fp_mod (a, b, c);
  1577. }
  1578. /* hac 14.61, pp608 */
  1579. int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
  1580. {
  1581. return fp_invmod(a, b, c);
  1582. }
  1583. /* this is a shell function that calls either the normal or Montgomery
  1584. * exptmod functions. Originally the call to the montgomery code was
  1585. * embedded in the normal function but that wasted alot of stack space
  1586. * for nothing (since 99% of the time the Montgomery code would be called)
  1587. */
  1588. int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
  1589. {
  1590. return fp_exptmod(G, X, P, Y);
  1591. }
  1592. /* compare two ints (signed)*/
  1593. int mp_cmp (mp_int * a, mp_int * b)
  1594. {
  1595. return fp_cmp(a, b);
  1596. }
  1597. /* compare a digit */
  1598. int mp_cmp_d(mp_int * a, mp_digit b)
  1599. {
  1600. return fp_cmp_d(a, b);
  1601. }
  1602. /* get the size for an unsigned equivalent */
  1603. int mp_unsigned_bin_size (mp_int * a)
  1604. {
  1605. return fp_unsigned_bin_size(a);
  1606. }
  1607. /* store in unsigned [big endian] format */
  1608. int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
  1609. {
  1610. fp_to_unsigned_bin(a,b);
  1611. return MP_OKAY;
  1612. }
  1613. /* reads a unsigned char array, assumes the msb is stored first [big endian] */
  1614. int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
  1615. {
  1616. fp_read_unsigned_bin(a, (unsigned char *)b, c);
  1617. return MP_OKAY;
  1618. }
  1619. #if defined(CYASSL_KEY_GEN) || defined (HAVE_ECC)
  1620. /* c = a * a (mod b) */
  1621. int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
  1622. {
  1623. fp_int tmp;
  1624. fp_zero(&tmp);
  1625. fp_sqr(a, &tmp);
  1626. return fp_mod(&tmp, b, c);
  1627. }
  1628. /* fast math conversion */
  1629. int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
  1630. {
  1631. return fp_sqrmod(a, b, c);
  1632. }
  1633. /* fast math conversion */
  1634. int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
  1635. {
  1636. fp_montgomery_calc_normalization(a, b);
  1637. return MP_OKAY;
  1638. }
  1639. /* fast math conversion */
  1640. int mp_copy(fp_int* a, fp_int* b)
  1641. {
  1642. fp_copy(a, b);
  1643. return MP_OKAY;
  1644. }
  1645. #endif /* CYASSL_KEYGEN || HAVE_ECC */
  1646. #ifdef CYASSL_KEY_GEN
  1647. void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
  1648. void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
  1649. void fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
  1650. int fp_isprime(fp_int *a);
  1651. int fp_cnt_lsb(fp_int *a);
  1652. /* fast math wrappers */
  1653. int mp_set_int(fp_int *a, fp_digit b)
  1654. {
  1655. fp_set(a, b);
  1656. return MP_OKAY;
  1657. }
  1658. int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
  1659. {
  1660. fp_gcd(a, b, c);
  1661. return MP_OKAY;
  1662. }
  1663. int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
  1664. {
  1665. fp_lcm(a, b, c);
  1666. return MP_OKAY;
  1667. }
  1668. int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
  1669. {
  1670. fp_sub_d(a, b, c);
  1671. return MP_OKAY;
  1672. }
  1673. int mp_prime_is_prime(mp_int* a, int t, int* result)
  1674. {
  1675. *result = fp_isprime(a);
  1676. return MP_OKAY;
  1677. }
  1678. /* c = a - b */
  1679. void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
  1680. {
  1681. fp_int tmp;
  1682. fp_set(&tmp, b);
  1683. fp_sub(a, &tmp, c);
  1684. }
  1685. static int s_is_power_of_two(fp_digit b, int *p)
  1686. {
  1687. int x;
  1688. /* fast return if no power of two */
  1689. if ((b==0) || (b & (b-1))) {
  1690. return 0;
  1691. }
  1692. for (x = 0; x < DIGIT_BIT; x++) {
  1693. if (b == (((fp_digit)1)<<x)) {
  1694. *p = x;
  1695. return 1;
  1696. }
  1697. }
  1698. return 0;
  1699. }
  1700. /* a/b => cb + d == a */
  1701. int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
  1702. {
  1703. fp_int q;
  1704. fp_word w;
  1705. fp_digit t;
  1706. int ix;
  1707. /* cannot divide by zero */
  1708. if (b == 0) {
  1709. return FP_VAL;
  1710. }
  1711. /* quick outs */
  1712. if (b == 1 || fp_iszero(a) == 1) {
  1713. if (d != NULL) {
  1714. *d = 0;
  1715. }
  1716. if (c != NULL) {
  1717. fp_copy(a, c);
  1718. }
  1719. return FP_OKAY;
  1720. }
  1721. /* power of two ? */
  1722. if (s_is_power_of_two(b, &ix) == 1) {
  1723. if (d != NULL) {
  1724. *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
  1725. }
  1726. if (c != NULL) {
  1727. fp_div_2d(a, ix, c, NULL);
  1728. }
  1729. return FP_OKAY;
  1730. }
  1731. /* no easy answer [c'est la vie]. Just division */
  1732. fp_init(&q);
  1733. q.used = a->used;
  1734. q.sign = a->sign;
  1735. w = 0;
  1736. for (ix = a->used - 1; ix >= 0; ix--) {
  1737. w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
  1738. if (w >= b) {
  1739. t = (fp_digit)(w / b);
  1740. w -= ((fp_word)t) * ((fp_word)b);
  1741. } else {
  1742. t = 0;
  1743. }
  1744. q.dp[ix] = (fp_digit)t;
  1745. }
  1746. if (d != NULL) {
  1747. *d = (fp_digit)w;
  1748. }
  1749. if (c != NULL) {
  1750. fp_clamp(&q);
  1751. fp_copy(&q, c);
  1752. }
  1753. return FP_OKAY;
  1754. }
  1755. /* c = a mod b, 0 <= c < b */
  1756. int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
  1757. {
  1758. return fp_div_d(a, b, NULL, c);
  1759. }
  1760. /* Miller-Rabin test of "a" to the base of "b" as described in
  1761. * HAC pp. 139 Algorithm 4.24
  1762. *
  1763. * Sets result to 0 if definitely composite or 1 if probably prime.
  1764. * Randomly the chance of error is no more than 1/4 and often
  1765. * very much lower.
  1766. */
  1767. void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
  1768. {
  1769. fp_int n1, y, r;
  1770. int s, j;
  1771. /* default */
  1772. *result = FP_NO;
  1773. /* ensure b > 1 */
  1774. if (fp_cmp_d(b, 1) != FP_GT) {
  1775. return;
  1776. }
  1777. /* get n1 = a - 1 */
  1778. fp_init_copy(&n1, a);
  1779. fp_sub_d(&n1, 1, &n1);
  1780. /* set 2**s * r = n1 */
  1781. fp_init_copy(&r, &n1);
  1782. /* count the number of least significant bits
  1783. * which are zero
  1784. */
  1785. s = fp_cnt_lsb(&r);
  1786. /* now divide n - 1 by 2**s */
  1787. fp_div_2d (&r, s, &r, NULL);
  1788. /* compute y = b**r mod a */
  1789. fp_init(&y);
  1790. fp_exptmod(b, &r, a, &y);
  1791. /* if y != 1 and y != n1 do */
  1792. if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
  1793. j = 1;
  1794. /* while j <= s-1 and y != n1 */
  1795. while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
  1796. fp_sqrmod (&y, a, &y);
  1797. /* if y == 1 then composite */
  1798. if (fp_cmp_d (&y, 1) == FP_EQ) {
  1799. return;
  1800. }
  1801. ++j;
  1802. }
  1803. /* if y != n1 then composite */
  1804. if (fp_cmp (&y, &n1) != FP_EQ) {
  1805. return;
  1806. }
  1807. }
  1808. /* probably prime now */
  1809. *result = FP_YES;
  1810. }
  1811. /* a few primes */
  1812. static const fp_digit primes[256] = {
  1813. 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
  1814. 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
  1815. 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
  1816. 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
  1817. 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
  1818. 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
  1819. 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
  1820. 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
  1821. 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
  1822. 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
  1823. 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
  1824. 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
  1825. 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
  1826. 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
  1827. 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
  1828. 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
  1829. 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
  1830. 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
  1831. 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
  1832. 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
  1833. 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
  1834. 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
  1835. 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
  1836. 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
  1837. 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
  1838. 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
  1839. 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
  1840. 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
  1841. 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
  1842. 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
  1843. 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
  1844. 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
  1845. };
  1846. int fp_isprime(fp_int *a)
  1847. {
  1848. fp_int b;
  1849. fp_digit d;
  1850. int r, res;
  1851. /* do trial division */
  1852. for (r = 0; r < 256; r++) {
  1853. fp_mod_d(a, primes[r], &d);
  1854. if (d == 0) {
  1855. return FP_NO;
  1856. }
  1857. }
  1858. /* now do 8 miller rabins */
  1859. fp_init(&b);
  1860. for (r = 0; r < 8; r++) {
  1861. fp_set(&b, primes[r]);
  1862. fp_prime_miller_rabin(a, &b, &res);
  1863. if (res == FP_NO) {
  1864. return FP_NO;
  1865. }
  1866. }
  1867. return FP_YES;
  1868. }
  1869. /* c = [a, b] */
  1870. void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
  1871. {
  1872. fp_int t1, t2;
  1873. fp_init(&t1);
  1874. fp_init(&t2);
  1875. fp_gcd(a, b, &t1);
  1876. if (fp_cmp_mag(a, b) == FP_GT) {
  1877. fp_div(a, &t1, &t2, NULL);
  1878. fp_mul(b, &t2, c);
  1879. } else {
  1880. fp_div(b, &t1, &t2, NULL);
  1881. fp_mul(a, &t2, c);
  1882. }
  1883. }
  1884. static const int lnz[16] = {
  1885. 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  1886. };
  1887. /* Counts the number of lsbs which are zero before the first zero bit */
  1888. int fp_cnt_lsb(fp_int *a)
  1889. {
  1890. int x;
  1891. fp_digit q, qq;
  1892. /* easy out */
  1893. if (fp_iszero(a) == 1) {
  1894. return 0;
  1895. }
  1896. /* scan lower digits until non-zero */
  1897. for (x = 0; x < a->used && a->dp[x] == 0; x++);
  1898. q = a->dp[x];
  1899. x *= DIGIT_BIT;
  1900. /* now scan this digit until a 1 is found */
  1901. if ((q & 1) == 0) {
  1902. do {
  1903. qq = q & 15;
  1904. x += lnz[qq];
  1905. q >>= 4;
  1906. } while (qq == 0);
  1907. }
  1908. return x;
  1909. }
  1910. /* c = (a, b) */
  1911. void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
  1912. {
  1913. fp_int u, v, r;
  1914. /* either zero than gcd is the largest */
  1915. if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
  1916. fp_abs (b, c);
  1917. return;
  1918. }
  1919. if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
  1920. fp_abs (a, c);
  1921. return;
  1922. }
  1923. /* optimized. At this point if a == 0 then
  1924. * b must equal zero too
  1925. */
  1926. if (fp_iszero (a) == 1) {
  1927. fp_zero(c);
  1928. return;
  1929. }
  1930. /* sort inputs */
  1931. if (fp_cmp_mag(a, b) != FP_LT) {
  1932. fp_init_copy(&u, a);
  1933. fp_init_copy(&v, b);
  1934. } else {
  1935. fp_init_copy(&u, b);
  1936. fp_init_copy(&v, a);
  1937. }
  1938. fp_zero(&r);
  1939. while (fp_iszero(&v) == FP_NO) {
  1940. fp_mod(&u, &v, &r);
  1941. fp_copy(&v, &u);
  1942. fp_copy(&r, &v);
  1943. }
  1944. fp_copy(&u, c);
  1945. }
  1946. #endif /* CYASSL_KEY_GEN */
  1947. #ifdef HAVE_ECC
  1948. /* chars used in radix conversions */
  1949. const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  1950. /* c = a + b */
  1951. void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
  1952. {
  1953. fp_int tmp;
  1954. fp_set(&tmp, b);
  1955. fp_add(a,&tmp,c);
  1956. }
  1957. int fp_read_radix(fp_int *a, const char *str, int radix)
  1958. {
  1959. int y, neg;
  1960. char ch;
  1961. /* make sure the radix is ok */
  1962. if (radix < 2 || radix > 64) {
  1963. return FP_VAL;
  1964. }
  1965. /* if the leading digit is a
  1966. * minus set the sign to negative.
  1967. */
  1968. if (*str == '-') {
  1969. ++str;
  1970. neg = FP_NEG;
  1971. } else {
  1972. neg = FP_ZPOS;
  1973. }
  1974. /* set the integer to the default of zero */
  1975. fp_zero (a);
  1976. /* process each digit of the string */
  1977. while (*str) {
  1978. /* if the radix < 36 the conversion is case insensitive
  1979. * this allows numbers like 1AB and 1ab to represent the same value
  1980. * [e.g. in hex]
  1981. */
  1982. ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
  1983. for (y = 0; y < 64; y++) {
  1984. if (ch == fp_s_rmap[y]) {
  1985. break;
  1986. }
  1987. }
  1988. /* if the char was found in the map
  1989. * and is less than the given radix add it
  1990. * to the number, otherwise exit the loop.
  1991. */
  1992. if (y < radix) {
  1993. fp_mul_d (a, (fp_digit) radix, a);
  1994. fp_add_d (a, (fp_digit) y, a);
  1995. } else {
  1996. break;
  1997. }
  1998. ++str;
  1999. }
  2000. /* set the sign only if a != 0 */
  2001. if (fp_iszero(a) != FP_YES) {
  2002. a->sign = neg;
  2003. }
  2004. return FP_OKAY;
  2005. }
  2006. /* fast math conversion */
  2007. int mp_read_radix(mp_int *a, const char *str, int radix)
  2008. {
  2009. return fp_read_radix(a, str, radix);
  2010. }
  2011. /* fast math conversion */
  2012. int mp_iszero(mp_int* a)
  2013. {
  2014. return fp_iszero(a);
  2015. }
  2016. /* fast math conversion */
  2017. int mp_set(fp_int *a, fp_digit b)
  2018. {
  2019. fp_set(a,b);
  2020. return MP_OKAY;
  2021. }
  2022. /* fast math conversion */
  2023. int mp_sqr(fp_int *A, fp_int *B)
  2024. {
  2025. fp_sqr(A, B);
  2026. return MP_OKAY;
  2027. }
  2028. /* fast math conversion */
  2029. int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
  2030. {
  2031. fp_montgomery_reduce(a, m, mp);
  2032. return MP_OKAY;
  2033. }
  2034. /* fast math conversion */
  2035. int mp_montgomery_setup(fp_int *a, fp_digit *rho)
  2036. {
  2037. return fp_montgomery_setup(a, rho);
  2038. }
  2039. /* fast math conversion */
  2040. int mp_isodd(mp_int* a)
  2041. {
  2042. return fp_isodd(a);
  2043. }
  2044. int mp_div_2(fp_int * a, fp_int * b)
  2045. {
  2046. fp_div_2(a, b);
  2047. return MP_OKAY;
  2048. }
  2049. #endif /* HAVE_ECC */
  2050. #endif /* USE_FAST_MATH */