/* sp_int.c
 *
 * Copyright (C) 2006-2020 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Implementation by Sean Parkinson. */

/*
DESCRIPTION
This library provides single precision (SP) integer math functions.
*/

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef NO_INLINE
    #include <wolfssl/wolfcrypt/misc.h>
#else
    #define WOLFSSL_MISC_INCLUDED
    #include <wolfcrypt/src/misc.c>
#endif
  37. /* SP Build Options:
  38. * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
  39. * WOLFSSL_HAVE_SP_DH: Enable SP DH support
  40. * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
  41. * WOLFSSL_SP_MATH: Use only single precision math and algorithms
  42. * it supports (no fastmath tfm.c or normal integer.c)
  43. * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
  44. * (replacement for tfm.c and integer.c)
  45. * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
  46. * stack variables
  47. * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
  48. * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
  49. * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
  50. * WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
  51. * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
  52. * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
  53. * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
  54. * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
  55. * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
  56. * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
  57. * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
  58. * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
  59. * (used with -mthumb)
  60. * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
  61. * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
  62. * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
  63. * WOLFSSL_SP_PPC Enable PPC assembly speedups
  64. * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
  65. * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
  66. * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
  67. * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
  68. * WOLFSSL_SP_S390X Enable S390X assembly speedups
  69. * SP_WORD_SIZE Force 32 or 64 bit mode
  70. * WOLFSSL_SP_NONBLOCK Enables "non-blocking" mode for SP math, which
  71. * will return FP_WOULDBLOCK for long operations and the function must be
  72. * called again until complete.
  73. * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
  74. * exponentiation implementation.
  75. */
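/* Illustrative configuration (a sketch, not part of this file): using only the
 * options listed above, a user_settings.h that selects the SP math back-end
 * for RSA and ECC on a small target could look like the following. The exact
 * set of defines needed depends on the platform and build system.
 *
 *   #define WOLFSSL_SP_MATH_ALL
 *   #define WOLFSSL_HAVE_SP_RSA
 *   #define WOLFSSL_HAVE_SP_ECC
 *   #define WOLFSSL_SP_SMALL
 *   #define WOLFSSL_SP_NO_3072
 */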
  76. #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
  77. #include <wolfssl/wolfcrypt/sp_int.h>
  78. /* DECL_SP_INT: Declare one variable of type 'sp_int'. */
  79. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  80. !defined(WOLFSSL_SP_NO_MALLOC)
  81. /* Declare a variable that will be assigned a value on XMALLOC. */
  82. #define DECL_SP_INT(n, s) \
  83. sp_int* n = NULL
  84. #else
  85. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  86. defined(WOLFSSL_SP_SMALL)
  87. /* Declare a variable on the stack with the required data size. */
  88. #define DECL_SP_INT(n, s) \
  89. byte n##d[MP_INT_SIZEOF(s)]; \
  90. sp_int* n = (sp_int*)n##d
  91. #else
  92. /* Declare a variable on the stack. */
  93. #define DECL_SP_INT(n, s) \
  94. sp_int n[1]
  95. #endif
  96. #endif
  97. /* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
  98. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  99. !defined(WOLFSSL_SP_NO_MALLOC)
  100. /* Dynamically allocate just enough data to support size. */
  101. #define ALLOC_SP_INT(n, s, err, h) \
  102. do { \
  103. if (err == MP_OKAY) { \
  104. n = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), h, DYNAMIC_TYPE_BIGINT); \
  105. if (n == NULL) { \
  106. err = MP_MEM; \
  107. } \
  108. } \
  109. } \
  110. while (0)
  111. /* Dynamically allocate just enough data to support size - and set size. */
  112. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  113. do { \
  114. ALLOC_SP_INT(n, s, err, h); \
  115. if (err == MP_OKAY) { \
  116. n->size = s; \
  117. } \
  118. } \
  119. while (0)
  120. #else
  121. /* Array declared on stack - nothing to do. */
  122. #define ALLOC_SP_INT(n, s, err, h)
  123. /* Array declared on stack - set the size field. */
  124. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  125. n->size = s;
  126. #endif
  127. /* FREE_SP_INT: Free an 'sp_int' variable. */
  128. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  129. !defined(WOLFSSL_SP_NO_MALLOC)
  130. /* Free dynamically allocated data. */
  131. #define FREE_SP_INT(n, h) \
  132. do { \
  133. if (n != NULL) { \
  134. XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
  135. } \
  136. } \
  137. while (0)
  138. #else
  139. /* Nothing to do as declared on stack. */
  140. #define FREE_SP_INT(n, h)
  141. #endif
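/* Typical lifecycle of a single temporary using the macros above (an
 * illustrative sketch, not code from this file; 'used' and the surrounding
 * function are assumptions):
 *
 *   int err = MP_OKAY;
 *   DECL_SP_INT(t, used);                  // declaration; storage depends on build
 *   ALLOC_SP_INT_SIZE(t, used, err, NULL); // heap or stack, sets t->size
 *   if (err == MP_OKAY) {
 *       // ... initialize and use t ...
 *   }
 *   FREE_SP_INT(t, NULL);                  // no-op for stack-only builds
 */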
  142. /* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
  143. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  144. !defined(WOLFSSL_SP_NO_MALLOC)
  145. /* Declare a variable that will be assigned a value on XMALLOC. */
  146. #define DECL_SP_INT_ARRAY(n, s, c) \
  147. sp_int* n##d = NULL; \
  148. sp_int* n[c] = { NULL, }
  149. #else
  150. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  151. defined(WOLFSSL_SP_SMALL)
  152. /* Declare a variable on the stack with the required data size. */
  153. #define DECL_SP_INT_ARRAY(n, s, c) \
  154. byte n##d[MP_INT_SIZEOF(s) * (c)]; \
  155. sp_int* n[c]
  156. #else
  157. /* Declare a variable on the stack. */
  158. #define DECL_SP_INT_ARRAY(n, s, c) \
  159. sp_int n##d[c]; \
  160. sp_int* n[c]
  161. #endif
  162. #endif
  163. /* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
  164. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  165. !defined(WOLFSSL_SP_NO_MALLOC)
  166. /* Dynamically allocate just enough data to support multiple sp_ints of the
  167. * required size. Use pointers into data to make up array and set sizes.
  168. */
  169. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  170. do { \
  171. if (err == MP_OKAY) { \
  172. n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), h, \
  173. DYNAMIC_TYPE_BIGINT); \
  174. if (n##d == NULL) { \
  175. err = MP_MEM; \
  176. } \
  177. else { \
  178. int n##ii; \
  179. n[0] = n##d; \
  180. n[0]->size = s; \
  181. for (n##ii = 1; n##ii < (c); n##ii++) { \
  182. n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \
  183. n[n##ii]->size = s; \
  184. } \
  185. } \
  186. } \
  187. } \
  188. while (0)
  189. #else
  190. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  191. defined(WOLFSSL_SP_SMALL)
  192. /* Data declared on stack that supports multiple sp_ints of the
  193. * required size. Use pointers into data to make up array and set sizes.
  194. */
  195. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  196. do { \
  197. if (err == MP_OKAY) { \
  198. int n##ii; \
  199. n[0] = (sp_int*)n##d; \
  200. n[0]->size = s; \
  201. for (n##ii = 1; n##ii < (c); n##ii++) { \
  202. n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \
  203. n[n##ii]->size = s; \
  204. } \
  205. } \
  206. } \
  207. while (0)
  208. #else
  209. /* Data declared on stack that supports multiple sp_ints of the
  210. * required size. Set into array and set sizes.
  211. */
  212. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  213. do { \
  214. if (err == MP_OKAY) { \
  215. int n##ii; \
  216. for (n##ii = 0; n##ii < (c); n##ii++) { \
  217. n[n##ii] = &n##d[n##ii]; \
  218. n[n##ii]->size = s; \
  219. } \
  220. } \
  221. } \
  222. while (0)
  223. #endif
  224. #endif
  225. /* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
  226. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  227. !defined(WOLFSSL_SP_NO_MALLOC)
  228. /* Free data variable that was dynamically allocated. */
  229. #define FREE_SP_INT_ARRAY(n, h) \
  230. do { \
  231. if (n##d != NULL) { \
  232. XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
  233. } \
  234. } \
  235. while (0)
  236. #else
  237. /* Nothing to do as data declared on stack. */
  238. #define FREE_SP_INT_ARRAY(n, h)
  239. #endif
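/* The array variants follow the same pattern; for example, two temporaries of
 * the same size (an illustrative sketch only):
 *
 *   int err = MP_OKAY;
 *   DECL_SP_INT_ARRAY(t, used, 2);
 *   ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
 *   if (err == MP_OKAY) {
 *       // t[0] and t[1] now point at sp_ints with 'size' set to 'used'
 *   }
 *   FREE_SP_INT_ARRAY(t, NULL);
 */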
  240. #ifndef WOLFSSL_NO_ASM
  241. #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
  242. /* Multiply va by vb and store double size result in: vh | vl */
  243. #define SP_ASM_MUL(vl, vh, va, vb) \
  244. __asm__ __volatile__ ( \
  245. "movq %[b], %%rax \n\t" \
  246. "mulq %[a] \n\t" \
  247. "movq %%rax, %[l] \n\t" \
  248. "movq %%rdx, %[h] \n\t" \
  249. : [h] "+r" (vh), [l] "+r" (vl) \
  250. : [a] "m" (va), [b] "m" (vb) \
  251. : "memory", "%rax", "%rdx", "cc" \
  252. )
  253. /* Multiply va by vb and store double size result in: vo | vh | vl */
  254. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  255. __asm__ __volatile__ ( \
  256. "movq %[b], %%rax \n\t" \
  257. "mulq %[a] \n\t" \
  258. "movq $0 , %[o] \n\t" \
  259. "movq %%rax, %[l] \n\t" \
  260. "movq %%rdx, %[h] \n\t" \
  261. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  262. : [a] "m" (va), [b] "m" (vb) \
  263. : "%rax", "%rdx", "cc" \
  264. )
  265. /* Multiply va by vb and add double size result into: vo | vh | vl */
  266. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  267. __asm__ __volatile__ ( \
  268. "movq %[b], %%rax \n\t" \
  269. "mulq %[a] \n\t" \
  270. "addq %%rax, %[l] \n\t" \
  271. "adcq %%rdx, %[h] \n\t" \
  272. "adcq $0 , %[o] \n\t" \
  273. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  274. : [a] "m" (va), [b] "m" (vb) \
  275. : "%rax", "%rdx", "cc" \
  276. )
  277. /* Multiply va by vb and add double size result into: vh | vl */
  278. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  279. __asm__ __volatile__ ( \
  280. "movq %[b], %%rax \n\t" \
  281. "mulq %[a] \n\t" \
  282. "addq %%rax, %[l] \n\t" \
  283. "adcq %%rdx, %[h] \n\t" \
  284. : [l] "+r" (vl), [h] "+r" (vh) \
  285. : [a] "m" (va), [b] "m" (vb) \
  286. : "%rax", "%rdx", "cc" \
  287. )
  288. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  289. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  290. __asm__ __volatile__ ( \
  291. "movq %[b], %%rax \n\t" \
  292. "mulq %[a] \n\t" \
  293. "addq %%rax, %[l] \n\t" \
  294. "adcq %%rdx, %[h] \n\t" \
  295. "adcq $0 , %[o] \n\t" \
  296. "addq %%rax, %[l] \n\t" \
  297. "adcq %%rdx, %[h] \n\t" \
  298. "adcq $0 , %[o] \n\t" \
  299. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  300. : [a] "m" (va), [b] "m" (vb) \
  301. : "%rax", "%rdx", "cc" \
  302. )
  303. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  304. * Assumes first add will not overflow vh | vl
  305. */
  306. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  307. __asm__ __volatile__ ( \
  308. "movq %[b], %%rax \n\t" \
  309. "mulq %[a] \n\t" \
  310. "addq %%rax, %[l] \n\t" \
  311. "adcq %%rdx, %[h] \n\t" \
  312. "addq %%rax, %[l] \n\t" \
  313. "adcq %%rdx, %[h] \n\t" \
  314. "adcq $0 , %[o] \n\t" \
  315. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  316. : [a] "m" (va), [b] "m" (vb) \
  317. : "%rax", "%rdx", "cc" \
  318. )
  319. /* Square va and store double size result in: vh | vl */
  320. #define SP_ASM_SQR(vl, vh, va) \
  321. __asm__ __volatile__ ( \
  322. "movq %[a], %%rax \n\t" \
  323. "mulq %%rax \n\t" \
  324. "movq %%rax, %[l] \n\t" \
  325. "movq %%rdx, %[h] \n\t" \
  326. : [h] "+r" (vh), [l] "+r" (vl) \
  327. : [a] "m" (va) \
  328. : "memory", "%rax", "%rdx", "cc" \
  329. )
  330. /* Square va and add double size result into: vo | vh | vl */
  331. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  332. __asm__ __volatile__ ( \
  333. "movq %[a], %%rax \n\t" \
  334. "mulq %%rax \n\t" \
  335. "addq %%rax, %[l] \n\t" \
  336. "adcq %%rdx, %[h] \n\t" \
  337. "adcq $0 , %[o] \n\t" \
  338. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  339. : [a] "m" (va) \
  340. : "%rax", "%rdx", "cc" \
  341. )
  342. /* Square va and add double size result into: vh | vl */
  343. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  344. __asm__ __volatile__ ( \
  345. "movq %[a], %%rax \n\t" \
  346. "mulq %%rax \n\t" \
  347. "addq %%rax, %[l] \n\t" \
  348. "adcq %%rdx, %[h] \n\t" \
  349. : [l] "+r" (vl), [h] "+r" (vh) \
  350. : [a] "m" (va) \
  351. : "%rax", "%rdx", "cc" \
  352. )
  353. /* Add va into: vh | vl */
  354. #define SP_ASM_ADDC(vl, vh, va) \
  355. __asm__ __volatile__ ( \
  356. "addq %[a], %[l] \n\t" \
  357. "adcq $0 , %[h] \n\t" \
  358. : [l] "+r" (vl), [h] "+r" (vh) \
  359. : [a] "m" (va) \
  360. : "cc" \
  361. )
  362. /* Add va, variable in a register, into: vh | vl */
  363. #define SP_ASM_ADDC_REG(vl, vh, va) \
  364. __asm__ __volatile__ ( \
  365. "addq %[a], %[l] \n\t" \
  366. "adcq $0 , %[h] \n\t" \
  367. : [l] "+r" (vl), [h] "+r" (vh) \
  368. : [a] "r" (va) \
  369. : "cc" \
  370. )
  371. /* Sub va from: vh | vl */
  372. #define SP_ASM_SUBC(vl, vh, va) \
  373. __asm__ __volatile__ ( \
  374. "subq %[a], %[l] \n\t" \
  375. "sbbq $0 , %[h] \n\t" \
  376. : [l] "+r" (vl), [h] "+r" (vh) \
  377. : [a] "m" (va) \
  378. : "cc" \
  379. )
  380. /* Add two times vc | vb | va into vo | vh | vl */
  381. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  382. __asm__ __volatile__ ( \
  383. "addq %[a], %[l] \n\t" \
  384. "adcq %[b], %[h] \n\t" \
  385. "adcq %[c], %[o] \n\t" \
  386. "addq %[a], %[l] \n\t" \
  387. "adcq %[b], %[h] \n\t" \
  388. "adcq %[c], %[o] \n\t" \
  389. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  390. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  391. : "%rax", "%rdx", "cc" \
  392. )
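/* SP_ASM_ADD_DBL_3 adds the triple-word value vc | vb | va into the
 * accumulator vo | vh | vl twice - the doubling a squaring routine needs,
 * since each cross product a[i]*a[j] with i != j occurs twice in a^2.
 * A portable sketch of the same arithmetic (illustrative only; assumes the
 * double-width sp_int_word type):
 *
 *   int i;
 *   sp_int_word t;
 *   for (i = 0; i < 2; i++) {
 *       t   = (sp_int_word)vl + va;                        vl = (sp_int_digit)t;
 *       t   = (sp_int_word)vh + vb + (t >> SP_WORD_SIZE);  vh = (sp_int_digit)t;
 *       vo += vc + (sp_int_digit)(t >> SP_WORD_SIZE);
 *   }
 */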
  393. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  394. /* Divide the two-digit number (hi | lo) by the single digit d and return the quotient.
  395. *
  396. * Using divq instruction on Intel x64.
  397. *
  398. * @param [in] hi SP integer digit. High digit of the dividend.
  399. * @param [in] lo SP integer digit. Lower digit of the dividend.
  400. * @param [in] d SP integer digit. Number to divide by.
  401. * @return The division result.
  402. */
  403. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  404. sp_int_digit d)
  405. {
  406. __asm__ __volatile__ (
  407. "divq %2"
  408. : "+a" (lo)
  409. : "d" (hi), "r" (d)
  410. : "cc"
  411. );
  412. return lo;
  413. }
  414. #define SP_ASM_DIV_WORD
  415. #endif
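/* For reference, the quotient computed by sp_div_word() above matches this
 * portable sketch (illustrative only; assumes a compiler that provides the
 * unsigned __int128 extension):
 *
 *   static sp_int_digit sp_div_word_c(sp_int_digit hi, sp_int_digit lo,
 *                                     sp_int_digit d)
 *   {
 *       return (sp_int_digit)((((unsigned __int128)hi << 64) | lo) / d);
 *   }
 *
 * As with the divq instruction itself, the caller must ensure hi < d so that
 * the quotient fits in a single digit.
 */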
  416. #define SP_INT_ASM_AVAILABLE
  417. #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
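/* Where no assembly variant is available, the generic C code is used instead;
 * the accumulation performed by SP_ASM_MUL_ADD corresponds roughly to the
 * following sketch using the double-width sp_int_word type (illustrative
 * only; variable names match the macro parameters):
 *
 *   sp_int_word w = (sp_int_word)va * vb;
 *   sp_int_word t;
 *   t   = (sp_int_word)vl + (sp_int_digit)w;                 // add low half
 *   vl  = (sp_int_digit)t;
 *   t   = (sp_int_word)vh + (sp_int_digit)(w >> SP_WORD_SIZE)
 *                         + (t >> SP_WORD_SIZE);             // high half + carry
 *   vh  = (sp_int_digit)t;
 *   vo += (sp_int_digit)(t >> SP_WORD_SIZE);                 // overflow word
 */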
  418. #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
  419. /* Multiply va by vb and store double size result in: vh | vl */
  420. #define SP_ASM_MUL(vl, vh, va, vb) \
  421. __asm__ __volatile__ ( \
  422. "movl %[b], %%eax \n\t" \
  423. "mull %[a] \n\t" \
  424. "movl %%eax, %[l] \n\t" \
  425. "movl %%edx, %[h] \n\t" \
  426. : [h] "+r" (vh), [l] "+r" (vl) \
  427. : [a] "m" (va), [b] "m" (vb) \
  428. : "memory", "eax", "edx", "cc" \
  429. )
  430. /* Multiply va by vb and store double size result in: vo | vh | vl */
  431. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  432. __asm__ __volatile__ ( \
  433. "movl %[b], %%eax \n\t" \
  434. "mull %[a] \n\t" \
  435. "movl $0 , %[o] \n\t" \
  436. "movl %%eax, %[l] \n\t" \
  437. "movl %%edx, %[h] \n\t" \
  438. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  439. : [a] "m" (va), [b] "m" (vb) \
  440. : "eax", "edx", "cc" \
  441. )
  442. /* Multiply va by vb and add double size result into: vo | vh | vl */
  443. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  444. __asm__ __volatile__ ( \
  445. "movl %[b], %%eax \n\t" \
  446. "mull %[a] \n\t" \
  447. "addl %%eax, %[l] \n\t" \
  448. "adcl %%edx, %[h] \n\t" \
  449. "adcl $0 , %[o] \n\t" \
  450. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  451. : [a] "r" (va), [b] "r" (vb) \
  452. : "eax", "edx", "cc" \
  453. )
  454. /* Multiply va by vb and add double size result into: vh | vl */
  455. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  456. __asm__ __volatile__ ( \
  457. "movl %[b], %%eax \n\t" \
  458. "mull %[a] \n\t" \
  459. "addl %%eax, %[l] \n\t" \
  460. "adcl %%edx, %[h] \n\t" \
  461. : [l] "+r" (vl), [h] "+r" (vh) \
  462. : [a] "m" (va), [b] "m" (vb) \
  463. : "eax", "edx", "cc" \
  464. )
  465. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  466. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  467. __asm__ __volatile__ ( \
  468. "movl %[b], %%eax \n\t" \
  469. "mull %[a] \n\t" \
  470. "addl %%eax, %[l] \n\t" \
  471. "adcl %%edx, %[h] \n\t" \
  472. "adcl $0 , %[o] \n\t" \
  473. "addl %%eax, %[l] \n\t" \
  474. "adcl %%edx, %[h] \n\t" \
  475. "adcl $0 , %[o] \n\t" \
  476. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  477. : [a] "r" (va), [b] "r" (vb) \
  478. : "eax", "edx", "cc" \
  479. )
  480. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  481. * Assumes first add will not overflow vh | vl
  482. */
  483. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  484. __asm__ __volatile__ ( \
  485. "movl %[b], %%eax \n\t" \
  486. "mull %[a] \n\t" \
  487. "addl %%eax, %[l] \n\t" \
  488. "adcl %%edx, %[h] \n\t" \
  489. "addl %%eax, %[l] \n\t" \
  490. "adcl %%edx, %[h] \n\t" \
  491. "adcl $0 , %[o] \n\t" \
  492. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  493. : [a] "m" (va), [b] "m" (vb) \
  494. : "eax", "edx", "cc" \
  495. )
  496. /* Square va and store double size result in: vh | vl */
  497. #define SP_ASM_SQR(vl, vh, va) \
  498. __asm__ __volatile__ ( \
  499. "movl %[a], %%eax \n\t" \
  500. "mull %%eax \n\t" \
  501. "movl %%eax, %[l] \n\t" \
  502. "movl %%edx, %[h] \n\t" \
  503. : [h] "+r" (vh), [l] "+r" (vl) \
  504. : [a] "m" (va) \
  505. : "memory", "eax", "edx", "cc" \
  506. )
  507. /* Square va and add double size result into: vo | vh | vl */
  508. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  509. __asm__ __volatile__ ( \
  510. "movl %[a], %%eax \n\t" \
  511. "mull %%eax \n\t" \
  512. "addl %%eax, %[l] \n\t" \
  513. "adcl %%edx, %[h] \n\t" \
  514. "adcl $0 , %[o] \n\t" \
  515. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  516. : [a] "m" (va) \
  517. : "eax", "edx", "cc" \
  518. )
  519. /* Square va and add double size result into: vh | vl */
  520. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  521. __asm__ __volatile__ ( \
  522. "movl %[a], %%eax \n\t" \
  523. "mull %%eax \n\t" \
  524. "addl %%eax, %[l] \n\t" \
  525. "adcl %%edx, %[h] \n\t" \
  526. : [l] "+r" (vl), [h] "+r" (vh) \
  527. : [a] "m" (va) \
  528. : "eax", "edx", "cc" \
  529. )
  530. /* Add va into: vh | vl */
  531. #define SP_ASM_ADDC(vl, vh, va) \
  532. __asm__ __volatile__ ( \
  533. "addl %[a], %[l] \n\t" \
  534. "adcl $0 , %[h] \n\t" \
  535. : [l] "+r" (vl), [h] "+r" (vh) \
  536. : [a] "m" (va) \
  537. : "cc" \
  538. )
  539. /* Add va, variable in a register, into: vh | vl */
  540. #define SP_ASM_ADDC_REG(vl, vh, va) \
  541. __asm__ __volatile__ ( \
  542. "addl %[a], %[l] \n\t" \
  543. "adcl $0 , %[h] \n\t" \
  544. : [l] "+r" (vl), [h] "+r" (vh) \
  545. : [a] "r" (va) \
  546. : "cc" \
  547. )
  548. /* Sub va from: vh | vl */
  549. #define SP_ASM_SUBC(vl, vh, va) \
  550. __asm__ __volatile__ ( \
  551. "subl %[a], %[l] \n\t" \
  552. "sbbl $0 , %[h] \n\t" \
  553. : [l] "+r" (vl), [h] "+r" (vh) \
  554. : [a] "m" (va) \
  555. : "cc" \
  556. )
  557. /* Add two times vc | vb | va into vo | vh | vl */
  558. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  559. __asm__ __volatile__ ( \
  560. "addl %[a], %[l] \n\t" \
  561. "adcl %[b], %[h] \n\t" \
  562. "adcl %[c], %[o] \n\t" \
  563. "addl %[a], %[l] \n\t" \
  564. "adcl %[b], %[h] \n\t" \
  565. "adcl %[c], %[o] \n\t" \
  566. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  567. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  568. : "cc" \
  569. )
  570. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  571. /* Divide the two-digit number (hi | lo) by the single digit d and return the quotient.
  572. *
  573. * Using divl instruction on Intel x86.
  574. *
  575. * @param [in] hi SP integer digit. High digit of the dividend.
  576. * @param [in] lo SP integer digit. Lower digit of the dividend.
  577. * @param [in] d SP integer digit. Number to divide by.
  578. * @return The division result.
  579. */
  580. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  581. sp_int_digit d)
  582. {
  583. __asm__ __volatile__ (
  584. "divl %2"
  585. : "+a" (lo)
  586. : "d" (hi), "r" (d)
  587. : "cc"
  588. );
  589. return lo;
  590. }
  591. #define SP_ASM_DIV_WORD
  592. #endif
  593. #define SP_INT_ASM_AVAILABLE
  594. #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
  595. #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
  596. /* Multiply va by vb and store double size result in: vh | vl */
  597. #define SP_ASM_MUL(vl, vh, va, vb) \
  598. __asm__ __volatile__ ( \
  599. "mul %[l], %[a], %[b] \n\t" \
  600. "umulh %[h], %[a], %[b] \n\t" \
  601. : [h] "+r" (vh), [l] "+r" (vl) \
  602. : [a] "r" (va), [b] "r" (vb) \
  603. : "memory", "cc" \
  604. )
  605. /* Multiply va by vb and store double size result in: vo | vh | vl */
  606. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  607. __asm__ __volatile__ ( \
  608. "mul x8, %[a], %[b] \n\t" \
  609. "umulh %[h], %[a], %[b] \n\t" \
  610. "mov %[l], x8 \n\t" \
  611. "mov %[o], xzr \n\t" \
  612. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  613. : [a] "r" (va), [b] "r" (vb) \
  614. : "x8" \
  615. )
  616. /* Multiply va by vb and add double size result into: vo | vh | vl */
  617. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  618. __asm__ __volatile__ ( \
  619. "mul x8, %[a], %[b] \n\t" \
  620. "umulh x9, %[a], %[b] \n\t" \
  621. "adds %[l], %[l], x8 \n\t" \
  622. "adcs %[h], %[h], x9 \n\t" \
  623. "adc %[o], %[o], xzr \n\t" \
  624. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  625. : [a] "r" (va), [b] "r" (vb) \
  626. : "x8", "x9", "cc" \
  627. )
  628. /* Multiply va by vb and add double size result into: vh | vl */
  629. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  630. __asm__ __volatile__ ( \
  631. "mul x8, %[a], %[b] \n\t" \
  632. "umulh x9, %[a], %[b] \n\t" \
  633. "adds %[l], %[l], x8 \n\t" \
  634. "adc %[h], %[h], x9 \n\t" \
  635. : [l] "+r" (vl), [h] "+r" (vh) \
  636. : [a] "r" (va), [b] "r" (vb) \
  637. : "x8", "x9", "cc" \
  638. )
  639. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  640. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  641. __asm__ __volatile__ ( \
  642. "mul x8, %[a], %[b] \n\t" \
  643. "umulh x9, %[a], %[b] \n\t" \
  644. "adds %[l], %[l], x8 \n\t" \
  645. "adcs %[h], %[h], x9 \n\t" \
  646. "adc %[o], %[o], xzr \n\t" \
  647. "adds %[l], %[l], x8 \n\t" \
  648. "adcs %[h], %[h], x9 \n\t" \
  649. "adc %[o], %[o], xzr \n\t" \
  650. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  651. : [a] "r" (va), [b] "r" (vb) \
  652. : "x8", "x9", "cc" \
  653. )
  654. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  655. * Assumes first add will not overflow vh | vl
  656. */
  657. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  658. __asm__ __volatile__ ( \
  659. "mul x8, %[a], %[b] \n\t" \
  660. "umulh x9, %[a], %[b] \n\t" \
  661. "adds %[l], %[l], x8 \n\t" \
  662. "adc %[h], %[h], x9 \n\t" \
  663. "adds %[l], %[l], x8 \n\t" \
  664. "adcs %[h], %[h], x9 \n\t" \
  665. "adc %[o], %[o], xzr \n\t" \
  666. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  667. : [a] "r" (va), [b] "r" (vb) \
  668. : "x8", "x9", "cc" \
  669. )
  670. /* Square va and store double size result in: vh | vl */
  671. #define SP_ASM_SQR(vl, vh, va) \
  672. __asm__ __volatile__ ( \
  673. "mul %[l], %[a], %[a] \n\t" \
  674. "umulh %[h], %[a], %[a] \n\t" \
  675. : [h] "+r" (vh), [l] "+r" (vl) \
  676. : [a] "r" (va) \
  677. : "memory" \
  678. )
  679. /* Square va and add double size result into: vo | vh | vl */
  680. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  681. __asm__ __volatile__ ( \
  682. "mul x8, %[a], %[a] \n\t" \
  683. "umulh x9, %[a], %[a] \n\t" \
  684. "adds %[l], %[l], x8 \n\t" \
  685. "adcs %[h], %[h], x9 \n\t" \
  686. "adc %[o], %[o], xzr \n\t" \
  687. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  688. : [a] "r" (va) \
  689. : "x8", "x9", "cc" \
  690. )
  691. /* Square va and add double size result into: vh | vl */
  692. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  693. __asm__ __volatile__ ( \
  694. "mul x8, %[a], %[a] \n\t" \
  695. "umulh x9, %[a], %[a] \n\t" \
  696. "adds %[l], %[l], x8 \n\t" \
  697. "adc %[h], %[h], x9 \n\t" \
  698. : [l] "+r" (vl), [h] "+r" (vh) \
  699. : [a] "r" (va) \
  700. : "x8", "x9", "cc" \
  701. )
  702. /* Add va into: vh | vl */
  703. #define SP_ASM_ADDC(vl, vh, va) \
  704. __asm__ __volatile__ ( \
  705. "adds %[l], %[l], %[a] \n\t" \
  706. "adc %[h], %[h], xzr \n\t" \
  707. : [l] "+r" (vl), [h] "+r" (vh) \
  708. : [a] "r" (va) \
  709. : "cc" \
  710. )
  711. /* Sub va from: vh | vl */
  712. #define SP_ASM_SUBC(vl, vh, va) \
  713. __asm__ __volatile__ ( \
  714. "subs %[l], %[l], %[a] \n\t" \
  715. "sbc %[h], %[h], xzr \n\t" \
  716. : [l] "+r" (vl), [h] "+r" (vh) \
  717. : [a] "r" (va) \
  718. : "cc" \
  719. )
  720. /* Add two times vc | vb | va into vo | vh | vl */
  721. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  722. __asm__ __volatile__ ( \
  723. "adds %[l], %[l], %[a] \n\t" \
  724. "adcs %[h], %[h], %[b] \n\t" \
  725. "adc %[o], %[o], %[c] \n\t" \
  726. "adds %[l], %[l], %[a] \n\t" \
  727. "adcs %[h], %[h], %[b] \n\t" \
  728. "adc %[o], %[o], %[c] \n\t" \
  729. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  730. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  731. : "cc" \
  732. )
  733. #define SP_INT_ASM_AVAILABLE
  734. #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
  735. #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
  736. SP_WORD_SIZE == 32
  737. /* Multiply va by vb and store double size result in: vh | vl */
  738. #define SP_ASM_MUL(vl, vh, va, vb) \
  739. __asm__ __volatile__ ( \
  740. "umull %[l], %[h], %[a], %[b] \n\t" \
  741. : [h] "+r" (vh), [l] "+r" (vl) \
  742. : [a] "r" (va), [b] "r" (vb) \
  743. : "memory" \
  744. )
  745. /* Multiply va by vb and store double size result in: vo | vh | vl */
  746. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  747. __asm__ __volatile__ ( \
  748. "umull %[l], %[h], %[a], %[b] \n\t" \
  749. "mov %[o], #0 \n\t" \
  750. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  751. : [a] "r" (va), [b] "r" (vb) \
  752. : \
  753. )
  754. /* Multiply va by vb and add double size result into: vo | vh | vl */
  755. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  756. __asm__ __volatile__ ( \
  757. "umull r8, r9, %[a], %[b] \n\t" \
  758. "adds %[l], %[l], r8 \n\t" \
  759. "adcs %[h], %[h], r9 \n\t" \
  760. "adc %[o], %[o], #0 \n\t" \
  761. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  762. : [a] "r" (va), [b] "r" (vb) \
  763. : "r8", "r9", "cc" \
  764. )
  765. /* Multiply va by vb and add double size result into: vh | vl */
  766. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  767. __asm__ __volatile__ ( \
  768. "umlal %[l], %[h], %[a], %[b] \n\t" \
  769. : [l] "+r" (vl), [h] "+r" (vh) \
  770. : [a] "r" (va), [b] "r" (vb) \
  771. : \
  772. )
  773. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  774. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  775. __asm__ __volatile__ ( \
  776. "umull r8, r9, %[a], %[b] \n\t" \
  777. "adds %[l], %[l], r8 \n\t" \
  778. "adcs %[h], %[h], r9 \n\t" \
  779. "adc %[o], %[o], #0 \n\t" \
  780. "adds %[l], %[l], r8 \n\t" \
  781. "adcs %[h], %[h], r9 \n\t" \
  782. "adc %[o], %[o], #0 \n\t" \
  783. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  784. : [a] "r" (va), [b] "r" (vb) \
  785. : "r8", "r9", "cc" \
  786. )
  787. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  788. * Assumes first add will not overflow vh | vl
  789. */
  790. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  791. __asm__ __volatile__ ( \
  792. "umull r8, r9, %[a], %[b] \n\t" \
  793. "adds %[l], %[l], r8 \n\t" \
  794. "adc %[h], %[h], r9 \n\t" \
  795. "adds %[l], %[l], r8 \n\t" \
  796. "adcs %[h], %[h], r9 \n\t" \
  797. "adc %[o], %[o], #0 \n\t" \
  798. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  799. : [a] "r" (va), [b] "r" (vb) \
  800. : "r8", "r9", "cc" \
  801. )
  802. /* Square va and store double size result in: vh | vl */
  803. #define SP_ASM_SQR(vl, vh, va) \
  804. __asm__ __volatile__ ( \
  805. "umull %[l], %[h], %[a], %[a] \n\t" \
  806. : [h] "+r" (vh), [l] "+r" (vl) \
  807. : [a] "r" (va) \
  808. : "memory" \
  809. )
  810. /* Square va and add double size result into: vo | vh | vl */
  811. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  812. __asm__ __volatile__ ( \
  813. "umull r8, r9, %[a], %[a] \n\t" \
  814. "adds %[l], %[l], r8 \n\t" \
  815. "adcs %[h], %[h], r9 \n\t" \
  816. "adc %[o], %[o], #0 \n\t" \
  817. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  818. : [a] "r" (va) \
  819. : "r8", "r9", "cc" \
  820. )
  821. /* Square va and add double size result into: vh | vl */
  822. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  823. __asm__ __volatile__ ( \
  824. "umlal %[l], %[h], %[a], %[a] \n\t" \
  825. : [l] "+r" (vl), [h] "+r" (vh) \
  826. : [a] "r" (va) \
  827. : "cc" \
  828. )
  829. /* Add va into: vh | vl */
  830. #define SP_ASM_ADDC(vl, vh, va) \
  831. __asm__ __volatile__ ( \
  832. "adds %[l], %[l], %[a] \n\t" \
  833. "adc %[h], %[h], #0 \n\t" \
  834. : [l] "+r" (vl), [h] "+r" (vh) \
  835. : [a] "r" (va) \
  836. : "cc" \
  837. )
  838. /* Sub va from: vh | vl */
  839. #define SP_ASM_SUBC(vl, vh, va) \
  840. __asm__ __volatile__ ( \
  841. "subs %[l], %[l], %[a] \n\t" \
  842. "sbc %[h], %[h], #0 \n\t" \
  843. : [l] "+r" (vl), [h] "+r" (vh) \
  844. : [a] "r" (va) \
  845. : "cc" \
  846. )
  847. /* Add two times vc | vb | va into vo | vh | vl */
  848. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  849. __asm__ __volatile__ ( \
  850. "adds %[l], %[l], %[a] \n\t" \
  851. "adcs %[h], %[h], %[b] \n\t" \
  852. "adc %[o], %[o], %[c] \n\t" \
  853. "adds %[l], %[l], %[a] \n\t" \
  854. "adcs %[h], %[h], %[b] \n\t" \
  855. "adc %[o], %[o], %[c] \n\t" \
  856. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  857. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  858. : "cc" \
  859. )
  860. #define SP_INT_ASM_AVAILABLE
  861. #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
  862. #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
  863. /* Multiply va by vb and store double size result in: vh | vl */
  864. #define SP_ASM_MUL(vl, vh, va, vb) \
  865. __asm__ __volatile__ ( \
  866. "mulld %[l], %[a], %[b] \n\t" \
  867. "mulhdu %[h], %[a], %[b] \n\t" \
  868. : [h] "+r" (vh), [l] "+r" (vl) \
  869. : [a] "r" (va), [b] "r" (vb) \
  870. : "memory" \
  871. )
  872. /* Multiply va by vb and store double size result in: vo | vh | vl */
  873. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  874. __asm__ __volatile__ ( \
  875. "mulhdu %[h], %[a], %[b] \n\t" \
  876. "mulld %[l], %[a], %[b] \n\t" \
  877. "li %[o], 0 \n\t" \
  878. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  879. : [a] "r" (va), [b] "r" (vb) \
  880. : \
  881. )
  882. /* Multiply va by vb and add double size result into: vo | vh | vl */
  883. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  884. __asm__ __volatile__ ( \
  885. "mulld 16, %[a], %[b] \n\t" \
  886. "mulhdu 17, %[a], %[b] \n\t" \
  887. "addc %[l], %[l], 16 \n\t" \
  888. "adde %[h], %[h], 17 \n\t" \
  889. "addze %[o], %[o] \n\t" \
  890. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  891. : [a] "r" (va), [b] "r" (vb) \
  892. : "16", "17", "cc" \
  893. )
  894. /* Multiply va by vb and add double size result into: vh | vl */
  895. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  896. __asm__ __volatile__ ( \
  897. "mulld 16, %[a], %[b] \n\t" \
  898. "mulhdu 17, %[a], %[b] \n\t" \
  899. "addc %[l], %[l], 16 \n\t" \
  900. "adde %[h], %[h], 17 \n\t" \
  901. : [l] "+r" (vl), [h] "+r" (vh) \
  902. : [a] "r" (va), [b] "r" (vb) \
  903. : "16", "17", "cc" \
  904. )
  905. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  906. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  907. __asm__ __volatile__ ( \
  908. "mulld 16, %[a], %[b] \n\t" \
  909. "mulhdu 17, %[a], %[b] \n\t" \
  910. "addc %[l], %[l], 16 \n\t" \
  911. "adde %[h], %[h], 17 \n\t" \
  912. "addze %[o], %[o] \n\t" \
  913. "addc %[l], %[l], 16 \n\t" \
  914. "adde %[h], %[h], 17 \n\t" \
  915. "addze %[o], %[o] \n\t" \
  916. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  917. : [a] "r" (va), [b] "r" (vb) \
  918. : "16", "17", "cc" \
  919. )
  920. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  921. * Assumes first add will not overflow vh | vl
  922. */
  923. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  924. __asm__ __volatile__ ( \
  925. "mulld 16, %[a], %[b] \n\t" \
  926. "mulhdu 17, %[a], %[b] \n\t" \
  927. "addc %[l], %[l], 16 \n\t" \
  928. "adde %[h], %[h], 17 \n\t" \
  929. "addc %[l], %[l], 16 \n\t" \
  930. "adde %[h], %[h], 17 \n\t" \
  931. "addze %[o], %[o] \n\t" \
  932. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  933. : [a] "r" (va), [b] "r" (vb) \
  934. : "16", "17", "cc" \
  935. )
  936. /* Square va and store double size result in: vh | vl */
  937. #define SP_ASM_SQR(vl, vh, va) \
  938. __asm__ __volatile__ ( \
  939. "mulld %[l], %[a], %[a] \n\t" \
  940. "mulhdu %[h], %[a], %[a] \n\t" \
  941. : [h] "+r" (vh), [l] "+r" (vl) \
  942. : [a] "r" (va) \
  943. : "memory" \
  944. )
  945. /* Square va and add double size result into: vo | vh | vl */
  946. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  947. __asm__ __volatile__ ( \
  948. "mulld 16, %[a], %[a] \n\t" \
  949. "mulhdu 17, %[a], %[a] \n\t" \
  950. "addc %[l], %[l], 16 \n\t" \
  951. "adde %[h], %[h], 17 \n\t" \
  952. "addze %[o], %[o] \n\t" \
  953. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  954. : [a] "r" (va) \
  955. : "16", "17", "cc" \
  956. )
  957. /* Square va and add double size result into: vh | vl */
  958. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  959. __asm__ __volatile__ ( \
  960. "mulld 16, %[a], %[a] \n\t" \
  961. "mulhdu 17, %[a], %[a] \n\t" \
  962. "addc %[l], %[l], 16 \n\t" \
  963. "adde %[h], %[h], 17 \n\t" \
  964. : [l] "+r" (vl), [h] "+r" (vh) \
  965. : [a] "r" (va) \
  966. : "16", "17", "cc" \
  967. )
  968. /* Add va into: vh | vl */
  969. #define SP_ASM_ADDC(vl, vh, va) \
  970. __asm__ __volatile__ ( \
  971. "addc %[l], %[l], %[a] \n\t" \
  972. "addze %[h], %[h] \n\t" \
  973. : [l] "+r" (vl), [h] "+r" (vh) \
  974. : [a] "r" (va) \
  975. : "cc" \
  976. )
  977. /* Sub va from: vh | vl */
  978. #define SP_ASM_SUBC(vl, vh, va) \
  979. __asm__ __volatile__ ( \
  980. "subfc %[l], %[a], %[l] \n\t" \
  981. "li 16, 0 \n\t" \
  982. "subfe %[h], 16, %[h] \n\t" \
  983. : [l] "+r" (vl), [h] "+r" (vh) \
  984. : [a] "r" (va) \
  985. : "16", "cc" \
  986. )
  987. /* Add two times vc | vb | va into vo | vh | vl */
  988. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  989. __asm__ __volatile__ ( \
  990. "addc %[l], %[l], %[a] \n\t" \
  991. "adde %[h], %[h], %[b] \n\t" \
  992. "adde %[o], %[o], %[c] \n\t" \
  993. "addc %[l], %[l], %[a] \n\t" \
  994. "adde %[h], %[h], %[b] \n\t" \
  995. "adde %[o], %[o], %[c] \n\t" \
  996. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  997. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  998. : "cc" \
  999. )
  1000. #define SP_INT_ASM_AVAILABLE
  1001. #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
  1002. #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
  1003. /* Multiply va by vb and store double size result in: vh | vl */
  1004. #define SP_ASM_MUL(vl, vh, va, vb) \
  1005. __asm__ __volatile__ ( \
  1006. "mullw %[l], %[a], %[b] \n\t" \
  1007. "mulhwu %[h], %[a], %[b] \n\t" \
  1008. : [h] "+r" (vh), [l] "+r" (vl) \
  1009. : [a] "r" (va), [b] "r" (vb) \
  1010. : "memory" \
  1011. )
  1012. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1013. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1014. __asm__ __volatile__ ( \
  1015. "mulhwu %[h], %[a], %[b] \n\t" \
  1016. "mullw %[l], %[a], %[b] \n\t" \
  1017. "li %[o], 0 \n\t" \
  1018. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1019. : [a] "r" (va), [b] "r" (vb) \
  1020. : \
  1021. )
  1022. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1023. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1024. __asm__ __volatile__ ( \
  1025. "mullw 16, %[a], %[b] \n\t" \
  1026. "mulhwu 17, %[a], %[b] \n\t" \
  1027. "addc %[l], %[l], 16 \n\t" \
  1028. "adde %[h], %[h], 17 \n\t" \
  1029. "addze %[o], %[o] \n\t" \
  1030. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1031. : [a] "r" (va), [b] "r" (vb) \
  1032. : "16", "17", "cc" \
  1033. )
  1034. /* Multiply va by vb and add double size result into: vh | vl */
  1035. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1036. __asm__ __volatile__ ( \
  1037. "mullw 16, %[a], %[b] \n\t" \
  1038. "mulhwu 17, %[a], %[b] \n\t" \
  1039. "addc %[l], %[l], 16 \n\t" \
  1040. "adde %[h], %[h], 17 \n\t" \
  1041. : [l] "+r" (vl), [h] "+r" (vh) \
  1042. : [a] "r" (va), [b] "r" (vb) \
  1043. : "16", "17", "cc" \
  1044. )
  1045. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1046. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1047. __asm__ __volatile__ ( \
  1048. "mullw 16, %[a], %[b] \n\t" \
  1049. "mulhwu 17, %[a], %[b] \n\t" \
  1050. "addc %[l], %[l], 16 \n\t" \
  1051. "adde %[h], %[h], 17 \n\t" \
  1052. "addze %[o], %[o] \n\t" \
  1053. "addc %[l], %[l], 16 \n\t" \
  1054. "adde %[h], %[h], 17 \n\t" \
  1055. "addze %[o], %[o] \n\t" \
  1056. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1057. : [a] "r" (va), [b] "r" (vb) \
  1058. : "16", "17", "cc" \
  1059. )
  1060. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1061. * Assumes first add will not overflow vh | vl
  1062. */
  1063. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1064. __asm__ __volatile__ ( \
  1065. "mullw 16, %[a], %[b] \n\t" \
  1066. "mulhwu 17, %[a], %[b] \n\t" \
  1067. "addc %[l], %[l], 16 \n\t" \
  1068. "adde %[h], %[h], 17 \n\t" \
  1069. "addc %[l], %[l], 16 \n\t" \
  1070. "adde %[h], %[h], 17 \n\t" \
  1071. "addze %[o], %[o] \n\t" \
  1072. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1073. : [a] "r" (va), [b] "r" (vb) \
  1074. : "16", "17", "cc" \
  1075. )
  1076. /* Square va and store double size result in: vh | vl */
  1077. #define SP_ASM_SQR(vl, vh, va) \
  1078. __asm__ __volatile__ ( \
  1079. "mullw %[l], %[a], %[a] \n\t" \
  1080. "mulhwu %[h], %[a], %[a] \n\t" \
  1081. : [h] "+r" (vh), [l] "+r" (vl) \
  1082. : [a] "r" (va) \
  1083. : "memory" \
  1084. )
  1085. /* Square va and add double size result into: vo | vh | vl */
  1086. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1087. __asm__ __volatile__ ( \
  1088. "mullw 16, %[a], %[a] \n\t" \
  1089. "mulhwu 17, %[a], %[a] \n\t" \
  1090. "addc %[l], %[l], 16 \n\t" \
  1091. "adde %[h], %[h], 17 \n\t" \
  1092. "addze %[o], %[o] \n\t" \
  1093. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1094. : [a] "r" (va) \
  1095. : "16", "17", "cc" \
  1096. )
  1097. /* Square va and add double size result into: vh | vl */
  1098. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1099. __asm__ __volatile__ ( \
  1100. "mullw 16, %[a], %[a] \n\t" \
  1101. "mulhwu 17, %[a], %[a] \n\t" \
  1102. "addc %[l], %[l], 16 \n\t" \
  1103. "adde %[h], %[h], 17 \n\t" \
  1104. : [l] "+r" (vl), [h] "+r" (vh) \
  1105. : [a] "r" (va) \
  1106. : "16", "17", "cc" \
  1107. )
  1108. /* Add va into: vh | vl */
  1109. #define SP_ASM_ADDC(vl, vh, va) \
  1110. __asm__ __volatile__ ( \
  1111. "addc %[l], %[l], %[a] \n\t" \
  1112. "addze %[h], %[h] \n\t" \
  1113. : [l] "+r" (vl), [h] "+r" (vh) \
  1114. : [a] "r" (va) \
  1115. : "cc" \
  1116. )
  1117. /* Sub va from: vh | vl */
  1118. #define SP_ASM_SUBC(vl, vh, va) \
  1119. __asm__ __volatile__ ( \
  1120. "subfc %[l], %[a], %[l] \n\t" \
  1121. "li 16, 0 \n\t" \
  1122. "subfe %[h], 16, %[h] \n\t" \
  1123. : [l] "+r" (vl), [h] "+r" (vh) \
  1124. : [a] "r" (va) \
  1125. : "16", "cc" \
  1126. )
  1127. /* Add two times vc | vb | va into vo | vh | vl */
  1128. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  1129. __asm__ __volatile__ ( \
  1130. "addc %[l], %[l], %[a] \n\t" \
  1131. "adde %[h], %[h], %[b] \n\t" \
  1132. "adde %[o], %[o], %[c] \n\t" \
  1133. "addc %[l], %[l], %[a] \n\t" \
  1134. "adde %[h], %[h], %[b] \n\t" \
  1135. "adde %[o], %[o], %[c] \n\t" \
  1136. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1137. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  1138. : "cc" \
  1139. )
  1140. #define SP_INT_ASM_AVAILABLE
  1141. #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
  1142. #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
  1143. /* Multiply va by vb and store double size result in: vh | vl */
  1144. #define SP_ASM_MUL(vl, vh, va, vb) \
  1145. __asm__ __volatile__ ( \
  1146. "dmultu %[a], %[b] \n\t" \
  1147. "mflo %[l] \n\t" \
  1148. "mfhi %[h] \n\t" \
  1149. : [h] "+r" (vh), [l] "+r" (vl) \
  1150. : [a] "r" (va), [b] "r" (vb) \
  1151. : "memory", "$lo", "$hi" \
  1152. )
  1153. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1154. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1155. __asm__ __volatile__ ( \
  1156. "dmultu %[a], %[b] \n\t" \
  1157. "mflo %[l] \n\t" \
  1158. "mfhi %[h] \n\t" \
  1159. "move %[o], $0 \n\t" \
  1160. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1161. : [a] "r" (va), [b] "r" (vb) \
  1162. : "$lo", "$hi" \
  1163. )
  1164. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1165. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1166. __asm__ __volatile__ ( \
  1167. "dmultu %[a], %[b] \n\t" \
  1168. "mflo $10 \n\t" \
  1169. "mfhi $11 \n\t" \
  1170. "daddu %[l], %[l], $10 \n\t" \
  1171. "sltu $12, %[l], $10 \n\t" \
  1172. "daddu %[h], %[h], $12 \n\t" \
  1173. "sltu $12, %[h], $12 \n\t" \
  1174. "daddu %[o], %[o], $12 \n\t" \
  1175. "daddu %[h], %[h], $11 \n\t" \
  1176. "sltu $12, %[h], $11 \n\t" \
  1177. "daddu %[o], %[o], $12 \n\t" \
  1178. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1179. : [a] "r" (va), [b] "r" (vb) \
  1180. : "$10", "$11", "$12", "$lo", "$hi" \
  1181. )
  1182. /* Multiply va by vb and add double size result into: vh | vl */
  1183. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1184. __asm__ __volatile__ ( \
  1185. "dmultu %[a], %[b] \n\t" \
  1186. "mflo $10 \n\t" \
  1187. "mfhi $11 \n\t" \
  1188. "daddu %[l], %[l], $10 \n\t" \
  1189. "sltu $12, %[l], $10 \n\t" \
  1190. "daddu %[h], %[h], $11 \n\t" \
  1191. "daddu %[h], %[h], $12 \n\t" \
  1192. : [l] "+r" (vl), [h] "+r" (vh) \
  1193. : [a] "r" (va), [b] "r" (vb) \
  1194. : "$10", "$11", "$12", "$lo", "$hi" \
  1195. )
  1196. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1197. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1198. __asm__ __volatile__ ( \
  1199. "dmultu %[a], %[b] \n\t" \
  1200. "mflo $10 \n\t" \
  1201. "mfhi $11 \n\t" \
  1202. "daddu %[l], %[l], $10 \n\t" \
  1203. "sltu $12, %[l], $10 \n\t" \
  1204. "daddu %[h], %[h], $12 \n\t" \
  1205. "sltu $12, %[h], $12 \n\t" \
  1206. "daddu %[o], %[o], $12 \n\t" \
  1207. "daddu %[h], %[h], $11 \n\t" \
  1208. "sltu $12, %[h], $11 \n\t" \
  1209. "daddu %[o], %[o], $12 \n\t" \
  1210. "daddu %[l], %[l], $10 \n\t" \
  1211. "sltu $12, %[l], $10 \n\t" \
  1212. "daddu %[h], %[h], $12 \n\t" \
  1213. "sltu $12, %[h], $12 \n\t" \
  1214. "daddu %[o], %[o], $12 \n\t" \
  1215. "daddu %[h], %[h], $11 \n\t" \
  1216. "sltu $12, %[h], $11 \n\t" \
  1217. "daddu %[o], %[o], $12 \n\t" \
  1218. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1219. : [a] "r" (va), [b] "r" (vb) \
  1220. : "$10", "$11", "$12", "$lo", "$hi" \
  1221. )
  1222. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1223. * Assumes first add will not overflow vh | vl
  1224. */
  1225. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1226. __asm__ __volatile__ ( \
  1227. "dmultu %[a], %[b] \n\t" \
  1228. "mflo $10 \n\t" \
  1229. "mfhi $11 \n\t" \
  1230. "daddu %[l], %[l], $10 \n\t" \
  1231. "sltu $12, %[l], $10 \n\t" \
  1232. "daddu %[h], %[h], $11 \n\t" \
  1233. "daddu %[h], %[h], $12 \n\t" \
  1234. "daddu %[l], %[l], $10 \n\t" \
  1235. "sltu $12, %[l], $10 \n\t" \
  1236. "daddu %[h], %[h], $12 \n\t" \
  1237. "sltu $12, %[h], $12 \n\t" \
  1238. "daddu %[o], %[o], $12 \n\t" \
  1239. "daddu %[h], %[h], $11 \n\t" \
  1240. "sltu $12, %[h], $11 \n\t" \
  1241. "daddu %[o], %[o], $12 \n\t" \
  1242. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1243. : [a] "r" (va), [b] "r" (vb) \
  1244. : "$10", "$11", "$12", "$lo", "$hi" \
  1245. )
  1246. /* Square va and store double size result in: vh | vl */
  1247. #define SP_ASM_SQR(vl, vh, va) \
  1248. __asm__ __volatile__ ( \
  1249. "dmultu %[a], %[a] \n\t" \
  1250. "mflo %[l] \n\t" \
  1251. "mfhi %[h] \n\t" \
  1252. : [h] "+r" (vh), [l] "+r" (vl) \
  1253. : [a] "r" (va) \
  1254. : "memory", "$lo", "$hi" \
  1255. )
  1256. /* Square va and add double size result into: vo | vh | vl */
  1257. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1258. __asm__ __volatile__ ( \
  1259. "dmultu %[a], %[a] \n\t" \
  1260. "mflo $10 \n\t" \
  1261. "mfhi $11 \n\t" \
  1262. "daddu %[l], %[l], $10 \n\t" \
  1263. "sltu $12, %[l], $10 \n\t" \
  1264. "daddu %[h], %[h], $12 \n\t" \
  1265. "sltu $12, %[h], $12 \n\t" \
  1266. "daddu %[o], %[o], $12 \n\t" \
  1267. "daddu %[h], %[h], $11 \n\t" \
  1268. "sltu $12, %[h], $11 \n\t" \
  1269. "daddu %[o], %[o], $12 \n\t" \
  1270. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1271. : [a] "r" (va) \
  1272. : "$10", "$11", "$12", "$lo", "$hi" \
  1273. )
  1274. /* Square va and add double size result into: vh | vl */
  1275. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1276. __asm__ __volatile__ ( \
  1277. "dmultu %[a], %[a] \n\t" \
  1278. "mflo $10 \n\t" \
  1279. "mfhi $11 \n\t" \
  1280. "daddu %[l], %[l], $10 \n\t" \
  1281. "sltu $12, %[l], $10 \n\t" \
  1282. "daddu %[h], %[h], $11 \n\t" \
  1283. "daddu %[h], %[h], $12 \n\t" \
  1284. : [l] "+r" (vl), [h] "+r" (vh) \
  1285. : [a] "r" (va) \
  1286. : "$10", "$11", "$12", "$lo", "$hi" \
  1287. )
  1288. /* Add va into: vh | vl */
  1289. #define SP_ASM_ADDC(vl, vh, va) \
  1290. __asm__ __volatile__ ( \
  1291. "daddu %[l], %[l], %[a] \n\t" \
  1292. "sltu $12, %[l], %[a] \n\t" \
  1293. "daddu %[h], %[h], $12 \n\t" \
  1294. : [l] "+r" (vl), [h] "+r" (vh) \
  1295. : [a] "r" (va) \
  1296. : "$12" \
  1297. )
  1298. /* Sub va from: vh | vl */
  1299. #define SP_ASM_SUBC(vl, vh, va) \
  1300. __asm__ __volatile__ ( \
  1301. "move $12, %[l] \n\t" \
  1302. "dsubu %[l], $12, %[a] \n\t" \
  1303. "sltu $12, $12, %[l] \n\t" \
  1304. "dsubu %[h], %[h], $12 \n\t" \
  1305. : [l] "+r" (vl), [h] "+r" (vh) \
  1306. : [a] "r" (va) \
  1307. : "$12" \
  1308. )
  1309. /* Add two times vc | vb | va into vo | vh | vl */
  1310. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  1311. __asm__ __volatile__ ( \
  1312. "daddu %[l], %[l], %[a] \n\t" \
  1313. "sltu $12, %[l], %[a] \n\t" \
  1314. "daddu %[h], %[h], $12 \n\t" \
  1315. "sltu $12, %[h], $12 \n\t" \
  1316. "daddu %[o], %[o], $12 \n\t" \
  1317. "daddu %[h], %[h], %[b] \n\t" \
  1318. "sltu $12, %[h], %[b] \n\t" \
  1319. "daddu %[o], %[o], %[c] \n\t" \
  1320. "daddu %[o], %[o], $12 \n\t" \
  1321. "daddu %[l], %[l], %[a] \n\t" \
  1322. "sltu $12, %[l], %[a] \n\t" \
  1323. "daddu %[h], %[h], $12 \n\t" \
  1324. "sltu $12, %[h], $12 \n\t" \
  1325. "daddu %[o], %[o], $12 \n\t" \
  1326. "daddu %[h], %[h], %[b] \n\t" \
  1327. "sltu $12, %[h], %[b] \n\t" \
  1328. "daddu %[o], %[o], %[c] \n\t" \
  1329. "daddu %[o], %[o], $12 \n\t" \
  1330. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1331. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  1332. : "$12" \
  1333. )
  1334. #define SP_INT_ASM_AVAILABLE
  1335. #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
  1336. #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
  1337. /* Multiply va by vb and store double size result in: vh | vl */
  1338. #define SP_ASM_MUL(vl, vh, va, vb) \
  1339. __asm__ __volatile__ ( \
  1340. "multu %[a], %[b] \n\t" \
  1341. "mflo %[l] \n\t" \
  1342. "mfhi %[h] \n\t" \
  1343. : [h] "+r" (vh), [l] "+r" (vl) \
  1344. : [a] "r" (va), [b] "r" (vb) \
  1345. : "memory", "$lo", "$hi" \
  1346. )
  1347. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1348. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1349. __asm__ __volatile__ ( \
  1350. "multu %[a], %[b] \n\t" \
  1351. "mflo %[l] \n\t" \
  1352. "mfhi %[h] \n\t" \
  1353. "move %[o], $0 \n\t" \
  1354. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1355. : [a] "r" (va), [b] "r" (vb) \
  1356. : "$lo", "$hi" \
  1357. )
  1358. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1359. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1360. __asm__ __volatile__ ( \
  1361. "multu %[a], %[b] \n\t" \
  1362. "mflo $10 \n\t" \
  1363. "mfhi $11 \n\t" \
  1364. "addu %[l], %[l], $10 \n\t" \
  1365. "sltu $12, %[l], $10 \n\t" \
  1366. "addu %[h], %[h], $12 \n\t" \
  1367. "sltu $12, %[h], $12 \n\t" \
  1368. "addu %[o], %[o], $12 \n\t" \
  1369. "addu %[h], %[h], $11 \n\t" \
  1370. "sltu $12, %[h], $11 \n\t" \
  1371. "addu %[o], %[o], $12 \n\t" \
  1372. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1373. : [a] "r" (va), [b] "r" (vb) \
  1374. : "$10", "$11", "$12", "$lo", "$hi" \
  1375. )
  1376. /* Multiply va by vb and add double size result into: vh | vl */
  1377. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1378. __asm__ __volatile__ ( \
  1379. "multu %[a], %[b] \n\t" \
  1380. "mflo $10 \n\t" \
  1381. "mfhi $11 \n\t" \
  1382. "addu %[l], %[l], $10 \n\t" \
  1383. "sltu $12, %[l], $10 \n\t" \
  1384. "addu %[h], %[h], $11 \n\t" \
  1385. "addu %[h], %[h], $12 \n\t" \
  1386. : [l] "+r" (vl), [h] "+r" (vh) \
  1387. : [a] "r" (va), [b] "r" (vb) \
  1388. : "$10", "$11", "$12", "$lo", "$hi" \
  1389. )
  1390. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1391. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1392. __asm__ __volatile__ ( \
  1393. "multu %[a], %[b] \n\t" \
  1394. "mflo $10 \n\t" \
  1395. "mfhi $11 \n\t" \
  1396. "addu %[l], %[l], $10 \n\t" \
  1397. "sltu $12, %[l], $10 \n\t" \
  1398. "addu %[h], %[h], $12 \n\t" \
  1399. "sltu $12, %[h], $12 \n\t" \
  1400. "addu %[o], %[o], $12 \n\t" \
  1401. "addu %[h], %[h], $11 \n\t" \
  1402. "sltu $12, %[h], $11 \n\t" \
  1403. "addu %[o], %[o], $12 \n\t" \
  1404. "addu %[l], %[l], $10 \n\t" \
  1405. "sltu $12, %[l], $10 \n\t" \
  1406. "addu %[h], %[h], $12 \n\t" \
  1407. "sltu $12, %[h], $12 \n\t" \
  1408. "addu %[o], %[o], $12 \n\t" \
  1409. "addu %[h], %[h], $11 \n\t" \
  1410. "sltu $12, %[h], $11 \n\t" \
  1411. "addu %[o], %[o], $12 \n\t" \
  1412. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1413. : [a] "r" (va), [b] "r" (vb) \
  1414. : "$10", "$11", "$12", "$lo", "$hi" \
  1415. )
  1416. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1417. * Assumes first add will not overflow vh | vl
  1418. */
  1419. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1420. __asm__ __volatile__ ( \
  1421. "multu %[a], %[b] \n\t" \
  1422. "mflo $10 \n\t" \
  1423. "mfhi $11 \n\t" \
  1424. "addu %[l], %[l], $10 \n\t" \
  1425. "sltu $12, %[l], $10 \n\t" \
  1426. "addu %[h], %[h], $11 \n\t" \
  1427. "addu %[h], %[h], $12 \n\t" \
  1428. "addu %[l], %[l], $10 \n\t" \
  1429. "sltu $12, %[l], $10 \n\t" \
  1430. "addu %[h], %[h], $12 \n\t" \
  1431. "sltu $12, %[h], $12 \n\t" \
  1432. "addu %[o], %[o], $12 \n\t" \
  1433. "addu %[h], %[h], $11 \n\t" \
  1434. "sltu $12, %[h], $11 \n\t" \
  1435. "addu %[o], %[o], $12 \n\t" \
  1436. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1437. : [a] "r" (va), [b] "r" (vb) \
  1438. : "$10", "$11", "$12", "$lo", "$hi" \
  1439. )
  1440. /* Square va and store double size result in: vh | vl */
  1441. #define SP_ASM_SQR(vl, vh, va) \
  1442. __asm__ __volatile__ ( \
  1443. "multu %[a], %[a] \n\t" \
  1444. "mflo %[l] \n\t" \
  1445. "mfhi %[h] \n\t" \
  1446. : [h] "+r" (vh), [l] "+r" (vl) \
  1447. : [a] "r" (va) \
  1448. : "memory", "$lo", "$hi" \
  1449. )
  1450. /* Square va and add double size result into: vo | vh | vl */
  1451. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1452. __asm__ __volatile__ ( \
  1453. "multu %[a], %[a] \n\t" \
  1454. "mflo $10 \n\t" \
  1455. "mfhi $11 \n\t" \
  1456. "addu %[l], %[l], $10 \n\t" \
  1457. "sltu $12, %[l], $10 \n\t" \
  1458. "addu %[h], %[h], $12 \n\t" \
  1459. "sltu $12, %[h], $12 \n\t" \
  1460. "addu %[o], %[o], $12 \n\t" \
  1461. "addu %[h], %[h], $11 \n\t" \
  1462. "sltu $12, %[h], $11 \n\t" \
  1463. "addu %[o], %[o], $12 \n\t" \
  1464. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1465. : [a] "r" (va) \
  1466. : "$10", "$11", "$12", "$lo", "$hi" \
  1467. )
  1468. /* Square va and add double size result into: vh | vl */
  1469. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1470. __asm__ __volatile__ ( \
  1471. "multu %[a], %[a] \n\t" \
  1472. "mflo $10 \n\t" \
  1473. "mfhi $11 \n\t" \
  1474. "addu %[l], %[l], $10 \n\t" \
  1475. "sltu $12, %[l], $10 \n\t" \
  1476. "addu %[h], %[h], $11 \n\t" \
  1477. "addu %[h], %[h], $12 \n\t" \
  1478. : [l] "+r" (vl), [h] "+r" (vh) \
  1479. : [a] "r" (va) \
  1480. : "$10", "$11", "$12", "$lo", "$hi" \
  1481. )
  1482. /* Add va into: vh | vl */
  1483. #define SP_ASM_ADDC(vl, vh, va) \
  1484. __asm__ __volatile__ ( \
  1485. "addu %[l], %[l], %[a] \n\t" \
  1486. "sltu $12, %[l], %[a] \n\t" \
  1487. "addu %[h], %[h], $12 \n\t" \
  1488. : [l] "+r" (vl), [h] "+r" (vh) \
  1489. : [a] "r" (va) \
  1490. : "$12" \
  1491. )
  1492. /* Sub va from: vh | vl */
  1493. #define SP_ASM_SUBC(vl, vh, va) \
  1494. __asm__ __volatile__ ( \
  1495. "move $12, %[l] \n\t" \
  1496. "subu %[l], $12, %[a] \n\t" \
  1497. "sltu $12, $12, %[l] \n\t" \
  1498. "subu %[h], %[h], $12 \n\t" \
  1499. : [l] "+r" (vl), [h] "+r" (vh) \
  1500. : [a] "r" (va) \
  1501. : "$12" \
  1502. )
  1503. /* Add two times vc | vb | va into vo | vh | vl */
  1504. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  1505. __asm__ __volatile__ ( \
  1506. "addu %[l], %[l], %[a] \n\t" \
  1507. "sltu $12, %[l], %[a] \n\t" \
  1508. "addu %[h], %[h], $12 \n\t" \
  1509. "sltu $12, %[h], $12 \n\t" \
  1510. "addu %[o], %[o], $12 \n\t" \
  1511. "addu %[h], %[h], %[b] \n\t" \
  1512. "sltu $12, %[h], %[b] \n\t" \
  1513. "addu %[o], %[o], %[c] \n\t" \
  1514. "addu %[o], %[o], $12 \n\t" \
  1515. "addu %[l], %[l], %[a] \n\t" \
  1516. "sltu $12, %[l], %[a] \n\t" \
  1517. "addu %[h], %[h], $12 \n\t" \
  1518. "sltu $12, %[h], $12 \n\t" \
  1519. "addu %[o], %[o], $12 \n\t" \
  1520. "addu %[h], %[h], %[b] \n\t" \
  1521. "sltu $12, %[h], %[b] \n\t" \
  1522. "addu %[o], %[o], %[c] \n\t" \
  1523. "addu %[o], %[o], $12 \n\t" \
  1524. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1525. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  1526. : "$12" \
  1527. )
  1528. #define SP_INT_ASM_AVAILABLE
  1529. #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
  1530. #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
  1531. /* Multiply va by vb and store double size result in: vh | vl */
  1532. #define SP_ASM_MUL(vl, vh, va, vb) \
  1533. __asm__ __volatile__ ( \
  1534. "mul %[l], %[a], %[b] \n\t" \
  1535. "mulhu %[h], %[a], %[b] \n\t" \
  1536. : [h] "+r" (vh), [l] "+r" (vl) \
  1537. : [a] "r" (va), [b] "r" (vb) \
  1538. : "memory" \
  1539. )
  1540. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1541. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1542. __asm__ __volatile__ ( \
  1543. "mulhu %[h], %[a], %[b] \n\t" \
  1544. "mul %[l], %[a], %[b] \n\t" \
  1545. "add %[o], zero, zero \n\t" \
  1546. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1547. : [a] "r" (va), [b] "r" (vb) \
  1548. : \
  1549. )
  1550. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1551. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1552. __asm__ __volatile__ ( \
  1553. "mul a5, %[a], %[b] \n\t" \
  1554. "mulhu a6, %[a], %[b] \n\t" \
  1555. "add %[l], %[l], a5 \n\t" \
  1556. "sltu a7, %[l], a5 \n\t" \
  1557. "add %[h], %[h], a7 \n\t" \
  1558. "sltu a7, %[h], a7 \n\t" \
  1559. "add %[o], %[o], a7 \n\t" \
  1560. "add %[h], %[h], a6 \n\t" \
  1561. "sltu a7, %[h], a6 \n\t" \
  1562. "add %[o], %[o], a7 \n\t" \
  1563. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1564. : [a] "r" (va), [b] "r" (vb) \
  1565. : "a5", "a6", "a7" \
  1566. )
  1567. /* Multiply va by vb and add double size result into: vh | vl */
  1568. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1569. __asm__ __volatile__ ( \
  1570. "mul a5, %[a], %[b] \n\t" \
  1571. "mulhu a6, %[a], %[b] \n\t" \
  1572. "add %[l], %[l], a5 \n\t" \
  1573. "sltu a7, %[l], a5 \n\t" \
  1574. "add %[h], %[h], a6 \n\t" \
  1575. "add %[h], %[h], a7 \n\t" \
  1576. : [l] "+r" (vl), [h] "+r" (vh) \
  1577. : [a] "r" (va), [b] "r" (vb) \
  1578. : "a5", "a6", "a7" \
  1579. )
  1580. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1581. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1582. __asm__ __volatile__ ( \
  1583. "mul a5, %[a], %[b] \n\t" \
  1584. "mulhu a6, %[a], %[b] \n\t" \
  1585. "add %[l], %[l], a5 \n\t" \
  1586. "sltu a7, %[l], a5 \n\t" \
  1587. "add %[h], %[h], a7 \n\t" \
  1588. "sltu a7, %[h], a7 \n\t" \
  1589. "add %[o], %[o], a7 \n\t" \
  1590. "add %[h], %[h], a6 \n\t" \
  1591. "sltu a7, %[h], a6 \n\t" \
  1592. "add %[o], %[o], a7 \n\t" \
  1593. "add %[l], %[l], a5 \n\t" \
  1594. "sltu a7, %[l], a5 \n\t" \
  1595. "add %[h], %[h], a7 \n\t" \
  1596. "sltu a7, %[h], a7 \n\t" \
  1597. "add %[o], %[o], a7 \n\t" \
  1598. "add %[h], %[h], a6 \n\t" \
  1599. "sltu a7, %[h], a6 \n\t" \
  1600. "add %[o], %[o], a7 \n\t" \
  1601. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1602. : [a] "r" (va), [b] "r" (vb) \
  1603. : "a5", "a6", "a7" \
  1604. )
  1605. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1606. * Assumes first add will not overflow vh | vl
  1607. */
  1608. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1609. __asm__ __volatile__ ( \
  1610. "mul a5, %[a], %[b] \n\t" \
  1611. "mulhu a6, %[a], %[b] \n\t" \
  1612. "add %[l], %[l], a5 \n\t" \
  1613. "sltu a7, %[l], a5 \n\t" \
  1614. "add %[h], %[h], a6 \n\t" \
  1615. "add %[h], %[h], a7 \n\t" \
  1616. "add %[l], %[l], a5 \n\t" \
  1617. "sltu a7, %[l], a5 \n\t" \
  1618. "add %[h], %[h], a7 \n\t" \
  1619. "sltu a7, %[h], a7 \n\t" \
  1620. "add %[o], %[o], a7 \n\t" \
  1621. "add %[h], %[h], a6 \n\t" \
  1622. "sltu a7, %[h], a6 \n\t" \
  1623. "add %[o], %[o], a7 \n\t" \
  1624. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1625. : [a] "r" (va), [b] "r" (vb) \
  1626. : "a5", "a6", "a7" \
  1627. )
  1628. /* Square va and store double size result in: vh | vl */
  1629. #define SP_ASM_SQR(vl, vh, va) \
  1630. __asm__ __volatile__ ( \
  1631. "mul %[l], %[a], %[a] \n\t" \
  1632. "mulhu %[h], %[a], %[a] \n\t" \
  1633. : [h] "+r" (vh), [l] "+r" (vl) \
  1634. : [a] "r" (va) \
  1635. : "memory" \
  1636. )
  1637. /* Square va and add double size result into: vo | vh | vl */
  1638. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1639. __asm__ __volatile__ ( \
  1640. "mul a5, %[a], %[a] \n\t" \
  1641. "mulhu a6, %[a], %[a] \n\t" \
  1642. "add %[l], %[l], a5 \n\t" \
  1643. "sltu a7, %[l], a5 \n\t" \
  1644. "add %[h], %[h], a7 \n\t" \
  1645. "sltu a7, %[h], a7 \n\t" \
  1646. "add %[o], %[o], a7 \n\t" \
  1647. "add %[h], %[h], a6 \n\t" \
  1648. "sltu a7, %[h], a6 \n\t" \
  1649. "add %[o], %[o], a7 \n\t" \
  1650. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1651. : [a] "r" (va) \
  1652. : "a5", "a6", "a7" \
  1653. )
  1654. /* Square va and add double size result into: vh | vl */
  1655. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1656. __asm__ __volatile__ ( \
  1657. "mul a5, %[a], %[a] \n\t" \
  1658. "mulhu a6, %[a], %[a] \n\t" \
  1659. "add %[l], %[l], a5 \n\t" \
  1660. "sltu a7, %[l], a5 \n\t" \
  1661. "add %[h], %[h], a6 \n\t" \
  1662. "add %[h], %[h], a7 \n\t" \
  1663. : [l] "+r" (vl), [h] "+r" (vh) \
  1664. : [a] "r" (va) \
  1665. : "a5", "a6", "a7" \
  1666. )
  1667. /* Add va into: vh | vl */
  1668. #define SP_ASM_ADDC(vl, vh, va) \
  1669. __asm__ __volatile__ ( \
  1670. "add %[l], %[l], %[a] \n\t" \
  1671. "sltu a7, %[l], %[a] \n\t" \
  1672. "add %[h], %[h], a7 \n\t" \
  1673. : [l] "+r" (vl), [h] "+r" (vh) \
  1674. : [a] "r" (va) \
  1675. : "a7" \
  1676. )
  1677. /* Sub va from: vh | vl */
  1678. #define SP_ASM_SUBC(vl, vh, va) \
  1679. __asm__ __volatile__ ( \
  1680. "add a7, %[l], zero \n\t" \
  1681. "sub %[l], a7, %[a] \n\t" \
  1682. "sltu a7, a7, %[l] \n\t" \
  1683. "sub %[h], %[h], a7 \n\t" \
  1684. : [l] "+r" (vl), [h] "+r" (vh) \
  1685. : [a] "r" (va) \
  1686. : "a7" \
  1687. )
  1688. /* Add two times vc | vb | va into vo | vh | vl */
  1689. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  1690. __asm__ __volatile__ ( \
  1691. "add %[l], %[l], %[a] \n\t" \
  1692. "sltu a7, %[l], %[a] \n\t" \
  1693. "add %[h], %[h], a7 \n\t" \
  1694. "sltu a7, %[h], a7 \n\t" \
  1695. "add %[o], %[o], a7 \n\t" \
  1696. "add %[h], %[h], %[b] \n\t" \
  1697. "sltu a7, %[h], %[b] \n\t" \
  1698. "add %[o], %[o], %[c] \n\t" \
  1699. "add %[o], %[o], a7 \n\t" \
  1700. "add %[l], %[l], %[a] \n\t" \
  1701. "sltu a7, %[l], %[a] \n\t" \
  1702. "add %[h], %[h], a7 \n\t" \
  1703. "sltu a7, %[h], a7 \n\t" \
  1704. "add %[o], %[o], a7 \n\t" \
  1705. "add %[h], %[h], %[b] \n\t" \
  1706. "sltu a7, %[h], %[b] \n\t" \
  1707. "add %[o], %[o], %[c] \n\t" \
  1708. "add %[o], %[o], a7 \n\t" \
  1709. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1710. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  1711. : "a7" \
  1712. )
  1713. #define SP_INT_ASM_AVAILABLE
  1714. #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
  1715. #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
  1716. /* Multiply va by vb and store double size result in: vh | vl */
  1717. #define SP_ASM_MUL(vl, vh, va, vb) \
  1718. __asm__ __volatile__ ( \
  1719. "mul %[l], %[a], %[b] \n\t" \
  1720. "mulhu %[h], %[a], %[b] \n\t" \
  1721. : [h] "+r" (vh), [l] "+r" (vl) \
  1722. : [a] "r" (va), [b] "r" (vb) \
  1723. : "memory" \
  1724. )
  1725. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1726. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1727. __asm__ __volatile__ ( \
  1728. "mulhu %[h], %[a], %[b] \n\t" \
  1729. "mul %[l], %[a], %[b] \n\t" \
  1730. "add %[o], zero, zero \n\t" \
  1731. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1732. : [a] "r" (va), [b] "r" (vb) \
  1733. : \
  1734. )
  1735. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1736. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1737. __asm__ __volatile__ ( \
  1738. "mul a5, %[a], %[b] \n\t" \
  1739. "mulhu a6, %[a], %[b] \n\t" \
  1740. "add %[l], %[l], a5 \n\t" \
  1741. "sltu a7, %[l], a5 \n\t" \
  1742. "add %[h], %[h], a7 \n\t" \
  1743. "sltu a7, %[h], a7 \n\t" \
  1744. "add %[o], %[o], a7 \n\t" \
  1745. "add %[h], %[h], a6 \n\t" \
  1746. "sltu a7, %[h], a6 \n\t" \
  1747. "add %[o], %[o], a7 \n\t" \
  1748. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1749. : [a] "r" (va), [b] "r" (vb) \
  1750. : "a5", "a6", "a7" \
  1751. )
  1752. /* Multiply va by vb and add double size result into: vh | vl */
  1753. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1754. __asm__ __volatile__ ( \
  1755. "mul a5, %[a], %[b] \n\t" \
  1756. "mulhu a6, %[a], %[b] \n\t" \
  1757. "add %[l], %[l], a5 \n\t" \
  1758. "sltu a7, %[l], a5 \n\t" \
  1759. "add %[h], %[h], a6 \n\t" \
  1760. "add %[h], %[h], a7 \n\t" \
  1761. : [l] "+r" (vl), [h] "+r" (vh) \
  1762. : [a] "r" (va), [b] "r" (vb) \
  1763. : "a5", "a6", "a7" \
  1764. )
  1765. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1766. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1767. __asm__ __volatile__ ( \
  1768. "mul a5, %[a], %[b] \n\t" \
  1769. "mulhu a6, %[a], %[b] \n\t" \
  1770. "add %[l], %[l], a5 \n\t" \
  1771. "sltu a7, %[l], a5 \n\t" \
  1772. "add %[h], %[h], a7 \n\t" \
  1773. "sltu a7, %[h], a7 \n\t" \
  1774. "add %[o], %[o], a7 \n\t" \
  1775. "add %[h], %[h], a6 \n\t" \
  1776. "sltu a7, %[h], a6 \n\t" \
  1777. "add %[o], %[o], a7 \n\t" \
  1778. "add %[l], %[l], a5 \n\t" \
  1779. "sltu a7, %[l], a5 \n\t" \
  1780. "add %[h], %[h], a7 \n\t" \
  1781. "sltu a7, %[h], a7 \n\t" \
  1782. "add %[o], %[o], a7 \n\t" \
  1783. "add %[h], %[h], a6 \n\t" \
  1784. "sltu a7, %[h], a6 \n\t" \
  1785. "add %[o], %[o], a7 \n\t" \
  1786. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1787. : [a] "r" (va), [b] "r" (vb) \
  1788. : "a5", "a6", "a7" \
  1789. )
  1790. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1791. * Assumes first add will not overflow vh | vl
  1792. */
  1793. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1794. __asm__ __volatile__ ( \
  1795. "mul a5, %[a], %[b] \n\t" \
  1796. "mulhu a6, %[a], %[b] \n\t" \
  1797. "add %[l], %[l], a5 \n\t" \
  1798. "sltu a7, %[l], a5 \n\t" \
  1799. "add %[h], %[h], a6 \n\t" \
  1800. "add %[h], %[h], a7 \n\t" \
  1801. "add %[l], %[l], a5 \n\t" \
  1802. "sltu a7, %[l], a5 \n\t" \
  1803. "add %[h], %[h], a7 \n\t" \
  1804. "sltu a7, %[h], a7 \n\t" \
  1805. "add %[o], %[o], a7 \n\t" \
  1806. "add %[h], %[h], a6 \n\t" \
  1807. "sltu a7, %[h], a6 \n\t" \
  1808. "add %[o], %[o], a7 \n\t" \
  1809. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1810. : [a] "r" (va), [b] "r" (vb) \
  1811. : "a5", "a6", "a7" \
  1812. )
  1813. /* Square va and store double size result in: vh | vl */
  1814. #define SP_ASM_SQR(vl, vh, va) \
  1815. __asm__ __volatile__ ( \
  1816. "mul %[l], %[a], %[a] \n\t" \
  1817. "mulhu %[h], %[a], %[a] \n\t" \
  1818. : [h] "+r" (vh), [l] "+r" (vl) \
  1819. : [a] "r" (va) \
  1820. : "memory" \
  1821. )
  1822. /* Square va and add double size result into: vo | vh | vl */
  1823. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1824. __asm__ __volatile__ ( \
  1825. "mul a5, %[a], %[a] \n\t" \
  1826. "mulhu a6, %[a], %[a] \n\t" \
  1827. "add %[l], %[l], a5 \n\t" \
  1828. "sltu a7, %[l], a5 \n\t" \
  1829. "add %[h], %[h], a7 \n\t" \
  1830. "sltu a7, %[h], a7 \n\t" \
  1831. "add %[o], %[o], a7 \n\t" \
  1832. "add %[h], %[h], a6 \n\t" \
  1833. "sltu a7, %[h], a6 \n\t" \
  1834. "add %[o], %[o], a7 \n\t" \
  1835. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1836. : [a] "r" (va) \
  1837. : "a5", "a6", "a7" \
  1838. )
  1839. /* Square va and add double size result into: vh | vl */
  1840. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1841. __asm__ __volatile__ ( \
  1842. "mul a5, %[a], %[a] \n\t" \
  1843. "mulhu a6, %[a], %[a] \n\t" \
  1844. "add %[l], %[l], a5 \n\t" \
  1845. "sltu a7, %[l], a5 \n\t" \
  1846. "add %[h], %[h], a6 \n\t" \
  1847. "add %[h], %[h], a7 \n\t" \
  1848. : [l] "+r" (vl), [h] "+r" (vh) \
  1849. : [a] "r" (va) \
  1850. : "a5", "a6", "a7" \
  1851. )
  1852. /* Add va into: vh | vl */
  1853. #define SP_ASM_ADDC(vl, vh, va) \
  1854. __asm__ __volatile__ ( \
  1855. "add %[l], %[l], %[a] \n\t" \
  1856. "sltu a7, %[l], %[a] \n\t" \
  1857. "add %[h], %[h], a7 \n\t" \
  1858. : [l] "+r" (vl), [h] "+r" (vh) \
  1859. : [a] "r" (va) \
  1860. : "a7" \
  1861. )
  1862. /* Sub va from: vh | vl */
  1863. #define SP_ASM_SUBC(vl, vh, va) \
  1864. __asm__ __volatile__ ( \
  1865. "add a7, %[l], zero \n\t" \
  1866. "sub %[l], a7, %[a] \n\t" \
  1867. "sltu a7, a7, %[l] \n\t" \
  1868. "sub %[h], %[h], a7 \n\t" \
  1869. : [l] "+r" (vl), [h] "+r" (vh) \
  1870. : [a] "r" (va) \
  1871. : "a7" \
  1872. )
  1873. /* Add two times vc | vb | va into vo | vh | vl */
  1874. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  1875. __asm__ __volatile__ ( \
  1876. "add %[l], %[l], %[a] \n\t" \
  1877. "sltu a7, %[l], %[a] \n\t" \
  1878. "add %[h], %[h], a7 \n\t" \
  1879. "sltu a7, %[h], a7 \n\t" \
  1880. "add %[o], %[o], a7 \n\t" \
  1881. "add %[h], %[h], %[b] \n\t" \
  1882. "sltu a7, %[h], %[b] \n\t" \
  1883. "add %[o], %[o], %[c] \n\t" \
  1884. "add %[o], %[o], a7 \n\t" \
  1885. "add %[l], %[l], %[a] \n\t" \
  1886. "sltu a7, %[l], %[a] \n\t" \
  1887. "add %[h], %[h], a7 \n\t" \
  1888. "sltu a7, %[h], a7 \n\t" \
  1889. "add %[o], %[o], a7 \n\t" \
  1890. "add %[h], %[h], %[b] \n\t" \
  1891. "sltu a7, %[h], %[b] \n\t" \
  1892. "add %[o], %[o], %[c] \n\t" \
  1893. "add %[o], %[o], a7 \n\t" \
  1894. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1895. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  1896. : "a7" \
  1897. )
  1898. #define SP_INT_ASM_AVAILABLE
  1899. #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
  1900. #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
  1901. /* Multiply va by vb and store double size result in: vh | vl */
  1902. #define SP_ASM_MUL(vl, vh, va, vb) \
  1903. __asm__ __volatile__ ( \
  1904. "lgr %%r1, %[a] \n\t" \
  1905. "mlgr %%r0, %[b] \n\t" \
  1906. "lgr %[l], %%r1 \n\t" \
  1907. "lgr %[h], %%r0 \n\t" \
  1908. : [h] "+r" (vh), [l] "+r" (vl) \
  1909. : [a] "r" (va), [b] "r" (vb) \
  1910. : "memory", "r0", "r1" \
  1911. )
  1912. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1913. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1914. __asm__ __volatile__ ( \
  1915. "lgr %%r1, %[a] \n\t" \
  1916. "mlgr %%r0, %[b] \n\t" \
  1917. "lghi %[o], 0 \n\t" \
  1918. "lgr %[l], %%r1 \n\t" \
  1919. "lgr %[h], %%r0 \n\t" \
  1920. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1921. : [a] "r" (va), [b] "r" (vb) \
  1922. : "r0", "r1" \
  1923. )
  1924. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1925. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1926. __asm__ __volatile__ ( \
  1927. "lghi %%r10, 0 \n\t" \
  1928. "lgr %%r1, %[a] \n\t" \
  1929. "mlgr %%r0, %[b] \n\t" \
  1930. "algr %[l], %%r1 \n\t" \
  1931. "alcgr %[h], %%r0 \n\t" \
  1932. "alcgr %[o], %%r10 \n\t" \
  1933. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1934. : [a] "r" (va), [b] "r" (vb) \
  1935. : "r0", "r1", "r10", "cc" \
  1936. )
  1937. /* Multiply va by vb and add double size result into: vh | vl */
  1938. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1939. __asm__ __volatile__ ( \
  1940. "lgr %%r1, %[a] \n\t" \
  1941. "mlgr %%r0, %[b] \n\t" \
  1942. "algr %[l], %%r1 \n\t" \
  1943. "alcgr %[h], %%r0 \n\t" \
  1944. : [l] "+r" (vl), [h] "+r" (vh) \
  1945. : [a] "r" (va), [b] "r" (vb) \
  1946. : "r0", "r1", "cc" \
  1947. )
  1948. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1949. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1950. __asm__ __volatile__ ( \
  1951. "lghi %%r10, 0 \n\t" \
  1952. "lgr %%r1, %[a] \n\t" \
  1953. "mlgr %%r0, %[b] \n\t" \
  1954. "algr %[l], %%r1 \n\t" \
  1955. "alcgr %[h], %%r0 \n\t" \
  1956. "alcgr %[o], %%r10 \n\t" \
  1957. "algr %[l], %%r1 \n\t" \
  1958. "alcgr %[h], %%r0 \n\t" \
  1959. "alcgr %[o], %%r10 \n\t" \
  1960. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1961. : [a] "r" (va), [b] "r" (vb) \
  1962. : "r0", "r1", "r10", "cc" \
  1963. )
  1964. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1965. * Assumes first add will not overflow vh | vl
  1966. */
  1967. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1968. __asm__ __volatile__ ( \
  1969. "lghi %%r10, 0 \n\t" \
  1970. "lgr %%r1, %[a] \n\t" \
  1971. "mlgr %%r0, %[b] \n\t" \
  1972. "algr %[l], %%r1 \n\t" \
  1973. "alcgr %[h], %%r0 \n\t" \
  1974. "algr %[l], %%r1 \n\t" \
  1975. "alcgr %[h], %%r0 \n\t" \
  1976. "alcgr %[o], %%r10 \n\t" \
  1977. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1978. : [a] "r" (va), [b] "r" (vb) \
  1979. : "r0", "r1", "r10", "cc" \
  1980. )
  1981. /* Square va and store double size result in: vh | vl */
  1982. #define SP_ASM_SQR(vl, vh, va) \
  1983. __asm__ __volatile__ ( \
  1984. "lgr %%r1, %[a] \n\t" \
  1985. "mlgr %%r0, %%r1 \n\t" \
  1986. "lgr %[l], %%r1 \n\t" \
  1987. "lgr %[h], %%r0 \n\t" \
  1988. : [h] "+r" (vh), [l] "+r" (vl) \
  1989. : [a] "r" (va) \
  1990. : "memory", "r0", "r1" \
  1991. )
  1992. /* Square va and add double size result into: vo | vh | vl */
  1993. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1994. __asm__ __volatile__ ( \
  1995. "lghi %%r10, 0 \n\t" \
  1996. "lgr %%r1, %[a] \n\t" \
  1997. "mlgr %%r0, %%r1 \n\t" \
  1998. "algr %[l], %%r1 \n\t" \
  1999. "alcgr %[h], %%r0 \n\t" \
  2000. "alcgr %[o], %%r10 \n\t" \
  2001. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  2002. : [a] "r" (va) \
  2003. : "r0", "r1", "r10", "cc" \
  2004. )
  2005. /* Square va and add double size result into: vh | vl */
  2006. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  2007. __asm__ __volatile__ ( \
  2008. "lgr %%r1, %[a] \n\t" \
  2009. "mlgr %%r0, %%r1 \n\t" \
  2010. "algr %[l], %%r1 \n\t" \
  2011. "alcgr %[h], %%r0 \n\t" \
  2012. : [l] "+r" (vl), [h] "+r" (vh) \
  2013. : [a] "r" (va) \
  2014. : "r0", "r1", "cc" \
  2015. )
  2016. /* Add va into: vh | vl */
  2017. #define SP_ASM_ADDC(vl, vh, va) \
  2018. __asm__ __volatile__ ( \
  2019. "lghi %%r10, 0 \n\t" \
  2020. "algr %[l], %[a] \n\t" \
  2021. "alcgr %[h], %%r10 \n\t" \
  2022. : [l] "+r" (vl), [h] "+r" (vh) \
  2023. : [a] "r" (va) \
  2024. : "r10", "cc" \
  2025. )
  2026. /* Sub va from: vh | vl */
  2027. #define SP_ASM_SUBC(vl, vh, va) \
  2028. __asm__ __volatile__ ( \
  2029. "lghi %%r10, 0 \n\t" \
  2030. "slgr %[l], %[a] \n\t" \
  2031. "slbgr %[h], %%r10 \n\t" \
  2032. : [l] "+r" (vl), [h] "+r" (vh) \
  2033. : [a] "r" (va) \
  2034. : "r10", "cc" \
  2035. )
  2036. /* Add two times vc | vb | va into vo | vh | vl */
  2037. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  2038. __asm__ __volatile__ ( \
  2039. "algr %[l], %[a] \n\t" \
  2040. "alcgr %[h], %[b] \n\t" \
  2041. "alcgr %[o], %[c] \n\t" \
  2042. "algr %[l], %[a] \n\t" \
  2043. "alcgr %[h], %[b] \n\t" \
  2044. "alcgr %[o], %[c] \n\t" \
  2045. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  2046. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  2047. : "cc" \
  2048. )
  2049. #define SP_INT_ASM_AVAILABLE
  2050. #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
  2051. #ifdef SP_INT_ASM_AVAILABLE
  2052. #ifndef SP_INT_NO_ASM
  2053. #define SQR_MUL_ASM
  2054. #endif
  2055. #ifndef SP_ASM_ADDC_REG
  2056. #define SP_ASM_ADDC_REG SP_ASM_ADDC
  2057. #endif /* SP_ASM_ADDC_REG */
2058. #endif /* SP_INT_ASM_AVAILABLE */
  2059. #endif /* !WOLFSSL_NO_ASM */
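/* Sketch of how the accumulator macros above are typically combined in a
 * product-scanning multiply (illustrative only; the variable and loop names
 * below are made up for the example and are not the functions defined later
 * in this file):
 *
 *     sp_int_digit l = 0, h = 0, o = 0;               // running column sum
 *     for each pair (i, j) with i + j == k:
 *         SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
 *     r->dp[k] = l;                                   // emit column k
 *     l = h; h = o; o = 0;                            // shift accumulator
 */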
  2060. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  2061. !defined(NO_DSA) || !defined(NO_DH) || \
  2062. (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
  2063. (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  2064. #ifndef WC_NO_CACHE_RESISTANT
  2065. /* Mask of address for constant time operations. */
  2066. const size_t sp_off_on_addr[2] =
  2067. {
  2068. (size_t) 0,
  2069. (size_t)-1
  2070. };
  2071. #endif
  2072. #endif
  2073. #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
  2074. #ifdef __cplusplus
  2075. extern "C" {
  2076. #endif
  2077. /* Modular exponentiation implementations using Single Precision. */
  2078. WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
  2079. sp_int* res);
  2080. WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
  2081. sp_int* res);
  2082. WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
  2083. sp_int* res);
  2084. WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
  2085. sp_int* res);
  2086. WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
  2087. sp_int* res);
  2088. #ifdef __cplusplus
  2089. } /* extern "C" */
  2090. #endif
  2091. #endif
  2092. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  2093. static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp);
  2094. #endif
  2095. /* Set the multi-precision number to zero.
  2096. *
  2097. * Assumes a is not NULL.
  2098. *
  2099. * @param [out] a SP integer to set to zero.
  2100. */
  2101. static void _sp_zero(sp_int* a)
  2102. {
  2103. a->used = 0;
  2104. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2105. a->sign = MP_ZPOS;
  2106. #endif
  2107. }
  2108. /* Initialize the multi-precision number to be zero.
  2109. *
  2110. * @param [out] a SP integer.
  2111. *
  2112. * @return MP_OKAY on success.
  2113. * @return MP_VAL when a is NULL.
  2114. */
  2115. int sp_init(sp_int* a)
  2116. {
  2117. int err = MP_OKAY;
  2118. if (a == NULL) {
  2119. err = MP_VAL;
  2120. }
  2121. if (err == MP_OKAY) {
  2122. _sp_zero(a);
  2123. a->size = SP_INT_DIGITS;
  2124. #ifdef HAVE_WOLF_BIGINT
  2125. wc_bigint_init(&a->raw);
  2126. #endif
  2127. }
  2128. return err;
  2129. }
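/* Example (illustrative): a stack-allocated sp_int is initialized before use
 * and freed afterwards (sp_free() only matters when HAVE_WOLF_BIGINT is
 * defined, but calling it unconditionally is harmless):
 *
 *     sp_int x;
 *     if (sp_init(&x) == MP_OKAY) {
 *         ...
 *         sp_free(&x);
 *     }
 */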
  2130. int sp_init_size(sp_int* a, int size)
  2131. {
  2132. int err = sp_init(a);
  2133. if (err == MP_OKAY) {
  2134. a->size = size;
  2135. }
  2136. return err;
  2137. }
  2138. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  2139. /* Initialize up to six multi-precision numbers to be zero.
  2140. *
  2141. * @param [out] n1 SP integer.
  2142. * @param [out] n2 SP integer.
  2143. * @param [out] n3 SP integer.
  2144. * @param [out] n4 SP integer.
  2145. * @param [out] n5 SP integer.
  2146. * @param [out] n6 SP integer.
  2147. *
  2148. * @return MP_OKAY on success.
  2149. */
  2150. int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
  2151. sp_int* n6)
  2152. {
  2153. if (n1 != NULL) {
  2154. _sp_zero(n1);
  2155. n1->dp[0] = 0;
  2156. n1->size = SP_INT_DIGITS;
  2157. }
  2158. if (n2 != NULL) {
  2159. _sp_zero(n2);
  2160. n2->dp[0] = 0;
  2161. n2->size = SP_INT_DIGITS;
  2162. }
  2163. if (n3 != NULL) {
  2164. _sp_zero(n3);
  2165. n3->dp[0] = 0;
  2166. n3->size = SP_INT_DIGITS;
  2167. }
  2168. if (n4 != NULL) {
  2169. _sp_zero(n4);
  2170. n4->dp[0] = 0;
  2171. n4->size = SP_INT_DIGITS;
  2172. }
  2173. if (n5 != NULL) {
  2174. _sp_zero(n5);
  2175. n5->dp[0] = 0;
  2176. n5->size = SP_INT_DIGITS;
  2177. }
  2178. if (n6 != NULL) {
  2179. _sp_zero(n6);
  2180. n6->dp[0] = 0;
  2181. n6->size = SP_INT_DIGITS;
  2182. }
  2183. return MP_OKAY;
  2184. }
  2185. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
  2186. /* Free the memory allocated in the multi-precision number.
  2187. *
  2188. * @param [in] a SP integer.
  2189. */
  2190. void sp_free(sp_int* a)
  2191. {
  2192. if (a != NULL) {
  2193. #ifdef HAVE_WOLF_BIGINT
  2194. wc_bigint_free(&a->raw);
  2195. #endif
  2196. }
  2197. }
  2198. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  2199. /* Grow multi-precision number to be able to hold l digits.
  2200. * This function does nothing as the number of digits is fixed.
  2201. *
  2202. * @param [in,out] a SP integer.
  2203. * @param [in] l Number of digits to grow to.
  2204. *
  2205. * @return MP_OKAY on success
  2206. * @return MP_MEM if the number of digits requested is more than available.
  2207. */
  2208. int sp_grow(sp_int* a, int l)
  2209. {
  2210. int err = MP_OKAY;
  2211. if (a == NULL) {
  2212. err = MP_VAL;
  2213. }
  2214. if ((err == MP_OKAY) && (l > a->size)) {
  2215. err = MP_MEM;
  2216. }
  2217. if (err == MP_OKAY) {
  2218. int i;
  2219. for (i = a->used; i < l; i++) {
  2220. a->dp[i] = 0;
  2221. }
  2222. }
  2223. return err;
  2224. }
  2225. #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
  2226. #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
  2227. /* Set the multi-precision number to zero.
  2228. *
  2229. * @param [out] a SP integer to set to zero.
  2230. */
  2231. void sp_zero(sp_int* a)
  2232. {
  2233. if (a != NULL) {
  2234. _sp_zero(a);
  2235. }
  2236. }
  2237. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  2238. /* Clear the data from the multi-precision number and set to zero.
  2239. *
  2240. * @param [out] a SP integer.
  2241. */
  2242. void sp_clear(sp_int* a)
  2243. {
  2244. if (a != NULL) {
  2245. int i;
  2246. for (i = 0; i < a->used; i++) {
  2247. a->dp[i] = 0;
  2248. }
  2249. _sp_zero(a);
  2250. }
  2251. }
  2252. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  2253. /* Ensure the data in the multi-precision number is zeroed.
  2254. *
  2255. * Use when security sensitive data needs to be wiped.
  2256. *
  2257. * @param [in] a SP integer.
  2258. */
  2259. void sp_forcezero(sp_int* a)
  2260. {
  2261. ForceZero(a->dp, a->used * sizeof(sp_int_digit));
  2262. _sp_zero(a);
  2263. #ifdef HAVE_WOLF_BIGINT
  2264. wc_bigint_zero(&a->raw);
  2265. #endif
  2266. }
2267. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
2268. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  2269. !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
  2270. /* Copy value of multi-precision number a into r.
  2271. *
  2272. * @param [in] a SP integer - source.
  2273. * @param [out] r SP integer - destination.
  2274. *
  2275. * @return MP_OKAY on success.
  2276. */
  2277. int sp_copy(sp_int* a, sp_int* r)
  2278. {
  2279. int err = MP_OKAY;
  2280. if ((a == NULL) || (r == NULL)) {
  2281. err = MP_VAL;
  2282. }
  2283. else if (a != r) {
  2284. XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
  2285. if (a->used == 0)
  2286. r->dp[0] = 0;
  2287. r->used = a->used;
  2288. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2289. r->sign = a->sign;
  2290. #endif
  2291. }
  2292. return err;
  2293. }
  2294. #endif
2295. #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
  2296. /* Initializes r and copies in value from a.
  2297. *
  2298. * @param [out] r SP integer - destination.
  2299. * @param [in] a SP integer - source.
  2300. *
  2301. * @return MP_OKAY on success.
  2302. * @return MP_VAL when a or r is NULL.
  2303. */
  2304. int sp_init_copy(sp_int* r, sp_int* a)
  2305. {
  2306. int err;
  2307. err = sp_init(r);
  2308. if (err == MP_OKAY) {
  2309. err = sp_copy(a, r);
  2310. }
  2311. return err;
  2312. }
2313. #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
  2314. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  2315. !defined(NO_DH) || !defined(NO_DSA)
  2316. /* Exchange the values in a and b.
  2317. *
  2318. * @param [in,out] a SP integer to swap.
  2319. * @param [in,out] b SP integer to swap.
  2320. *
  2321. * @return MP_OKAY on success.
  2322. * @return MP_VAL when a or b is NULL.
  2323. * @return MP_MEM when dynamic memory allocation fails.
  2324. */
  2325. int sp_exch(sp_int* a, sp_int* b)
  2326. {
  2327. int err = MP_OKAY;
  2328. DECL_SP_INT(t, (a != NULL) ? a->used : 1);
  2329. if ((a == NULL) || (b == NULL)) {
  2330. err = MP_VAL;
  2331. }
  2332. if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
  2333. err = MP_VAL;
  2334. }
  2335. ALLOC_SP_INT(t, a->used, err, NULL);
  2336. if (err == MP_OKAY) {
  2337. int asize = a->size;
  2338. int bsize = b->size;
  2339. XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
  2340. XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
  2341. XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
  2342. a->size = asize;
  2343. b->size = bsize;
  2344. }
  2345. FREE_SP_INT(t, NULL);
  2346. return err;
  2347. }
  2348. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  2349. * !NO_DSA */
  2350. #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
  2351. !defined(WC_NO_CACHE_RESISTANT)
  2352. int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m)
  2353. {
  2354. int i;
  2355. int err = MP_OKAY;
  2356. sp_digit mask = (sp_digit)0 - m;
  2357. DECL_SP_INT(t, c);
  2358. ALLOC_SP_INT(t, c, err, NULL);
  2359. if (err == MP_OKAY) {
  2360. t->used = (int)((a->used ^ b->used) & mask);
  2361. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2362. t->sign = (int)((a->sign ^ b->sign) & mask);
  2363. #endif
  2364. for (i = 0; i < c; i++) {
  2365. t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
  2366. }
  2367. a->used ^= t->used;
  2368. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2369. a->sign ^= t->sign;
  2370. #endif
  2371. for (i = 0; i < c; i++) {
  2372. a->dp[i] ^= t->dp[i];
  2373. }
  2374. b->used ^= t->used;
  2375. #ifdef WOLFSSL_SP_INT_NEGATIVE
2376. b->sign ^= t->sign;
  2377. #endif
  2378. for (i = 0; i < c; i++) {
  2379. b->dp[i] ^= t->dp[i];
  2380. }
  2381. }
  2382. FREE_SP_INT(t, NULL);
2383. return err;
  2384. }
  2385. #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
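/* Note on the masking above (illustrative): with m in {0, 1} the expression
 * (sp_digit)0 - m is either all-zero or all-one bits, so the XOR swap either
 * copies nothing or swaps every word, without a data-dependent branch:
 *
 *     mask = (sp_digit)0 - m;   // m == 1 -> all ones, m == 0 -> all zeros
 *     t    = (x ^ y) & mask;    // difference, or zero
 *     x   ^= t;                 // swapped, or unchanged
 *     y   ^= t;
 */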
  2386. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2387. /* Calculate the absolute value of the multi-precision number.
  2388. *
  2389. * @param [in] a SP integer to calculate absolute value of.
  2390. * @param [out] r SP integer to hold result.
  2391. *
  2392. * @return MP_OKAY on success.
  2393. * @return MP_VAL when a or r is NULL.
  2394. */
  2395. int sp_abs(sp_int* a, sp_int* r)
  2396. {
  2397. int err;
  2398. err = sp_copy(a, r);
  2399. if (r != NULL) {
  2400. r->sign = MP_ZPOS;
  2401. }
  2402. return err;
  2403. }
  2404. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  2405. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  2406. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  2407. /* Compare absolute value of two multi-precision numbers.
  2408. *
  2409. * @param [in] a SP integer.
  2410. * @param [in] b SP integer.
  2411. *
  2412. * @return MP_GT when a is greater than b.
  2413. * @return MP_LT when a is less than b.
2414. * @return MP_EQ when a equals b.
  2415. */
  2416. static int _sp_cmp_abs(sp_int* a, sp_int* b)
  2417. {
  2418. int ret = MP_EQ;
  2419. if (a->used > b->used) {
  2420. ret = MP_GT;
  2421. }
  2422. else if (a->used < b->used) {
  2423. ret = MP_LT;
  2424. }
  2425. else {
  2426. int i;
  2427. for (i = a->used - 1; i >= 0; i--) {
  2428. if (a->dp[i] > b->dp[i]) {
  2429. ret = MP_GT;
  2430. break;
  2431. }
  2432. else if (a->dp[i] < b->dp[i]) {
  2433. ret = MP_LT;
  2434. break;
  2435. }
  2436. }
  2437. }
  2438. return ret;
  2439. }
  2440. #endif
  2441. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  2442. /* Compare absolute value of two multi-precision numbers.
  2443. *
  2444. * @param [in] a SP integer.
  2445. * @param [in] b SP integer.
  2446. *
  2447. * @return MP_GT when a is greater than b.
  2448. * @return MP_LT when a is less than b.
2449. * @return MP_EQ when a equals b.
  2450. */
  2451. int sp_cmp_mag(sp_int* a, sp_int* b)
  2452. {
  2453. int ret;
  2454. if (a == b) {
  2455. ret = MP_EQ;
  2456. }
  2457. else if (a == NULL) {
  2458. ret = MP_LT;
  2459. }
  2460. else if (b == NULL) {
  2461. ret = MP_GT;
  2462. }
  2463. else
  2464. {
  2465. ret = _sp_cmp_abs(a, b);
  2466. }
  2467. return ret;
  2468. }
  2469. #endif
  2470. #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
  2471. defined(OPENSSL_EXTRA) || \
  2472. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  2473. /* Compare two multi-precision numbers.
  2474. *
  2475. * Assumes a and b are not NULL.
  2476. *
  2477. * @param [in] a SP integer.
2478. * @param [in] b SP integer.
2479. *
2480. * @return MP_GT when a is greater than b.
2481. * @return MP_LT when a is less than b.
2482. * @return MP_EQ when a equals b.
  2483. */
  2484. static int _sp_cmp(sp_int* a, sp_int* b)
  2485. {
  2486. int ret;
  2487. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2488. if (a->sign == b->sign) {
  2489. #endif
  2490. ret = _sp_cmp_abs(a, b);
  2491. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2492. }
  2493. else if (a->sign > b->sign) {
  2494. ret = MP_LT;
  2495. }
  2496. else /* (a->sign < b->sign) */ {
  2497. ret = MP_GT;
  2498. }
  2499. #endif
  2500. return ret;
  2501. }
  2502. #endif
  2503. #ifndef WOLFSSL_RSA_VERIFY_ONLY
  2504. /* Compare two multi-precision numbers.
  2505. *
  2506. * Pointers are compared such that NULL is less than not NULL.
  2507. *
  2508. * @param [in] a SP integer.
2509. * @param [in] b SP integer.
2510. *
2511. * @return MP_GT when a is greater than b.
2512. * @return MP_LT when a is less than b.
2513. * @return MP_EQ when a equals b.
  2514. */
  2515. int sp_cmp(sp_int* a, sp_int* b)
  2516. {
  2517. int ret;
  2518. if (a == b) {
  2519. ret = MP_EQ;
  2520. }
  2521. else if (a == NULL) {
  2522. ret = MP_LT;
  2523. }
  2524. else if (b == NULL) {
  2525. ret = MP_GT;
  2526. }
  2527. else
  2528. {
  2529. ret = _sp_cmp(a, b);
  2530. }
  2531. return ret;
  2532. }
  2533. #endif
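/* Example (illustrative): sp_cmp() returns one of the MP_* ordering codes,
 * so a typical range check reads (the names k, one and n are only for the
 * example):
 *
 *     if ((sp_cmp(k, one) == MP_GT) && (sp_cmp(k, n) == MP_LT)) {
 *         // 1 < k < n
 *     }
 */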
  2534. /*************************
  2535. * Bit check/set functions
  2536. *************************/
  2537. #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
  2538. /* Check if a bit is set
  2539. *
  2540. * When a is NULL, result is 0.
  2541. *
  2542. * @param [in] a SP integer.
  2543. * @param [in] b Bit position to check.
  2544. *
  2545. * @return 0 when bit is not set.
  2546. * @return 1 when bit is set.
  2547. */
  2548. int sp_is_bit_set(sp_int* a, unsigned int b)
  2549. {
  2550. int ret = 0;
  2551. int i = (int)(b >> SP_WORD_SHIFT);
  2552. int s = (int)(b & SP_WORD_MASK);
  2553. if ((a != NULL) && (i < a->used)) {
  2554. ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
  2555. }
  2556. return ret;
  2557. }
  2558. #endif /* WOLFSSL_RSA_VERIFY_ONLY */
  2559. /* Count the number of bits in the multi-precision number.
  2560. *
2561. * When a is NULL, the result is 0.
  2562. *
  2563. * @param [in] a SP integer.
  2564. *
  2565. * @return The number of bits in the number.
  2566. */
  2567. int sp_count_bits(sp_int* a)
  2568. {
  2569. int r = 0;
  2570. if (a != NULL) {
  2571. r = a->used - 1;
  2572. while ((r >= 0) && (a->dp[r] == 0)) {
  2573. r--;
  2574. }
  2575. if (r < 0) {
  2576. r = 0;
  2577. }
  2578. else {
  2579. sp_int_digit d;
  2580. d = a->dp[r];
  2581. r *= SP_WORD_SIZE;
  2582. if (d > SP_HALF_MAX) {
  2583. r += SP_WORD_SIZE;
2584. while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
  2585. r--;
  2586. d <<= 1;
  2587. }
  2588. }
  2589. else {
  2590. while (d != 0) {
  2591. r++;
  2592. d >>= 1;
  2593. }
  2594. }
  2595. }
  2596. }
  2597. return r;
  2598. }
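/* Worked example (illustrative): for a single-digit value 0x1234 the highest
 * set bit is bit 12, so sp_count_bits() returns 13.
 */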
  2599. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  2600. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) || \
  2601. (defined(HAVE_ECC) && defined(FP_ECC))
  2602. /* Number of entries in array of number of least significant zero bits. */
  2603. #define SP_LNZ_CNT 16
  2604. /* Number of bits the array checks. */
  2605. #define SP_LNZ_BITS 4
  2606. /* Mask to apply to check with array. */
  2607. #define SP_LNZ_MASK 0xf
  2608. /* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
  2609. static const int sp_lnz[SP_LNZ_CNT] = {
  2610. 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  2611. };
  2612. /* Count the number of least significant zero bits.
  2613. *
2614. * When a is NULL, the result is 0.
  2615. *
  2616. * @param [in] a SP integer to use.
  2617. *
2618. * @return Number of least significant zero bits.
  2619. */
  2620. #if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
  2621. static
2622. #endif /* !HAVE_ECC || !HAVE_COMP_KEY */
  2623. int sp_cnt_lsb(sp_int* a)
  2624. {
  2625. int bc = 0;
  2626. if ((a != NULL) && (!sp_iszero(a))) {
  2627. int i;
  2628. int j;
  2629. int cnt = 0;
  2630. for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
  2631. }
  2632. for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
  2633. bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
  2634. if (bc != 4) {
  2635. bc += cnt + j;
  2636. break;
  2637. }
  2638. }
  2639. }
  2640. return bc;
  2641. }
  2642. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
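/* Worked example (illustrative): for the value 0x50 the low nibble is 0
 * (sp_lnz[0] == 4, keep scanning) and the next nibble is 5 (sp_lnz[5] == 0),
 * so sp_cnt_lsb() returns 4 trailing zero bits.
 */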
  2643. #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
  2644. /* Determine if the most significant byte of the encoded multi-precision number
  2645. * has the top bit set.
  2646. *
2647. * When a is NULL, the result is 0.
  2648. *
  2649. * @param [in] a SP integer.
  2650. *
  2651. * @return 1 when the top bit of top byte is set.
  2652. * @return 0 when the top bit of top byte is not set.
  2653. */
  2654. int sp_leading_bit(sp_int* a)
  2655. {
  2656. int bit = 0;
  2657. if ((a != NULL) && (a->used > 0)) {
  2658. sp_int_digit d = a->dp[a->used - 1];
  2659. #if SP_WORD_SIZE > 8
  2660. while (d > (sp_int_digit)0xff) {
  2661. d >>= 8;
  2662. }
  2663. #endif
  2664. bit = (int)(d >> 7);
  2665. }
  2666. return bit;
  2667. }
  2668. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  2669. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  2670. defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
  2671. !defined(NO_RSA)
  2672. /* Set a bit of a: a |= 1 << i
  2673. * The field 'used' is updated in a.
  2674. *
  2675. * @param [in,out] a SP integer to set bit into.
  2676. * @param [in] i Index of bit to set.
  2677. *
  2678. * @return MP_OKAY on success.
  2679. * @return MP_VAL when a is NULL or index is too large.
  2680. */
  2681. int sp_set_bit(sp_int* a, int i)
  2682. {
  2683. int err = MP_OKAY;
  2684. int w = (int)(i >> SP_WORD_SHIFT);
  2685. if ((a == NULL) || (w >= a->size)) {
  2686. err = MP_VAL;
  2687. }
  2688. else {
  2689. int s = (int)(i & (SP_WORD_SIZE - 1));
  2690. int j;
  2691. for (j = a->used; j <= w; j++) {
  2692. a->dp[j] = 0;
  2693. }
  2694. a->dp[w] |= (sp_int_digit)1 << s;
  2695. if (a->used <= w) {
  2696. a->used = w + 1;
  2697. }
  2698. }
  2699. return err;
  2700. }
  2701. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  2702. * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
  2703. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  2704. defined(WOLFSSL_KEY_GEN)
  2705. /* Exponentiate 2 to the power of e: a = 2^e
  2706. * This is done by setting the 'e'th bit.
  2707. *
  2708. * @param [out] a SP integer to hold result.
  2709. * @param [in] e Exponent.
  2710. *
  2711. * @return MP_OKAY on success.
  2712. * @return MP_VAL when a is NULL or 2^exponent is too large.
  2713. */
  2714. int sp_2expt(sp_int* a, int e)
  2715. {
  2716. int err = MP_OKAY;
  2717. if (a == NULL) {
  2718. err = MP_VAL;
  2719. }
  2720. if (err == MP_OKAY) {
  2721. _sp_zero(a);
  2722. err = sp_set_bit(a, e);
  2723. }
  2724. return err;
  2725. }
  2726. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  2727. * WOLFSSL_KEY_GEN */
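/* Example (illustrative, assuming SP_WORD_SIZE == 32): sp_2expt(a, 70) sets
 * bit 70, giving a->dp[0] = 0, a->dp[1] = 0, a->dp[2] = 0x40 and a->used = 3.
 */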
  2728. /**********************
  2729. * Digit/Long functions
  2730. **********************/
  2731. /* Set the multi-precision number to be the value of the digit.
  2732. *
  2733. * @param [out] a SP integer to become number.
  2734. * @param [in] d Digit to be set.
  2735. *
  2736. * @return MP_OKAY on success.
  2737. * @return MP_VAL when a is NULL.
  2738. */
  2739. int sp_set(sp_int* a, sp_int_digit d)
  2740. {
  2741. int err = MP_OKAY;
  2742. if (a == NULL) {
  2743. err = MP_VAL;
  2744. }
  2745. if (err == MP_OKAY) {
  2746. a->dp[0] = d;
  2747. a->used = d > 0;
  2748. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2749. a->sign = MP_ZPOS;
  2750. #endif
  2751. }
  2752. return err;
  2753. }
  2754. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA)
  2755. /* Set a number into the multi-precision number.
  2756. *
  2757. * Number may be larger than the size of a digit.
  2758. *
  2759. * @param [out] a SP integer to set.
  2760. * @param [in] n Long value to set.
  2761. *
  2762. * @return MP_OKAY on success.
  2763. * @return MP_VAL when a is NULL.
  2764. */
  2765. int sp_set_int(sp_int* a, unsigned long n)
  2766. {
  2767. int err = MP_OKAY;
  2768. if (a == NULL) {
  2769. err = MP_VAL;
  2770. }
  2771. if (err == MP_OKAY) {
  2772. #if SP_WORD_SIZE < SP_ULONG_BITS
  2773. if (n <= (sp_int_digit)SP_DIGIT_MAX) {
  2774. #endif
  2775. a->dp[0] = (sp_int_digit)n;
  2776. a->used = (n != 0);
  2777. #if SP_WORD_SIZE < SP_ULONG_BITS
  2778. }
  2779. else {
  2780. int i;
  2781. for (i = 0; n > 0; i++,n >>= SP_WORD_SIZE) {
  2782. a->dp[i] = (sp_int_digit)n;
  2783. }
  2784. a->used = i;
  2785. }
  2786. #endif
  2787. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2788. a->sign = MP_ZPOS;
  2789. #endif
  2790. }
  2791. return err;
  2792. }
  2793. #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
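/* Example (illustrative, assuming SP_WORD_SIZE == 32 and a 64-bit unsigned
 * long): sp_set_int(a, 0x123456789AUL) stores two digits,
 * a->dp[0] = 0x3456789A and a->dp[1] = 0x12, with a->used = 2.
 */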
  2794. #ifndef WOLFSSL_RSA_VERIFY_ONLY
  2795. /* Compare a one digit number with a multi-precision number.
  2796. *
  2797. * When a is NULL, MP_LT is returned.
  2798. *
  2799. * @param [in] a SP integer to compare.
  2800. * @param [in] d Digit to compare with.
  2801. *
  2802. * @return MP_GT when a is greater than d.
  2803. * @return MP_LT when a is less than d.
2804. * @return MP_EQ when a equals d.
  2805. */
  2806. int sp_cmp_d(sp_int* a, sp_int_digit d)
  2807. {
  2808. int ret = MP_EQ;
  2809. if (a == NULL) {
  2810. ret = MP_LT;
  2811. }
  2812. else
  2813. #ifdef WOLFSSL_SP_INT_NEGATIVE
  2814. if (a->sign == MP_NEG) {
  2815. ret = MP_LT;
  2816. }
  2817. else
  2818. #endif
  2819. {
  2820. /* special case for zero*/
  2821. if (a->used == 0) {
  2822. if (d == 0) {
  2823. ret = MP_EQ;
  2824. }
  2825. else {
  2826. ret = MP_LT;
  2827. }
  2828. }
  2829. else if (a->used > 1) {
  2830. ret = MP_GT;
  2831. }
  2832. else {
  2833. if (a->dp[0] > d) {
  2834. ret = MP_GT;
  2835. }
  2836. else if (a->dp[0] < d) {
  2837. ret = MP_LT;
  2838. }
  2839. }
  2840. }
  2841. return ret;
  2842. }
  2843. #endif
  2844. #if defined(WOLFSSL_SP_INT_NEGATIVE) || !defined(NO_PWDBASED) || \
  2845. defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || defined(HAVE_ECC) || \
  2846. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  2847. /* Add a one digit number to the multi-precision number.
  2848. *
2849. * @param [in] a SP integer to be added to.
  2850. * @param [in] d Digit to add.
  2851. * @param [out] r SP integer to store result in.
  2852. *
  2853. * @return MP_OKAY on success.
  2854. * @return MP_VAL when result is too large for fixed size dp array.
  2855. */
  2856. static int _sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
  2857. {
  2858. int err = MP_OKAY;
  2859. int i = 0;
  2860. sp_int_digit t;
  2861. r->used = a->used;
  2862. if (a->used == 0) {
  2863. r->used = d > 0;
  2864. }
  2865. t = a->dp[0] + d;
  2866. if (t < a->dp[0]) {
  2867. for (++i; i < a->used; i++) {
  2868. r->dp[i] = a->dp[i] + 1;
  2869. if (r->dp[i] != 0) {
  2870. break;
  2871. }
  2872. }
  2873. if (i == a->used) {
  2874. r->used++;
  2875. if (i < r->size)
  2876. r->dp[i] = 1;
  2877. else
  2878. err = MP_VAL;
  2879. }
  2880. }
  2881. if (err == MP_OKAY) {
  2882. r->dp[0] = t;
  2883. if (r != a) {
  2884. for (++i; i < a->used; i++) {
  2885. r->dp[i] = a->dp[i];
  2886. }
  2887. }
  2888. }
  2889. return err;
  2890. }
  2891. #endif /* WOLFSSL_SP_INT_NEGATIVE || !NO_PWDBASED || WOLFSSL_KEY_GEN ||
  2892. * !NO_DH || !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  2893. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  2894. defined(WOLFSSL_SP_INT_NEGATIVE) || \
  2895. !defined(NO_DH) || !defined(NO_DSA) || defined(HAVE_ECC) || \
  2896. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  2897. /* Sub a one digit number from the multi-precision number.
  2898. *
2899. * Always succeeds; there is no status to return (the function is void).
2900. * @param [in] a SP integer to be subtracted from.
  2901. * @param [in] d Digit to subtract.
  2902. * @param [out] r SP integer to store result in.
  2903. */
  2904. static void _sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
  2905. {
  2906. int i = 0;
  2907. sp_int_digit t;
  2908. r->used = a->used;
  2909. if (a->used == 0) {
  2910. r->dp[0] = 0;
  2911. }
  2912. else {
  2913. t = a->dp[0] - d;
  2914. if (t > a->dp[0]) {
  2915. for (++i; i < a->used; i++) {
  2916. r->dp[i] = a->dp[i] - 1;
  2917. if (r->dp[i] != SP_DIGIT_MAX) {
  2918. break;
  2919. }
  2920. }
  2921. }
  2922. r->dp[0] = t;
  2923. if (r != a) {
  2924. for (++i; i < a->used; i++) {
  2925. r->dp[i] = a->dp[i];
  2926. }
  2927. }
  2928. sp_clamp(r);
  2929. }
  2930. }
  2931. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH || !NO_DSA ||
  2932. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  2933. #if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
  2934. !defined(NO_DSA) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  2935. /* Add a one digit number to the multi-precision number.
  2936. *
2937. * @param [in] a SP integer to be added to.
  2938. * @param [in] d Digit to add.
  2939. * @param [out] r SP integer to store result in.
  2940. *
  2941. * @return MP_OKAY on success.
  2942. * @return MP_VAL when result is too large for fixed size dp array.
  2943. */
  2944. int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
  2945. {
  2946. int err = MP_OKAY;
  2947. if ((a == NULL) || (r == NULL)) {
  2948. err = MP_VAL;
  2949. }
  2950. else
  2951. {
  2952. #ifndef WOLFSSL_SP_INT_NEGATIVE
  2953. err = _sp_add_d(a, d, r);
  2954. #else
  2955. if (a->sign == MP_ZPOS) {
  2956. r->sign = MP_ZPOS;
  2957. err = _sp_add_d(a, d, r);
  2958. }
  2959. else if ((a->used > 1) || (a->dp[0] > d)) {
  2960. r->sign = MP_NEG;
  2961. _sp_sub_d(a, d, r);
  2962. }
  2963. else {
  2964. r->sign = MP_ZPOS;
  2965. r->dp[0] = d - a->dp[0];
  2966. }
  2967. #endif
  2968. }
  2969. return err;
  2970. }
  2971. #endif /* !NO_PWDBASED || WOLFSSL_KEY_GEN || !NO_DH || !NO_DSA || !NO_RSA */
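/* Worked example (illustrative): adding 1 to a single-digit value of
 * SP_DIGIT_MAX wraps the low digit and grows the number, giving
 * r->dp[0] = 0, r->dp[1] = 1 and r->used = 2 (or MP_VAL when r has no room
 * for the extra digit).
 */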
  2972. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  2973. !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
  2974. /* Sub a one digit number from the multi-precision number.
  2975. *
2976. * @param [in] a SP integer to be subtracted from.
  2977. * @param [in] d Digit to subtract.
  2978. * @param [out] r SP integer to store result in.
  2979. *
  2980. * @return MP_OKAY on success.
  2981. * @return MP_VAL when a or r is NULL.
  2982. */
  2983. int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
  2984. {
  2985. int err = MP_OKAY;
  2986. if ((a == NULL) || (r == NULL)) {
  2987. err = MP_VAL;
  2988. }
  2989. else {
  2990. #ifndef WOLFSSL_SP_INT_NEGATIVE
  2991. _sp_sub_d(a, d, r);
  2992. #else
  2993. if (a->sign == MP_NEG) {
  2994. r->sign = MP_NEG;
  2995. err = _sp_add_d(a, d, r);
  2996. }
  2997. else if ((a->used > 1) || (a->dp[0] >= d)) {
  2998. r->sign = MP_ZPOS;
  2999. _sp_sub_d(a, d, r);
  3000. }
  3001. else {
  3002. r->sign = MP_NEG;
  3003. r->dp[0] = d - a->dp[0];
  3004. r->used = r->dp[0] > 0;
  3005. }
  3006. #endif
  3007. }
  3008. return err;
  3009. }
  3010. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC ||
  3011. * !NO_DSA */
  3012. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3013. defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
  3014. !defined(NO_DH) || defined(HAVE_ECC) || \
  3015. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  3016. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  3017. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
  3018. /* Multiply a by digit n and put result into r shifting up o digits.
  3019. * r = (a * n) << (o * SP_WORD_SIZE)
  3020. *
  3021. * @param [in] a SP integer to be multiplied.
  3022. * @param [in] n Number (SP digit) to multiply by.
  3023. * @param [out] r SP integer result.
  3024. * @param [in] o Number of digits to move result up by.
  3025. */
  3026. static void _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
  3027. {
  3028. int i;
  3029. sp_int_word t = 0;
  3030. #ifdef WOLFSSL_SP_SMALL
  3031. for (i = 0; i < o; i++) {
  3032. r->dp[i] = 0;
  3033. }
  3034. #else
  3035. /* Don't use the offset. Only when doing small code size div. */
  3036. (void)o;
  3037. #endif
  3038. for (i = 0; i < a->used; i++, o++) {
  3039. t += (sp_int_word)a->dp[i] * n;
  3040. r->dp[o] = (sp_int_digit)t;
  3041. t >>= SP_WORD_SIZE;
  3042. }
  3043. r->dp[o++] = (sp_int_digit)t;
  3044. r->used = o;
  3045. sp_clamp(r);
  3046. }
  3047. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  3048. * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
  3049. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3050. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
  3051. /* Multiply a by digit n and put result into r. r = a * n
  3052. *
  3053. * @param [in] a SP integer to multiply.
3054. * @param [in] d Digit to multiply by.
3055. * @param [out] r SP integer to hold result.
3056. *
3057. * @return MP_OKAY on success.
3058. * @return MP_VAL when a or r is NULL, or the result would not fit in r.
  3059. */
  3060. int sp_mul_d(sp_int* a, sp_int_digit d, sp_int* r)
  3061. {
  3062. int err = MP_OKAY;
  3063. if ((a == NULL) || (r == NULL)) {
  3064. err = MP_VAL;
  3065. }
  3066. if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
  3067. err = MP_VAL;
  3068. }
  3069. if (err == MP_OKAY) {
  3070. _sp_mul_d(a, d, r, 0);
  3071. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3072. if (d == 0) {
  3073. r->sign = MP_ZPOS;
  3074. }
  3075. else {
  3076. r->sign = a->sign;
  3077. }
  3078. #endif
  3079. }
  3080. return err;
  3081. }
  3082. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  3083. * (WOLFSSL_KEY_GEN && !NO_RSA) */
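/* Worked example (illustrative, assuming SP_WORD_SIZE == 32): multiplying the
 * single-digit value 0x80000000 by 4 with sp_mul_d() yields r->dp[0] = 0,
 * r->dp[1] = 2 and r->used = 2.
 */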
  3084. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  3085. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  3086. #ifndef SP_ASM_DIV_WORD
  3087. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  3088. *
  3089. * @param [in] hi SP integer digit. High digit of the dividend.
  3090. * @param [in] lo SP integer digit. Lower digit of the dividend.
  3091. * @param [in] d SP integer digit. Number to divide by.
3092. * @return The division result.
  3093. */
  3094. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  3095. sp_int_digit d)
  3096. {
  3097. #ifdef WOLFSSL_SP_DIV_WORD_HALF
  3098. sp_int_digit r;
  3099. if (hi != 0) {
  3100. sp_int_digit divsz = d >> SP_HALF_SIZE;
  3101. sp_int_digit r2;
  3102. sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
  3103. sp_int_word trial;
  3104. r = hi / divsz;
  3105. if (r > SP_HALF_MAX) {
  3106. r = SP_HALF_MAX;
  3107. }
  3108. r <<= SP_HALF_SIZE;
  3109. trial = r * (sp_int_word)d;
  3110. while (trial > w) {
  3111. r -= (sp_int_digit)1 << SP_HALF_SIZE;
  3112. trial -= (sp_int_word)d << SP_HALF_SIZE;
  3113. }
  3114. w -= trial;
  3115. r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divsz;
  3116. trial = r2 * (sp_int_word)d;
  3117. while (trial > w) {
  3118. r2--;
  3119. trial -= d;
  3120. }
  3121. w -= trial;
  3122. r += r2;
  3123. r2 = ((sp_int_digit)w) / d;
  3124. r += r2;
  3125. }
  3126. else {
  3127. r = lo / d;
  3128. }
  3129. return r;
  3130. #else
  3131. sp_int_word w;
  3132. sp_int_digit r;
  3133. w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
  3134. w /= d;
  3135. r = (sp_int_digit)w;
  3136. return r;
  3137. #endif /* WOLFSSL_SP_DIV_WORD_HALF */
  3138. }
  3139. #endif /* !SP_ASM_DIV_WORD */
  3140. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  3141. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
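/* Worked example for sp_div_word() (illustrative, using small values as if
 * SP_WORD_SIZE were 8): hi = 0x12, lo = 0x34 and d = 0x56 give
 * 0x1234 / 0x56 = 0x36.
 */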
  3142. #if !defined(WOLFSSL_SP_SMALL) && ((defined(WOLFSSL_SP_MATH_ALL) && \
  3143. !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3144. defined(WOLFSSL_HAVE_SP_DH) || (defined(HAVE_ECC) && (defined(FP_ECC) || \
  3145. defined(HAVE_COMP_KEY))))
  3146. /* Divide by 3: r = a / 3 and rem = a % 3
  3147. *
  3148. * @param [in] a SP integer to be divided.
  3149. * @param [out] r SP integer that is the quotient. May be NULL.
3150. * @param [out] rem Digit that is the remainder. May be NULL.
  3151. */
  3152. static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem)
  3153. {
  3154. int i;
  3155. sp_int_word t;
  3156. sp_int_digit tr = 0;
  3157. sp_int_digit tt;
  3158. static const char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
  3159. static const char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
  3160. if (r == NULL) {
  3161. for (i = a->used - 1; i >= 0; i--) {
  3162. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3163. #if SP_WORD_SIZE == 64
  3164. tt = (t * 0x5555555555555555L) >> 64;
  3165. #elif SP_WORD_SIZE == 32
  3166. tt = (t * 0x55555555) >> 32;
  3167. #elif SP_WORD_SIZE == 16
  3168. tt = (t * 0x5555) >> 16;
  3169. #elif SP_WORD_SIZE == 8
  3170. tt = (t * 0x55) >> 8;
  3171. #endif
  3172. tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
  3173. tr = sp_rem6[tr];
  3174. }
  3175. *rem = tr;
  3176. }
  3177. else {
  3178. for (i = a->used - 1; i >= 0; i--) {
  3179. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3180. #if SP_WORD_SIZE == 64
  3181. tt = (t * 0x5555555555555555L) >> 64;
  3182. #elif SP_WORD_SIZE == 32
  3183. tt = (t * 0x55555555) >> 32;
  3184. #elif SP_WORD_SIZE == 16
  3185. tt = (t * 0x5555) >> 16;
  3186. #elif SP_WORD_SIZE == 8
  3187. tt = (t * 0x55) >> 8;
  3188. #endif
  3189. tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
  3190. tt += sp_r6[tr];
  3191. tr = sp_rem6[tr];
  3192. r->dp[i] = tt;
  3193. }
  3194. r->used = a->used;
  3195. sp_clamp(r);
  3196. if (rem != NULL) {
  3197. *rem = tr;
  3198. }
  3199. }
  3200. }
  3201. /* Divide by 10: r = a / 10 and rem = a % 10
  3202. *
  3203. * @param [in] a SP integer to be divided.
  3204. * @param [out] r SP integer that is the quotient. May be NULL.
3205. * @param [out] rem Digit that is the remainder. May be NULL.
  3206. */
  3207. static void _sp_div_10(sp_int* a, sp_int* r, sp_int_digit* rem)
  3208. {
  3209. int i;
  3210. sp_int_word t;
  3211. sp_int_digit tr = 0;
  3212. sp_int_digit tt;
  3213. if (r == NULL) {
  3214. for (i = a->used - 1; i >= 0; i--) {
  3215. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3216. #if SP_WORD_SIZE == 64
  3217. tt = (t * 0x1999999999999999L) >> 64;
  3218. #elif SP_WORD_SIZE == 32
  3219. tt = (t * 0x19999999) >> 32;
  3220. #elif SP_WORD_SIZE == 16
  3221. tt = (t * 0x1999) >> 16;
  3222. #elif SP_WORD_SIZE == 8
  3223. tt = (t * 0x19) >> 8;
  3224. #endif
  3225. tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
  3226. tr = tr % 10;
  3227. }
  3228. *rem = tr;
  3229. }
  3230. else {
  3231. for (i = a->used - 1; i >= 0; i--) {
  3232. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3233. #if SP_WORD_SIZE == 64
  3234. tt = (t * 0x1999999999999999L) >> 64;
  3235. #elif SP_WORD_SIZE == 32
  3236. tt = (t * 0x19999999) >> 32;
  3237. #elif SP_WORD_SIZE == 16
  3238. tt = (t * 0x1999) >> 16;
  3239. #elif SP_WORD_SIZE == 8
  3240. tt = (t * 0x19) >> 8;
  3241. #endif
  3242. tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
  3243. tt += tr / 10;
  3244. tr = tr % 10;
  3245. r->dp[i] = tt;
  3246. }
  3247. r->used = a->used;
  3248. sp_clamp(r);
  3249. if (rem != NULL) {
  3250. *rem = tr;
  3251. }
  3252. }
  3253. }
  3254. #endif /* !WOLFSSL_SP_SMALL && ((WOLFSSL_SP_MATH_ALL &&
  3255. * !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH ||
3256. * (HAVE_ECC && (FP_ECC || HAVE_COMP_KEY))) */
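/* Note on the constants above (illustrative): _sp_div_3() and _sp_div_10()
 * estimate the quotient by multiplying by roughly 2^SP_WORD_SIZE / 3 (the
 * 0x55... patterns) or 2^SP_WORD_SIZE / 10 (the 0x19... patterns) and
 * shifting, then correct the small underestimate from the remainder. For
 * example, with 32-bit digits (100 * 0x55555555) >> 32 == 33 == 100 / 3.
 */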
  3257. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3258. defined(WOLFSSL_HAVE_SP_DH) || \
  3259. (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY)))
  3260. /* Divide by small number: r = a / d and rem = a % d
  3261. *
  3262. * @param [in] a SP integer to be divided.
  3263. * @param [in] d Digit to divide by.
  3264. * @param [out] r SP integer that is the quotient. May be NULL.
3265. * @param [out] rem Digit that is the remainder. May be NULL.
  3266. */
  3267. static void _sp_div_small(sp_int* a, sp_int_digit d, sp_int* r,
  3268. sp_int_digit* rem)
  3269. {
  3270. int i;
  3271. sp_int_word t;
  3272. sp_int_digit tr = 0;
  3273. sp_int_digit tt;
  3274. sp_int_digit m;
  3275. if (r == NULL) {
  3276. m = SP_DIGIT_MAX / d;
  3277. for (i = a->used - 1; i >= 0; i--) {
  3278. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3279. tt = (t * m) >> SP_WORD_SIZE;
  3280. tr = (sp_int_digit)(t - tt * d);
  3281. tr = tr % d;
  3282. }
  3283. *rem = tr;
  3284. }
  3285. else {
  3286. m = SP_DIGIT_MAX / d;
  3287. for (i = a->used - 1; i >= 0; i--) {
  3288. t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
  3289. tt = (t * m) >> SP_WORD_SIZE;
  3290. tr = (sp_int_digit)(t - tt * d);
  3291. tt += tr / d;
  3292. tr = tr % d;
  3293. r->dp[i] = tt;
  3294. }
  3295. r->used = a->used;
  3296. sp_clamp(r);
  3297. if (rem != NULL) {
  3298. *rem = tr;
  3299. }
  3300. }
  3301. }
  3302. #endif
  3303. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3304. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
3305. /* Divide a multi-precision number by a digit size number and calculate
  3306. * remainder.
  3307. * r = a / d; rem = a % d
  3308. *
  3309. * @param [in] a SP integer to be divided.
  3310. * @param [in] d Digit to divide by.
  3311. * @param [out] r SP integer that is the quotient. May be NULL.
  3312. * @param [out] rem Digit that is the remainder. May be NULL.
  3313. *
  3314. * @return MP_OKAY on success.
  3315. * @return MP_VAL when a is NULL or d is 0.
  3316. */
  3317. int sp_div_d(sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
  3318. {
  3319. int err = MP_OKAY;
  3320. if ((a == NULL) || (d == 0)) {
  3321. err = MP_VAL;
  3322. }
  3323. if (err == MP_OKAY) {
  3324. #if !defined(WOLFSSL_SP_SMALL)
  3325. if (d == 3) {
  3326. _sp_div_3(a, r, rem);
  3327. }
  3328. else if (d == 10) {
  3329. _sp_div_10(a, r, rem);
  3330. }
  3331. else
  3332. #endif
  3333. if (d <= SP_HALF_MAX) {
  3334. _sp_div_small(a, d, r, rem);
  3335. }
  3336. else
  3337. {
  3338. int i;
  3339. sp_int_word w = 0;
  3340. sp_int_digit t;
  3341. for (i = a->used - 1; i >= 0; i--) {
  3342. t = sp_div_word((sp_int_digit)w, a->dp[i], d);
  3343. w = (w << SP_WORD_SIZE) | a->dp[i];
  3344. w -= (sp_int_word)t * d;
  3345. if (r != NULL) {
  3346. r->dp[i] = t;
  3347. }
  3348. }
  3349. if (r != NULL) {
  3350. r->used = a->used;
  3351. sp_clamp(r);
  3352. }
  3353. if (rem != NULL) {
  3354. *rem = (sp_int_digit)w;
  3355. }
  3356. }
  3357. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3358. if (r != NULL) {
  3359. r->sign = a->sign;
  3360. }
  3361. #endif
  3362. }
  3363. return err;
  3364. }
  3365. #endif
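/* Editor's illustrative sketch (not part of sp_int.c): the general branch of
 * sp_div_d() divides the top two remainder digits by d via sp_div_word() and
 * carries the remainder down into the next digit. A hypothetical portable
 * equivalent for a little-endian array of 32-bit digits, using a native 64-bit
 * divide instead of sp_div_word(), assuming d != 0 and q[] holds n digits: */
static unsigned int demo_div_digit(const unsigned int* a, int n,
                                   unsigned int d, unsigned int* q)
{
    unsigned long long w = 0;
    int i;
    for (i = n - 1; i >= 0; i--) {
        w = (w << 32) | a[i];        /* remainder so far : next digit */
        q[i] = (unsigned int)(w / d);
        w %= d;                      /* remainder carried into the next step */
    }
    return (unsigned int)w;          /* final remainder, a % d */
}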
  3366. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3367. defined(WOLFSSL_HAVE_SP_DH) || \
  3368. (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY)))
  3369. /* Calculate a modulo the digit d into r: r = a mod d
  3370. *
  3371. * @param [in] a SP integer to reduce.
3372. * @param [in] d Digit that is the modulus.
3373. * @param [out] r Digit that is the result.
  3374. *
  3375. * @return MP_OKAY on success.
  3376. * @return MP_VAL when a is NULL or d is 0.
  3377. */
  3378. #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
  3379. !defined(HAVE_COMP_KEY))
  3380. static
  3381. #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
  3382. int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
  3383. {
  3384. int err = MP_OKAY;
  3385. if ((a == NULL) || (r == NULL) || (d == 0)) {
  3386. err = MP_VAL;
  3387. }
  3388. if (0) {
  3389. sp_print(a, "a");
  3390. sp_print_digit(d, "m");
  3391. }
  3392. if (err == MP_OKAY) {
  3393. /* Check whether d is a power of 2. */
  3394. if ((d & (d - 1)) == 0) {
  3395. if (a->used == 0) {
  3396. *r = 0;
  3397. }
  3398. else {
  3399. *r = a->dp[0] & (d - 1);
  3400. }
  3401. }
  3402. #if !defined(WOLFSSL_SP_SMALL)
  3403. else if (d == 3) {
  3404. _sp_div_3(a, NULL, r);
  3405. }
  3406. else if (d == 10) {
  3407. _sp_div_10(a, NULL, r);
  3408. }
  3409. #endif
  3410. else if (d <= SP_HALF_MAX) {
  3411. _sp_div_small(a, d, NULL, r);
  3412. }
  3413. else {
  3414. int i;
  3415. sp_int_word w = 0;
  3416. sp_int_digit t;
  3417. for (i = a->used - 1; i >= 0; i--) {
  3418. t = sp_div_word((sp_int_digit)w, a->dp[i], d);
  3419. w = (w << SP_WORD_SIZE) | a->dp[i];
  3420. w -= (sp_int_word)t * d;
  3421. }
  3422. *r = (sp_int_digit)w;
  3423. }
  3424. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3425. if (a->sign == MP_NEG) {
  3426. *r = d - *r;
  3427. }
  3428. #endif
  3429. }
  3430. if (0) {
  3431. sp_print_digit(*r, "rmod");
  3432. }
  3433. return err;
  3434. }
3435. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || \
  3436. * WOLFSSL_HAVE_SP_DH || (HAVE_ECC && (FP_ECC || HAVE_COMP_KEY)) */
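/* Editor's illustrative sketch (not part of sp_int.c): sp_mod_d() can mask
 * instead of divide when d is a power of two, because every digit above dp[0]
 * is a multiple of 2^SP_WORD_SIZE and therefore of d, so only the low digit
 * contributes to the remainder. A hypothetical single-word version: */
static unsigned int demo_mod_small(unsigned int x, unsigned int d)
{
    if ((d & (d - 1u)) == 0u) {      /* d has a single bit set (d != 0) */
        return x & (d - 1u);         /* power of two: keep the low bits */
    }
    return x % d;                    /* otherwise fall back to a divide */
}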
  3437. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  3438. /* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
  3439. *
  3440. * r = a / 2 (mod m) - constant time (a < m and positive)
  3441. *
  3442. * @param [in] a SP integer to divide.
  3443. * @param [in] m SP integer that is modulus.
  3444. * @param [out] r SP integer to hold result.
  3445. *
  3446. * @return MP_OKAY on success.
  3447. * @return MP_VAL when a, m or r is NULL.
  3448. */
  3449. int sp_div_2_mod_ct(sp_int* a, sp_int* m, sp_int* r)
  3450. {
  3451. int err = MP_OKAY;
  3452. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  3453. err = MP_VAL;
  3454. }
  3455. if (err == MP_OKAY) {
  3456. sp_int_word w = 0;
  3457. sp_int_digit mask;
  3458. int i;
  3459. if (0) {
  3460. sp_print(a, "a");
  3461. sp_print(m, "m");
  3462. }
  3463. mask = 0 - (a->dp[0] & 1);
  3464. for (i = 0; i < m->used; i++) {
  3465. sp_int_digit mask_a = 0 - (i < a->used);
  3466. w += m->dp[i] & mask;
  3467. w += a->dp[i] & mask_a;
  3468. r->dp[i] = (sp_int_digit)w;
  3469. w >>= DIGIT_BIT;
  3470. }
  3471. r->dp[i] = (sp_int_digit)w;
  3472. r->used = i + 1;
  3473. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3474. r->sign = MP_ZPOS;
  3475. #endif
  3476. sp_clamp(r);
  3477. sp_div_2(r, r);
  3478. if (0) {
  3479. sp_print(r, "rd2");
  3480. }
  3481. }
  3482. return err;
  3483. }
  3484. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
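/* Editor's illustrative sketch (not part of sp_int.c): sp_div_2_mod_ct() adds
 * m exactly when a is odd, selected with a mask rather than a branch, and then
 * halves. For the odd moduli used in ECC, a + m is even whenever a is odd, so
 * the shift is an exact division and the result stays below m when 0 <= a < m.
 * A hypothetical single-word version, assuming a + m does not overflow: */
static unsigned long long demo_div2_mod(unsigned long long a,
                                        unsigned long long m)
{
    unsigned long long mask = 0ULL - (a & 1ULL); /* all ones iff a is odd */
    return (a + (m & mask)) >> 1;
}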
  3485. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  3486. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  3487. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  3488. /* Divides a by 2 and stores in r: r = a >> 1
  3489. *
  3490. * @param [in] a SP integer to divide.
  3491. * @param [out] r SP integer to hold result.
  3492. *
  3493. * @return MP_OKAY on success.
  3494. * @return MP_VAL when a or r is NULL.
  3495. */
  3496. #if !(defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  3497. static
  3498. #endif
  3499. int sp_div_2(sp_int* a, sp_int* r)
  3500. {
  3501. int err = MP_OKAY;
  3502. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  3503. /* Only when a public API. */
  3504. if ((a == NULL) || (r == NULL)) {
  3505. err = MP_VAL;
  3506. }
  3507. #endif
  3508. if (err == MP_OKAY) {
  3509. int i;
  3510. r->used = a->used;
  3511. for (i = 0; i < a->used - 1; i++) {
  3512. r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
  3513. }
  3514. r->dp[i] = a->dp[i] >> 1;
  3515. r->used = i + 1;
  3516. sp_clamp(r);
  3517. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3518. r->sign = a->sign;
  3519. #endif
  3520. }
  3521. return err;
  3522. }
  3523. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
  3524. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  3525. /************************
  3526. * Add/Subtract Functions
  3527. ************************/
  3528. #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
3529. /* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZE))
  3530. *
  3531. * @param [in] a SP integer to add to.
  3532. * @param [in] b SP integer to add.
  3533. * @param [out] r SP integer to store result in.
  3534. * @param [in] o Number of digits to offset b.
  3535. *
  3536. * @return MP_OKAY on success.
  3537. */
  3538. static int _sp_add_off(sp_int* a, sp_int* b, sp_int* r, int o)
  3539. {
  3540. int i;
  3541. int j;
  3542. sp_int_word t = 0;
  3543. if (0) {
  3544. sp_print(a, "a");
  3545. sp_print(b, "b");
  3546. }
  3547. #ifdef SP_MATH_NEED_ADD_OFF
  3548. for (i = 0; (i < o) && (i < a->used); i++) {
  3549. r->dp[i] = a->dp[i];
  3550. }
  3551. for (; i < o; i++) {
  3552. r->dp[i] = 0;
  3553. }
  3554. #else
  3555. i = 0;
  3556. (void)o;
  3557. #endif
  3558. for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
  3559. t += a->dp[i];
  3560. t += b->dp[j];
  3561. r->dp[i] = (sp_int_digit)t;
  3562. t >>= SP_WORD_SIZE;
  3563. }
  3564. for (; i < a->used; i++) {
  3565. t += a->dp[i];
  3566. r->dp[i] = (sp_int_digit)t;
  3567. t >>= SP_WORD_SIZE;
  3568. }
  3569. for (; j < b->used; i++, j++) {
  3570. t += b->dp[j];
  3571. r->dp[i] = (sp_int_digit)t;
  3572. t >>= SP_WORD_SIZE;
  3573. }
  3574. r->used = i;
  3575. if (t != 0) {
  3576. r->dp[i] = (sp_int_digit)t;
  3577. r->used++;
  3578. }
  3579. sp_clamp(r);
  3580. if (0) {
  3581. sp_print(r, "radd");
  3582. }
  3583. return MP_OKAY;
  3584. }
  3585. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
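/* Editor's illustrative sketch (not part of sp_int.c): the carry handling in
 * _sp_add_off() keeps a double-width running sum, stores its low digit and
 * shifts the carry down for the next iteration. A hypothetical helper for two
 * equal-length little-endian arrays of 32-bit digits (no digit offset),
 * assuming r[] has room for n + 1 digits; returns the result length: */
static int demo_add_digits(const unsigned int* a, const unsigned int* b, int n,
                           unsigned int* r)
{
    unsigned long long t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += a[i];
        t += b[i];
        r[i] = (unsigned int)t;      /* low 32 bits of the running sum */
        t >>= 32;                    /* keep only the carry */
    }
    if (t != 0) {
        r[i++] = (unsigned int)t;    /* final carry becomes a new top digit */
    }
    return i;
}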
  3586. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
  3587. !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  3588. !defined(WOLFSSL_RSA_VERIFY_ONLY))
3589. /* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZE))
  3590. * a must be greater than b.
  3591. *
  3592. * @param [in] a SP integer to subtract from.
  3593. * @param [in] b SP integer to subtract.
  3594. * @param [out] r SP integer to store result in.
  3595. * @param [in] o Number of digits to offset b.
  3596. *
  3597. * @return MP_OKAY on success.
  3598. */
  3599. static int _sp_sub_off(sp_int* a, sp_int* b, sp_int* r, int o)
  3600. {
  3601. int i;
  3602. int j;
  3603. sp_int_sword t = 0;
  3604. for (i = 0; (i < o) && (i < a->used); i++) {
  3605. r->dp[i] = a->dp[i];
  3606. }
  3607. for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
  3608. t += a->dp[i];
  3609. t -= b->dp[j];
  3610. r->dp[i] = (sp_int_digit)t;
  3611. t >>= SP_WORD_SIZE;
  3612. }
  3613. for (; i < a->used; i++) {
  3614. t += a->dp[i];
  3615. r->dp[i] = (sp_int_digit)t;
  3616. t >>= SP_WORD_SIZE;
  3617. }
  3618. r->used = i;
  3619. sp_clamp(r);
  3620. return MP_OKAY;
  3621. }
  3622. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
  3623. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
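/* Editor's illustrative sketch (not part of sp_int.c): _sp_sub_off() uses a
 * signed double-width accumulator; after each digit is stored, the arithmetic
 * right shift leaves 0 (no borrow) or -1 (borrow) to fold into the next digit.
 * A hypothetical helper for equal-length 32-bit digit arrays with a >= b,
 * relying on arithmetic shift of negative values as the code above does: */
static void demo_sub_digits(const unsigned int* a, const unsigned int* b,
                            int n, unsigned int* r)
{
    long long t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += a[i];
        t -= b[i];
        r[i] = (unsigned int)t;      /* low 32 bits; wraps when borrowing */
        t >>= 32;                    /* 0 or -1: the borrow for the next digit */
    }
}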
  3624. #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
  3625. /* Add b to a into r: r = a + b
  3626. *
  3627. * @param [in] a SP integer to add to.
  3628. * @param [in] b SP integer to add.
  3629. * @param [out] r SP integer to store result in.
  3630. *
  3631. * @return MP_OKAY on success.
  3632. * @return MP_VAL when a, b, or r is NULL.
  3633. */
  3634. int sp_add(sp_int* a, sp_int* b, sp_int* r)
  3635. {
  3636. int err = MP_OKAY;
  3637. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  3638. err = MP_VAL;
  3639. }
  3640. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  3641. err = MP_VAL;
  3642. }
  3643. if (err == MP_OKAY) {
  3644. #ifndef WOLFSSL_SP_INT_NEGATIVE
  3645. err = _sp_add_off(a, b, r, 0);
  3646. #else
  3647. if (a->sign == b->sign) {
  3648. r->sign = a->sign;
  3649. err = _sp_add_off(a, b, r, 0);
  3650. }
  3651. else if (_sp_cmp_abs(a, b) != MP_LT) {
  3652. err = _sp_sub_off(a, b, r, 0);
  3653. if (sp_iszero(r)) {
  3654. r->sign = MP_ZPOS;
  3655. }
  3656. else {
  3657. r->sign = a->sign;
  3658. }
  3659. }
  3660. else {
  3661. err = _sp_sub_off(b, a, r, 0);
  3662. if (sp_iszero(r)) {
  3663. r->sign = MP_ZPOS;
  3664. }
  3665. else {
  3666. r->sign = b->sign;
  3667. }
  3668. }
  3669. #endif
  3670. }
  3671. return err;
  3672. }
  3673. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  3674. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  3675. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  3676. /* Subtract b from a into r: r = a - b
  3677. *
  3678. * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
  3679. *
  3680. * @param [in] a SP integer to subtract from.
  3681. * @param [in] b SP integer to subtract.
  3682. * @param [out] r SP integer to store result in.
  3683. *
  3684. * @return MP_OKAY on success.
  3685. * @return MP_VAL when a, b, or r is NULL.
  3686. */
  3687. int sp_sub(sp_int* a, sp_int* b, sp_int* r)
  3688. {
  3689. int err = MP_OKAY;
  3690. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  3691. err = MP_VAL;
  3692. }
  3693. else {
  3694. #ifndef WOLFSSL_SP_INT_NEGATIVE
  3695. err = _sp_sub_off(a, b, r, 0);
  3696. #else
  3697. if (a->sign != b->sign) {
  3698. r->sign = a->sign;
  3699. err = _sp_add_off(a, b, r, 0);
  3700. }
  3701. else if (_sp_cmp_abs(a, b) != MP_LT) {
  3702. err = _sp_sub_off(a, b, r, 0);
  3703. if (sp_iszero(r)) {
  3704. r->sign = MP_ZPOS;
  3705. }
  3706. else {
  3707. r->sign = a->sign;
  3708. }
  3709. }
  3710. else {
  3711. err = _sp_sub_off(b, a, r, 0);
  3712. if (sp_iszero(r)) {
  3713. r->sign = MP_ZPOS;
  3714. }
  3715. else {
  3716. r->sign = 1 - a->sign;
  3717. }
  3718. }
  3719. #endif
  3720. }
  3721. return err;
  3722. }
  3723. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  3724. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
  3725. /****************************
  3726. * Add/Subtract mod functions
  3727. ****************************/
  3728. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  3729. (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES))
3730. /* Add two values and reduce: r = (a + b) % m
  3731. *
  3732. * @param [in] a SP integer to add.
  3733. * @param [in] b SP integer to add with.
  3734. * @param [in] m SP integer that is the modulus.
  3735. * @param [out] r SP integer to hold result.
  3736. *
  3737. * @return MP_OKAY on success.
  3738. * @return MP_VAL when a, b, m or r is NULL.
  3739. * @return MP_MEM when dynamic memory allocation fails.
  3740. */
  3741. int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
  3742. {
  3743. int err = MP_OKAY;
  3744. int used = ((a == NULL) || (b == NULL)) ? 1 :
  3745. ((a->used >= b->used) ? a->used + 1 : b->used + 1);
  3746. DECL_SP_INT(t, used);
  3747. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  3748. err = MP_VAL;
  3749. }
  3750. ALLOC_SP_INT_SIZE(t, used, err, NULL);
  3751. if (0 && (err == MP_OKAY)) {
  3752. sp_print(a, "a");
  3753. sp_print(b, "b");
  3754. sp_print(m, "m");
  3755. }
  3756. if (err == MP_OKAY) {
  3757. err = sp_add(a, b, t);
  3758. }
  3759. if (err == MP_OKAY) {
  3760. err = sp_mod(t, m, r);
  3761. }
  3762. if (0 && (err == MP_OKAY)) {
  3763. sp_print(r, "rma");
  3764. }
  3765. FREE_SP_INT(t, NULL);
  3766. return err;
  3767. }
  3768. #endif /* WOLFSSL_SP_MATH_ALL || (!WOLFSSL_SP_MATH && WOLFSSL_CUSTOM_CURVES) */
  3769. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  3770. /* Sub b from a and reduce: r = (a - b) % m
  3771. * Result is always positive.
  3772. *
  3773. * @param [in] a SP integer to subtract from
  3774. * @param [in] b SP integer to subtract.
  3775. * @param [in] m SP integer that is the modulus.
  3776. * @param [out] r SP integer to hold result.
  3777. *
  3778. * @return MP_OKAY on success.
  3779. * @return MP_VAL when a, b, m or r is NULL.
  3780. * @return MP_MEM when dynamic memory allocation fails.
  3781. */
  3782. int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
  3783. {
  3784. #ifndef WOLFSSL_SP_INT_NEGATIVE
  3785. int err = MP_OKAY;
  3786. int used = ((a == NULL) || (b == NULL) || (m == NULL)) ? 1 :
  3787. ((a->used >= m->used) ?
  3788. ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
  3789. ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
  3790. DECL_SP_INT_ARRAY(t, used, 2);
  3791. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  3792. err = MP_VAL;
  3793. }
  3794. if (0 && (err == MP_OKAY)) {
  3795. sp_print(a, "a");
  3796. sp_print(b, "b");
  3797. sp_print(m, "m");
  3798. }
  3799. ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
  3800. if (err == MP_OKAY) {
  3801. if (_sp_cmp(a, m) == MP_GT) {
  3802. err = sp_mod(a, m, t[0]);
  3803. a = t[0];
  3804. }
  3805. }
  3806. if (err == MP_OKAY) {
  3807. if (_sp_cmp(b, m) == MP_GT) {
  3808. err = sp_mod(b, m, t[1]);
  3809. b = t[1];
  3810. }
  3811. }
  3812. if (err == MP_OKAY) {
  3813. if (_sp_cmp(a, b) == MP_LT) {
  3814. err = sp_add(a, m, t[0]);
  3815. if (err == MP_OKAY) {
  3816. err = sp_sub(t[0], b, r);
  3817. }
  3818. }
  3819. else {
  3820. err = sp_sub(a, b, r);
  3821. }
  3822. }
  3823. if (0 && (err == MP_OKAY)) {
  3824. sp_print(r, "rms");
  3825. }
  3826. FREE_SP_INT_ARRAY(t, NULL);
  3827. return err;
  3828. #else /* WOLFSSL_SP_INT_NEGATIVE */
  3829. int err = MP_OKAY;
  3830. int used = ((a == NULL) || (b == NULL)) ? 1 :
  3831. ((a->used >= b->used) ? a->used + 1 : b->used + 1);
  3832. DECL_SP_INT(t, used);
  3833. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  3834. err = MP_VAL;
  3835. }
  3836. if (0 && (err == MP_OKAY)) {
  3837. sp_print(a, "a");
  3838. sp_print(b, "b");
  3839. sp_print(m, "m");
  3840. }
  3841. ALLOC_SP_INT_SIZE(t, used, err, NULL);
  3842. if (err == MP_OKAY) {
  3843. err = sp_sub(a, b, t);
  3844. }
  3845. if (err == MP_OKAY) {
  3846. err = sp_mod(t, m, r);
  3847. }
  3848. if (0 && (err == MP_OKAY)) {
  3849. sp_print(r, "rms");
  3850. }
  3851. FREE_SP_INT(t, NULL);
  3852. return err;
  3853. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  3854. }
  3855. #endif /* WOLFSSL_SP_MATH_ALL */
  3856. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  3857. /* Compare two multi-precision numbers.
  3858. *
  3859. * Constant time implementation.
  3860. *
  3861. * @param [in] a SP integer to compare.
  3862. * @param [in] b SP integer to compare.
  3863. * @param [in] len Number of digits to compare.
  3864. *
  3865. * @return MP_GT when a is greater than b.
  3866. * @return MP_LT when a is less than b.
3867. * @return MP_EQ when a equals b.
  3868. */
  3869. static int sp_cmp_mag_ct(sp_int* a, sp_int* b, int len)
  3870. {
  3871. int i;
  3872. sp_sint_digit r = MP_EQ;
  3873. sp_int_digit mask = SP_MASK;
  3874. for (i = len - 1; i >= 0; i--) {
  3875. sp_int_digit am = 0 - (i < a->used);
  3876. sp_int_digit bm = 0 - (i < b->used);
  3877. sp_int_digit ad = a->dp[i] & am;
  3878. sp_int_digit bd = b->dp[i] & bm;
  3879. r |= mask & (ad > bd);
  3880. mask &= (ad > bd) - 1;
  3881. r |= mask & (-(ad < bd));
  3882. mask &= (ad < bd) - 1;
  3883. }
  3884. return (int)r;
  3885. }
  3886. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
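/* Editor's illustrative sketch (not part of sp_int.c): sp_cmp_mag_ct() latches
 * the result at the most significant differing digit and then zeroes the mask
 * so later digits cannot change it, keeping the work independent of the data.
 * A hypothetical version over plain 32-bit digit arrays returning +1/0/-1
 * (standing in for MP_GT/MP_EQ/MP_LT): */
static int demo_cmp_ct(const unsigned int* a, const unsigned int* b, int n)
{
    int r = 0;                        /* 0 == equal so far */
    unsigned int mask = 0xffffffffu;  /* all ones until a difference is seen */
    int i;
    for (i = n - 1; i >= 0; i--) {
        unsigned int gt = (unsigned int)(a[i] > b[i]);
        unsigned int lt = (unsigned int)(a[i] < b[i]);
        r |= (int)(mask & gt);        /* record "greater" once, at most */
        mask &= gt - 1u;              /* gt == 1 clears the mask */
        r -= (int)(mask & lt);        /* record "less" once, at most */
        mask &= lt - 1u;              /* lt == 1 clears the mask */
    }
    return r;
}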
  3887. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
3888. /* Add two values and reduce: r = (a + b) % m
  3889. *
  3890. * r = a + b (mod m) - constant time (|a| < m and |b| < m and positive)
  3891. *
  3892. * Assumes a, b, m and r are not NULL.
  3893. *
  3894. * @param [in] a SP integer to add.
  3895. * @param [in] b SP integer to add with.
  3896. * @param [in] m SP integer that is the modulus.
  3897. * @param [out] r SP integer to hold result.
  3898. *
  3899. * @return MP_OKAY on success.
  3900. */
  3901. int sp_addmod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
  3902. {
  3903. sp_int_word w = 0;
  3904. sp_int_digit mask;
  3905. int i;
  3906. if (0) {
  3907. sp_print(a, "a");
  3908. sp_print(b, "b");
  3909. sp_print(m, "m");
  3910. }
  3911. _sp_add_off(a, b, r, 0);
  3912. mask = 0 - (sp_cmp_mag_ct(r, m, m->used + 1) != MP_LT);
  3913. for (i = 0; i < m->used; i++) {
  3914. sp_int_digit mask_r = 0 - (i < r->used);
  3915. w += m->dp[i] & mask;
  3916. w = (r->dp[i] & mask_r) - w;
  3917. r->dp[i] = (sp_int_digit)w;
  3918. w = (w >> DIGIT_BIT) & 1;
  3919. }
  3920. r->dp[i] = 0;
  3921. r->used = i;
  3922. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3923. r->sign = a->sign;
  3924. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  3925. sp_clamp(r);
  3926. if (0) {
  3927. sp_print(r, "rma");
  3928. }
  3929. return MP_OKAY;
  3930. }
  3931. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
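/* Editor's illustrative sketch (not part of sp_int.c): sp_addmod_ct() adds a
 * and b and then subtracts m "under a mask", so the subtraction is executed
 * either way and only its effect depends on whether the sum reached m. A
 * hypothetical single-word version for 0 <= a, b < m, assuming a + b does not
 * overflow 64 bits: */
static unsigned long long demo_addmod_ct(unsigned long long a,
                                         unsigned long long b,
                                         unsigned long long m)
{
    unsigned long long s = a + b;
    unsigned long long mask = 0ULL - (unsigned long long)(s >= m);
    return s - (m & mask);            /* subtract m only when s >= m */
}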
  3932. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  3933. /* Sub b from a and reduce: r = (a - b) % m
  3934. * Result is always positive.
  3935. *
3936. * r = a - b (mod m) - constant time (a < m and b < m and positive)
  3937. *
  3938. * Assumes a, b, m and r are not NULL.
  3939. *
  3940. * @param [in] a SP integer to subtract from
  3941. * @param [in] b SP integer to subtract.
  3942. * @param [in] m SP integer that is the modulus.
  3943. * @param [out] r SP integer to hold result.
  3944. *
  3945. * @return MP_OKAY on success.
  3946. */
  3947. int sp_submod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
  3948. {
  3949. sp_int_word w = 0;
  3950. sp_int_digit mask;
  3951. int i;
  3952. if (0) {
  3953. sp_print(a, "a");
  3954. sp_print(b, "b");
  3955. sp_print(m, "m");
  3956. }
  3957. mask = 0 - (sp_cmp_mag_ct(a, b, m->used + 1) == MP_LT);
  3958. for (i = 0; i < m->used + 1; i++) {
  3959. sp_int_digit mask_a = 0 - (i < a->used);
  3960. sp_int_digit mask_m = 0 - (i < m->used);
  3961. w += m->dp[i] & mask_m & mask;
  3962. w += a->dp[i] & mask_a;
  3963. r->dp[i] = (sp_int_digit)w;
  3964. w >>= DIGIT_BIT;
  3965. }
  3966. r->dp[i] = (sp_int_digit)w;
  3967. r->used = i + 1;
  3968. #ifdef WOLFSSL_SP_INT_NEGATIVE
  3969. r->sign = MP_ZPOS;
  3970. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  3971. sp_clamp(r);
  3972. _sp_sub_off(r, b, r, 0);
  3973. if (0) {
  3974. sp_print(r, "rms");
  3975. }
  3976. return MP_OKAY;
  3977. }
  3978. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  3979. /********************
3980. * Shifting functions
  3981. ********************/
  3982. #if !defined(NO_DH) || defined(HAVE_ECC) || (defined(WC_RSA_BLINDING) && \
  3983. !defined(WOLFSSL_RSA_VERIFY_ONLY))
  3984. /* Left shift the multi-precision number by a number of digits.
  3985. *
  3986. * @param [in,out] a SP integer to shift.
  3987. * @param [in] s Number of digits to shift.
  3988. *
  3989. * @return MP_OKAY on success.
  3990. * @return MP_VAL when a is NULL or the result is too big to fit in an SP.
  3991. */
  3992. int sp_lshd(sp_int* a, int s)
  3993. {
  3994. int err = MP_OKAY;
  3995. if (a == NULL) {
  3996. err = MP_VAL;
  3997. }
  3998. if ((err == MP_OKAY) && (a->used + s > a->size)) {
  3999. err = MP_VAL;
  4000. }
  4001. if (err == MP_OKAY) {
  4002. XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
  4003. a->used += s;
  4004. XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
  4005. sp_clamp(a);
  4006. }
  4007. return err;
  4008. }
  4009. #endif
  4010. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4011. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4012. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4013. /* Left shift the multi-precision number by n bits.
  4014. * Bits may be larger than the word size.
  4015. *
  4016. * @param [in,out] a SP integer to shift.
  4017. * @param [in] n Number of bits to shift left.
  4018. *
  4019. * @return MP_OKAY on success.
  4020. */
  4021. static int sp_lshb(sp_int* a, int n)
  4022. {
  4023. int err = MP_OKAY;
  4024. if (a->used != 0) {
  4025. int s = n >> SP_WORD_SHIFT;
  4026. int i;
  4027. if (a->used + s >= a->size) {
  4028. err = MP_VAL;
  4029. }
  4030. if (err == MP_OKAY) {
  4031. n &= SP_WORD_MASK;
  4032. if (n != 0) {
  4033. sp_int_digit v;
  4034. v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
  4035. a->dp[a->used - 1 + s] = a->dp[a->used - 1] << n;
  4036. for (i = a->used - 2; i >= 0; i--) {
  4037. a->dp[i + 1 + s] |= a->dp[i] >> (SP_WORD_SIZE - n);
  4038. a->dp[i + s] = a->dp[i] << n;
  4039. }
  4040. if (v != 0) {
  4041. a->dp[a->used + s] = v;
  4042. a->used++;
  4043. }
  4044. }
  4045. else if (s > 0) {
  4046. for (i = a->used - 1; i >= 0; i--) {
  4047. a->dp[i + s] = a->dp[i];
  4048. }
  4049. }
  4050. a->used += s;
  4051. XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
  4052. }
  4053. }
  4054. return err;
  4055. }
  4056. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  4057. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
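/* Editor's illustrative sketch (not part of sp_int.c): for the bit part of the
 * shift, sp_lshb() walks from the top digit down so every source digit is read
 * before it is overwritten; each destination digit combines one digit shifted
 * up with the high bits of the digit below it. A hypothetical in-place helper
 * for 32-bit digits and 0 < bits < 32 (no whole-digit offset), where r[] has
 * room for n + 1 digits; returns the new digit count: */
static int demo_lshb(unsigned int* r, int n, int bits)
{
    int i;
    r[n] = r[n - 1] >> (32 - bits);           /* bits pushed out of the top */
    for (i = n - 1; i > 0; i--) {
        r[i] = (r[i] << bits) | (r[i - 1] >> (32 - bits));
    }
    r[0] <<= bits;
    return (r[n] != 0) ? (n + 1) : n;
}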
  4058. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4059. !defined(NO_DH) || defined(HAVE_ECC) || \
  4060. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4061. /* Shift a right by c digits, in place: a = a >> (c * SP_WORD_SIZE)
4062. *
4063. * @param [in,out] a SP integer to shift.
4064. * @param [in] c Number of digits to shift.
4065. *
4066. */
  4067. void sp_rshd(sp_int* a, int c)
  4068. {
  4069. if (a != NULL) {
  4070. int i;
  4071. int j;
  4072. if (c >= a->used) {
  4073. _sp_zero(a);
  4074. }
  4075. else {
  4076. for (i = c, j = 0; i < a->used; i++, j++) {
  4077. a->dp[j] = a->dp[i];
  4078. }
  4079. a->used -= c;
  4080. }
  4081. }
  4082. }
  4083. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  4084. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  4085. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4086. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4087. defined(WOLFSSL_HAVE_SP_DH)
  4088. /* Shift a right by n bits into r: r = a >> n
  4089. *
  4090. * @param [in] a SP integer to shift.
  4091. * @param [in] n Number of bits to shift.
  4092. * @param [out] r SP integer to store result in.
  4093. */
  4094. void sp_rshb(sp_int* a, int n, sp_int* r)
  4095. {
  4096. int i = n >> SP_WORD_SHIFT;
  4097. if (i >= a->used) {
  4098. _sp_zero(r);
  4099. }
  4100. else {
  4101. int j;
  4102. n &= SP_WORD_SIZE - 1;
  4103. if (n == 0) {
  4104. for (j = 0; i < a->used; i++, j++)
  4105. r->dp[j] = a->dp[i];
  4106. r->used = j;
  4107. }
  4108. else if (n > 0) {
  4109. for (j = 0; i < a->used-1; i++, j++)
  4110. r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
  4111. r->dp[j] = a->dp[i] >> n;
  4112. r->used = j + 1;
  4113. sp_clamp(r);
  4114. }
  4115. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4116. if (sp_iszero(r)) {
  4117. r->sign = MP_ZPOS;
  4118. }
  4119. else {
  4120. r->sign = a->sign;
  4121. }
  4122. #endif
  4123. }
  4124. }
  4125. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  4126. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
  4127. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4128. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4129. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4130. /* Divide a by d and return the quotient in r and the remainder in rem.
  4131. * r = a / d; rem = a % d
  4132. *
  4133. * @param [in] a SP integer to be divided.
  4134. * @param [in] d SP integer to divide by.
  4135. * @param [out] r SP integer that is the quotient.
  4136. * @param [out] rem SP integer that is the remainder.
  4137. *
  4138. * @return MP_OKAY on success.
4139. * @return MP_VAL when a or d is NULL, both r and rem are NULL, or d is 0.
  4140. * @return MP_MEM when dynamic memory allocation fails.
  4141. */
  4142. #ifndef WOLFSSL_SP_MATH_ALL
  4143. static
  4144. #endif
  4145. int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
  4146. {
  4147. int err = MP_OKAY;
  4148. int ret;
  4149. int done = 0;
  4150. int i;
  4151. int s = 0;
  4152. sp_int_digit dt;
  4153. sp_int_digit t;
  4154. sp_int* sa = NULL;
  4155. sp_int* sd = NULL;
  4156. sp_int* tr = NULL;
  4157. sp_int* trial = NULL;
  4158. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4159. int aSign = MP_ZPOS;
  4160. int dSign = MP_ZPOS;
  4161. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4162. DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4);
  4163. if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
  4164. err = MP_VAL;
  4165. }
  4166. if ((err == MP_OKAY) && sp_iszero(d)) {
  4167. err = MP_VAL;
  4168. }
  4169. if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
  4170. err = MP_VAL;
  4171. }
  4172. if ((err == MP_OKAY) && (rem != NULL) && (rem->size < a->used + 1)) {
  4173. err = MP_VAL;
  4174. }
  4175. if (0 && (err == MP_OKAY)) {
  4176. sp_print(a, "a");
  4177. sp_print(d, "b");
  4178. }
  4179. if (err == MP_OKAY) {
  4180. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4181. aSign = a->sign;
  4182. dSign = d->sign;
  4183. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4184. ret = _sp_cmp_abs(a, d);
  4185. if (ret == MP_LT) {
  4186. if (rem != NULL) {
  4187. sp_copy(a, rem);
  4188. }
  4189. if (r != NULL) {
  4190. sp_set(r, 0);
  4191. }
  4192. done = 1;
  4193. }
  4194. else if (ret == MP_EQ) {
  4195. if (rem != NULL) {
  4196. sp_set(rem, 0);
  4197. }
  4198. if (r != NULL) {
  4199. sp_set(r, 1);
  4200. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4201. r->sign = aSign;
  4202. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4203. }
  4204. done = 1;
  4205. }
  4206. else if (sp_count_bits(a) == sp_count_bits(d)) {
  4207. /* a is greater than d but same bit length */
  4208. if (rem != NULL) {
  4209. _sp_sub_off(a, d, rem, 0);
  4210. }
  4211. if (r != NULL) {
  4212. sp_set(r, 1);
  4213. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4214. r->sign = aSign;
  4215. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4216. }
  4217. done = 1;
  4218. }
  4219. }
  4220. if (!done) {
  4221. /* Macro always has code associated with it and checks err first. */
  4222. ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL);
  4223. }
  4224. if ((!done) && (err == MP_OKAY)) {
  4225. sa = td[0];
  4226. sd = td[1];
  4227. tr = td[2];
  4228. trial = td[3];
  4229. sp_init_size(sa, a->used + 1);
  4230. sp_init_size(sd, d->used + 1);
  4231. sp_init_size(tr, a->used - d->used + 2);
  4232. sp_init_size(trial, a->used + 1);
  4233. s = sp_count_bits(d);
  4234. s = SP_WORD_SIZE - (s & SP_WORD_MASK);
  4235. sp_copy(a, sa);
  4236. if (s != SP_WORD_SIZE) {
  4237. err = sp_lshb(sa, s);
  4238. if (err == MP_OKAY) {
  4239. sp_copy(d, sd);
  4240. d = sd;
  4241. err = sp_lshb(sd, s);
  4242. }
  4243. }
  4244. }
  4245. if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
  4246. #ifdef WOLFSSL_SP_SMALL
  4247. int c;
  4248. #else
  4249. int j;
  4250. int o;
  4251. sp_int_sword sw;
  4252. #endif /* WOLFSSL_SP_SMALL */
  4253. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4254. sa->sign = MP_ZPOS;
  4255. sd->sign = MP_ZPOS;
  4256. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4257. tr->used = sa->used - d->used + 1;
  4258. sp_clear(tr);
  4259. tr->used = sa->used - d->used + 1;
  4260. dt = d->dp[d->used-1];
  4261. for (i = d->used - 1; i > 0; i--) {
  4262. if (sa->dp[sa->used - d->used + i] != d->dp[i]) {
  4263. break;
  4264. }
  4265. }
  4266. if (sa->dp[sa->used - d->used + i] >= d->dp[i]) {
  4267. i = sa->used;
  4268. _sp_sub_off(sa, d, sa, sa->used - d->used);
  4269. /* Keep the same used so that 0 zeros will be put in. */
  4270. sa->used = i;
  4271. if (r != NULL) {
  4272. tr->dp[sa->used - d->used] = 1;
  4273. }
  4274. }
  4275. for (i = sa->used - 1; i >= d->used; i--) {
  4276. if (sa->dp[i] == dt) {
  4277. t = SP_DIGIT_MAX;
  4278. }
  4279. else {
  4280. t = sp_div_word(sa->dp[i], sa->dp[i-1], dt);
  4281. }
  4282. #ifdef WOLFSSL_SP_SMALL
  4283. do {
  4284. _sp_mul_d(d, t, trial, i - d->used);
  4285. c = _sp_cmp_abs(trial, sa);
  4286. if (c == MP_GT) {
  4287. t--;
  4288. }
  4289. }
  4290. while (c == MP_GT);
  4291. _sp_sub_off(sa, trial, sa, 0);
  4292. tr->dp[i - d->used] += t;
  4293. if (tr->dp[i - d->used] < t) {
  4294. tr->dp[i + 1 - d->used]++;
  4295. }
  4296. #else
  4297. o = i - d->used;
  4298. do {
  4299. sp_int_word tw = 0;
  4300. for (j = 0; j < d->used; j++) {
  4301. tw += (sp_int_word)d->dp[j] * t;
  4302. trial->dp[j] = (sp_int_digit)tw;
  4303. tw >>= SP_WORD_SIZE;
  4304. }
  4305. trial->dp[j] = (sp_int_digit)tw;
  4306. for (j = d->used; j > 0; j--) {
  4307. if (trial->dp[j] != sa->dp[j + o]) {
  4308. break;
  4309. }
  4310. }
  4311. if (trial->dp[j] > sa->dp[j + o]) {
  4312. t--;
  4313. }
  4314. }
  4315. while (trial->dp[j] > sa->dp[j + o]);
  4316. sw = 0;
  4317. for (j = 0; j <= d->used; j++) {
  4318. sw += sa->dp[j + o];
  4319. sw -= trial->dp[j];
  4320. sa->dp[j + o] = (sp_int_digit)sw;
  4321. sw >>= SP_WORD_SIZE;
  4322. }
  4323. tr->dp[o] = t;
  4324. #endif /* WOLFSSL_SP_SMALL */
  4325. }
  4326. sa->used = i + 1;
  4327. if (rem != NULL) {
  4328. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4329. sa->sign = (sa->used == 0) ? MP_ZPOS : aSign;
  4330. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4331. if (s != SP_WORD_SIZE) {
  4332. sp_rshb(sa, s, sa);
  4333. }
  4334. sp_copy(sa, rem);
  4335. sp_clamp(rem);
  4336. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4337. if (sp_iszero(rem)) {
  4338. rem->sign = MP_ZPOS;
  4339. }
  4340. #endif
  4341. }
  4342. if (r != NULL) {
  4343. sp_copy(tr, r);
  4344. sp_clamp(r);
  4345. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4346. if (sp_iszero(r)) {
  4347. r->sign = MP_ZPOS;
  4348. }
  4349. else {
  4350. r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
  4351. }
  4352. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4353. }
  4354. }
  4355. if (0 && (err == MP_OKAY)) {
  4356. if (rem != NULL) {
  4357. sp_print(rem, "rdr");
  4358. }
  4359. if (r != NULL) {
  4360. sp_print(r, "rdw");
  4361. }
  4362. }
  4363. FREE_SP_INT_ARRAY(td, NULL);
  4364. return err;
  4365. }
  4366. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  4367. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
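/* Editor's illustrative sketch (not part of sp_int.c): sp_div() first shifts
 * both operands left so the divisor's top digit has its high bit set, then
 * estimates each quotient digit from the top two remainder digits divided by
 * that top divisor digit. The classic analysis of this scheme (Knuth's
 * Algorithm D) gives an estimate that is never too small and, thanks to the
 * normalisation, at most two too large, which is why the loops above only ever
 * decrement t. A hypothetical estimator for 32-bit digits, assuming hi <= dt: */
static unsigned int demo_trial_digit(unsigned int hi, unsigned int lo,
                                     unsigned int dt)
{
    if (hi == dt) {
        return 0xffffffffu;           /* quotient would overflow: saturate */
    }
    return (unsigned int)((((unsigned long long)hi << 32) | lo) / dt);
}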
  4368. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4369. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4370. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4371. #ifndef FREESCALE_LTC_TFM
  4372. /* Calculate the remainder of dividing a by m: r = a mod m.
  4373. *
  4374. * @param [in] a SP integer to reduce.
  4375. * @param [in] m SP integer that is the modulus.
  4376. * @param [out] r SP integer to store result in.
  4377. *
  4378. * @return MP_OKAY on success.
  4379. * @return MP_VAL when a, m or r is NULL or m is 0.
  4380. */
  4381. int sp_mod(sp_int* a, sp_int* m, sp_int* r)
  4382. {
  4383. int err = MP_OKAY;
  4384. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4385. DECL_SP_INT(t, (m == NULL) ? 1 : m->used);
  4386. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4387. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  4388. err = MP_VAL;
  4389. }
  4390. #ifndef WOLFSSL_SP_INT_NEGATIVE
  4391. if (err == MP_OKAY) {
  4392. err = sp_div(a, m, NULL, r);
  4393. }
  4394. #else
  4395. ALLOC_SP_INT(t, m->used, err, NULL);
  4396. if (err == MP_OKAY) {
  4397. sp_init_size(t, m->used);
  4398. err = sp_div(a, m, NULL, t);
  4399. }
  4400. if (err == MP_OKAY) {
  4401. if (t->sign != m->sign) {
  4402. err = sp_add(t, m, r);
  4403. }
  4404. else {
  4405. err = sp_copy(t, r);
  4406. }
  4407. }
  4408. FREE_SP_INT(t, NULL);
  4409. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4410. return err;
  4411. }
  4412. #endif /* !FREESCALE_LTC_TFM */
  4413. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  4414. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  4415. /* START SP_MUL implementations. */
  4416. /* This code is generated.
  4417. * To generate:
  4418. * cd scripts/sp/sp_int
  4419. * ./gen.sh
  4420. * File sp_mul.c contains code.
  4421. */
  4422. #ifdef SQR_MUL_ASM
4423. /* Multiply a by b into r where a and b have the same number of digits. r = a * b
4424. *
4425. * Optimised code for when the number of digits in a and b is the same.
4426. *
4427. * @param [in] a SP integer to multiply.
4428. * @param [in] b SP integer to multiply by.
4429. * @param [out] r SP integer to hold result.
4430. *
4431. * @return MP_OKAY on success.
  4432. * @return MP_MEM when dynamic memory allocation fails.
  4433. */
  4434. static int _sp_mul_nxn(sp_int* a, sp_int* b, sp_int* r)
  4435. {
  4436. int err = MP_OKAY;
  4437. int i;
  4438. int j;
  4439. int k;
  4440. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4441. sp_int_digit* t = NULL;
  4442. #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL)
  4443. sp_int_digit t[a->used * 2];
  4444. #else
  4445. sp_int_digit t[SP_INT_DIGITS];
  4446. #endif
  4447. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4448. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
  4449. DYNAMIC_TYPE_BIGINT);
  4450. if (t == NULL) {
  4451. err = MP_MEM;
  4452. }
  4453. #endif
  4454. if (err == MP_OKAY) {
  4455. sp_int_digit l, h, o;
  4456. sp_int_digit* dp;
  4457. h = 0;
  4458. l = 0;
  4459. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  4460. t[0] = h;
  4461. h = 0;
  4462. o = 0;
  4463. for (k = 1; k <= a->used - 1; k++) {
  4464. j = k;
  4465. dp = a->dp;
  4466. for (; j >= 0; dp++, j--) {
  4467. SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
  4468. }
  4469. t[k] = l;
  4470. l = h;
  4471. h = o;
  4472. o = 0;
  4473. }
  4474. for (; k <= (a->used - 1) * 2; k++) {
  4475. i = k - (b->used - 1);
  4476. dp = &b->dp[b->used - 1];
  4477. for (; i < a->used; i++, dp--) {
  4478. SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
  4479. }
  4480. t[k] = l;
  4481. l = h;
  4482. h = o;
  4483. o = 0;
  4484. }
  4485. t[k] = l;
  4486. r->used = k + 1;
  4487. XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
  4488. sp_clamp(r);
  4489. }
  4490. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4491. if (t != NULL) {
  4492. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  4493. }
  4494. #endif
  4495. return err;
  4496. }
  4497. /* Multiply a by b into r. r = a * b
  4498. *
4499. * @param [in] a SP integer to multiply.
4500. * @param [in] b SP integer to multiply by.
4501. * @param [out] r SP integer to hold result.
4502. *
4503. * @return MP_OKAY on success.
  4504. * @return MP_MEM when dynamic memory allocation fails.
  4505. */
  4506. static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
  4507. {
  4508. int err = MP_OKAY;
  4509. int i;
  4510. int j;
  4511. int k;
  4512. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4513. sp_int_digit* t = NULL;
  4514. #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL)
  4515. sp_int_digit t[a->used + b->used];
  4516. #else
  4517. sp_int_digit t[SP_INT_DIGITS];
  4518. #endif
  4519. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4520. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used),
  4521. NULL, DYNAMIC_TYPE_BIGINT);
  4522. if (t == NULL) {
  4523. err = MP_MEM;
  4524. }
  4525. #endif
  4526. if (err == MP_OKAY) {
  4527. sp_int_digit l;
  4528. sp_int_digit h;
  4529. sp_int_digit o;
  4530. h = 0;
  4531. l = 0;
  4532. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  4533. t[0] = h;
  4534. h = 0;
  4535. o = 0;
  4536. for (k = 1; k <= b->used - 1; k++) {
  4537. i = 0;
  4538. j = k;
  4539. for (; (i < a->used) && (j >= 0); i++, j--) {
  4540. SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
  4541. }
  4542. t[k] = l;
  4543. l = h;
  4544. h = o;
  4545. o = 0;
  4546. }
  4547. for (; k <= (a->used - 1) + (b->used - 1); k++) {
  4548. j = b->used - 1;
  4549. i = k - j;
  4550. for (; (i < a->used) && (j >= 0); i++, j--) {
  4551. SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
  4552. }
  4553. t[k] = l;
  4554. l = h;
  4555. h = o;
  4556. o = 0;
  4557. }
  4558. t[k] = l;
  4559. r->used = k + 1;
  4560. XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
  4561. sp_clamp(r);
  4562. }
  4563. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4564. if (t != NULL) {
  4565. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  4566. }
  4567. #endif
  4568. return err;
  4569. }
  4570. #else
  4571. /* Multiply a by b into r. r = a * b
  4572. *
4573. * @param [in] a SP integer to multiply.
4574. * @param [in] b SP integer to multiply by.
4575. * @param [out] r SP integer to hold result.
4576. *
4577. * @return MP_OKAY on success.
  4578. * @return MP_MEM when dynamic memory allocation fails.
  4579. */
  4580. static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
  4581. {
  4582. int err = MP_OKAY;
  4583. int i;
  4584. int j;
  4585. int k;
  4586. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4587. sp_int_digit* t = NULL;
  4588. #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL)
  4589. sp_int_digit t[a->used + b->used];
  4590. #else
  4591. sp_int_digit t[SP_INT_DIGITS];
  4592. #endif
  4593. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4594. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used),
  4595. NULL, DYNAMIC_TYPE_BIGINT);
  4596. if (t == NULL) {
  4597. err = MP_MEM;
  4598. }
  4599. #endif
  4600. if (err == MP_OKAY) {
  4601. sp_int_word w;
  4602. sp_int_word l;
  4603. sp_int_word h;
  4604. #ifdef SP_WORD_OVERFLOW
  4605. sp_int_word o;
  4606. #endif
  4607. w = (sp_int_word)a->dp[0] * b->dp[0];
  4608. t[0] = (sp_int_digit)w;
  4609. l = (sp_int_digit)(w >> SP_WORD_SIZE);
  4610. h = 0;
  4611. #ifdef SP_WORD_OVERFLOW
  4612. o = 0;
  4613. #endif
  4614. for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
  4615. i = k - (b->used - 1);
  4616. i &= ~(i >> (sizeof(i) * 8 - 1));
  4617. j = k - i;
  4618. for (; (i < a->used) && (j >= 0); i++, j--) {
  4619. w = (sp_int_word)a->dp[i] * b->dp[j];
  4620. l += (sp_int_digit)w;
  4621. h += (sp_int_digit)(w >> SP_WORD_SIZE);
  4622. #ifdef SP_WORD_OVERFLOW
  4623. h += (sp_int_digit)(l >> SP_WORD_SIZE);
  4624. l &= SP_MASK;
  4625. o += (sp_int_digit)(h >> SP_WORD_SIZE);
  4626. h &= SP_MASK;
  4627. #endif
  4628. }
  4629. t[k] = (sp_int_digit)l;
  4630. l >>= SP_WORD_SIZE;
  4631. l += (sp_int_digit)h;
  4632. h >>= SP_WORD_SIZE;
  4633. #ifdef SP_WORD_OVERFLOW
  4634. h += o & SP_MASK;
  4635. o >>= SP_WORD_SIZE;
  4636. #endif
  4637. }
  4638. t[k] = (sp_int_digit)l;
  4639. r->used = k + 1;
  4640. XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
  4641. sp_clamp(r);
  4642. }
  4643. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4644. if (t != NULL) {
  4645. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  4646. }
  4647. #endif
  4648. return err;
  4649. }
  4650. #endif
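/* Editor's illustrative sketch (not part of sp_int.c): both _sp_mul() variants
 * are column oriented: output digit k is the sum of all a[i] * b[j] with
 * i + j == k, and only the carry flows into column k + 1. A hypothetical
 * version using 16-bit digits with a 64-bit column accumulator, so a whole
 * column plus its carry cannot overflow; r[] holds na + nb digits and must not
 * alias a[] or b[]: */
static void demo_mul_columns(const unsigned short* a, int na,
                             const unsigned short* b, int nb,
                             unsigned short* r)
{
    unsigned long long acc = 0;       /* column sum plus incoming carry */
    int i;
    int k;
    for (k = 0; k < na + nb - 1; k++) {
        i = (k > nb - 1) ? (k - (nb - 1)) : 0;   /* first a index in column k */
        for (; (i < na) && (i <= k); i++) {
            acc += (unsigned long long)a[i] * b[k - i];
        }
        r[k] = (unsigned short)acc;   /* low 16 bits of the column */
        acc >>= 16;                   /* carry the rest into the next column */
    }
    r[k] = (unsigned short)acc;       /* final carry is the top digit */
}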
  4651. #ifndef WOLFSSL_SP_SMALL
  4652. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  4653. #if SP_WORD_SIZE == 64
  4654. #ifndef SQR_MUL_ASM
  4655. /* Multiply a by b and store in r: r = a * b
  4656. *
  4657. * @param [in] a SP integer to multiply.
  4658. * @param [in] b SP integer to multiply.
  4659. * @param [out] r SP integer result.
  4660. *
  4661. * @return MP_OKAY on success.
  4662. * @return MP_MEM when dynamic memory allocation fails.
  4663. */
  4664. static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
  4665. {
  4666. int err = MP_OKAY;
  4667. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4668. sp_int_word* w = NULL;
  4669. #else
  4670. sp_int_word w[16];
  4671. #endif
  4672. sp_int_digit* da = a->dp;
  4673. sp_int_digit* db = b->dp;
  4674. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4675. w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
  4676. DYNAMIC_TYPE_BIGINT);
  4677. if (w == NULL) {
  4678. err = MP_MEM;
  4679. }
  4680. #endif
  4681. if (err == MP_OKAY) {
  4682. w[0] = (sp_int_word)da[0] * db[0];
  4683. w[1] = (sp_int_word)da[0] * db[1];
  4684. w[2] = (sp_int_word)da[1] * db[0];
  4685. w[3] = (sp_int_word)da[0] * db[2];
  4686. w[4] = (sp_int_word)da[1] * db[1];
  4687. w[5] = (sp_int_word)da[2] * db[0];
  4688. w[6] = (sp_int_word)da[0] * db[3];
  4689. w[7] = (sp_int_word)da[1] * db[2];
  4690. w[8] = (sp_int_word)da[2] * db[1];
  4691. w[9] = (sp_int_word)da[3] * db[0];
  4692. w[10] = (sp_int_word)da[1] * db[3];
  4693. w[11] = (sp_int_word)da[2] * db[2];
  4694. w[12] = (sp_int_word)da[3] * db[1];
  4695. w[13] = (sp_int_word)da[2] * db[3];
  4696. w[14] = (sp_int_word)da[3] * db[2];
  4697. w[15] = (sp_int_word)da[3] * db[3];
  4698. r->dp[0] = w[0];
  4699. w[0] >>= SP_WORD_SIZE;
  4700. w[0] += (sp_int_digit)w[1];
  4701. w[0] += (sp_int_digit)w[2];
  4702. r->dp[1] = w[0];
  4703. w[0] >>= SP_WORD_SIZE;
  4704. w[1] >>= SP_WORD_SIZE;
  4705. w[0] += (sp_int_digit)w[1];
  4706. w[2] >>= SP_WORD_SIZE;
  4707. w[0] += (sp_int_digit)w[2];
  4708. w[0] += (sp_int_digit)w[3];
  4709. w[0] += (sp_int_digit)w[4];
  4710. w[0] += (sp_int_digit)w[5];
  4711. r->dp[2] = w[0];
  4712. w[0] >>= SP_WORD_SIZE;
  4713. w[3] >>= SP_WORD_SIZE;
  4714. w[0] += (sp_int_digit)w[3];
  4715. w[4] >>= SP_WORD_SIZE;
  4716. w[0] += (sp_int_digit)w[4];
  4717. w[5] >>= SP_WORD_SIZE;
  4718. w[0] += (sp_int_digit)w[5];
  4719. w[0] += (sp_int_digit)w[6];
  4720. w[0] += (sp_int_digit)w[7];
  4721. w[0] += (sp_int_digit)w[8];
  4722. w[0] += (sp_int_digit)w[9];
  4723. r->dp[3] = w[0];
  4724. w[0] >>= SP_WORD_SIZE;
  4725. w[6] >>= SP_WORD_SIZE;
  4726. w[0] += (sp_int_digit)w[6];
  4727. w[7] >>= SP_WORD_SIZE;
  4728. w[0] += (sp_int_digit)w[7];
  4729. w[8] >>= SP_WORD_SIZE;
  4730. w[0] += (sp_int_digit)w[8];
  4731. w[9] >>= SP_WORD_SIZE;
  4732. w[0] += (sp_int_digit)w[9];
  4733. w[0] += (sp_int_digit)w[10];
  4734. w[0] += (sp_int_digit)w[11];
  4735. w[0] += (sp_int_digit)w[12];
  4736. r->dp[4] = w[0];
  4737. w[0] >>= SP_WORD_SIZE;
  4738. w[10] >>= SP_WORD_SIZE;
  4739. w[0] += (sp_int_digit)w[10];
  4740. w[11] >>= SP_WORD_SIZE;
  4741. w[0] += (sp_int_digit)w[11];
  4742. w[12] >>= SP_WORD_SIZE;
  4743. w[0] += (sp_int_digit)w[12];
  4744. w[0] += (sp_int_digit)w[13];
  4745. w[0] += (sp_int_digit)w[14];
  4746. r->dp[5] = w[0];
  4747. w[0] >>= SP_WORD_SIZE;
  4748. w[13] >>= SP_WORD_SIZE;
  4749. w[0] += (sp_int_digit)w[13];
  4750. w[14] >>= SP_WORD_SIZE;
  4751. w[0] += (sp_int_digit)w[14];
  4752. w[0] += (sp_int_digit)w[15];
  4753. r->dp[6] = w[0];
  4754. w[0] >>= SP_WORD_SIZE;
  4755. w[15] >>= SP_WORD_SIZE;
  4756. w[0] += (sp_int_digit)w[15];
  4757. r->dp[7] = w[0];
  4758. r->used = 8;
  4759. sp_clamp(r);
  4760. }
  4761. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  4762. if (w != NULL) {
  4763. XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
  4764. }
  4765. #endif
  4766. return err;
  4767. }
  4768. #else /* SQR_MUL_ASM */
  4769. /* Multiply a by b and store in r: r = a * b
  4770. *
  4771. * @param [in] a SP integer to multiply.
  4772. * @param [in] b SP integer to multiply.
  4773. * @param [out] r SP integer result.
  4774. *
  4775. * @return MP_OKAY on success.
  4776. * @return MP_MEM when dynamic memory allocation fails.
  4777. */
  4778. static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
  4779. {
  4780. sp_int_digit l = 0;
  4781. sp_int_digit h = 0;
  4782. sp_int_digit o = 0;
  4783. sp_int_digit t[4];
  4784. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  4785. t[0] = h;
  4786. h = 0;
  4787. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  4788. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  4789. t[1] = l;
  4790. l = h;
  4791. h = o;
  4792. o = 0;
  4793. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  4794. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  4795. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  4796. t[2] = l;
  4797. l = h;
  4798. h = o;
  4799. o = 0;
  4800. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  4801. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  4802. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  4803. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  4804. t[3] = l;
  4805. l = h;
  4806. h = o;
  4807. o = 0;
  4808. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  4809. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  4810. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  4811. r->dp[4] = l;
  4812. l = h;
  4813. h = o;
  4814. o = 0;
  4815. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  4816. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  4817. r->dp[5] = l;
  4818. l = h;
  4819. h = o;
  4820. SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
  4821. r->dp[6] = l;
  4822. r->dp[7] = h;
  4823. XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
  4824. r->used = 8;
  4825. sp_clamp(r);
  4826. return MP_OKAY;
  4827. }
  4828. #endif /* SQR_MUL_ASM */
  4829. #endif /* SP_WORD_SIZE == 64 */
  4830. #if SP_WORD_SIZE == 64
  4831. #ifdef SQR_MUL_ASM
  4832. /* Multiply a by b and store in r: r = a * b
  4833. *
  4834. * @param [in] a SP integer to multiply.
  4835. * @param [in] b SP integer to multiply.
  4836. * @param [out] r SP integer result.
  4837. *
  4838. * @return MP_OKAY on success.
  4839. * @return MP_MEM when dynamic memory allocation fails.
  4840. */
  4841. static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r)
  4842. {
  4843. sp_int_digit l = 0;
  4844. sp_int_digit h = 0;
  4845. sp_int_digit o = 0;
  4846. sp_int_digit t[6];
  4847. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  4848. t[0] = h;
  4849. h = 0;
  4850. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  4851. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  4852. t[1] = l;
  4853. l = h;
  4854. h = o;
  4855. o = 0;
  4856. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  4857. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  4858. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  4859. t[2] = l;
  4860. l = h;
  4861. h = o;
  4862. o = 0;
  4863. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  4864. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  4865. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  4866. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  4867. t[3] = l;
  4868. l = h;
  4869. h = o;
  4870. o = 0;
  4871. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  4872. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  4873. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  4874. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  4875. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  4876. t[4] = l;
  4877. l = h;
  4878. h = o;
  4879. o = 0;
  4880. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  4881. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  4882. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  4883. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  4884. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  4885. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  4886. t[5] = l;
  4887. l = h;
  4888. h = o;
  4889. o = 0;
  4890. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  4891. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  4892. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  4893. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  4894. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  4895. r->dp[6] = l;
  4896. l = h;
  4897. h = o;
  4898. o = 0;
  4899. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  4900. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  4901. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  4902. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  4903. r->dp[7] = l;
  4904. l = h;
  4905. h = o;
  4906. o = 0;
  4907. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  4908. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  4909. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  4910. r->dp[8] = l;
  4911. l = h;
  4912. h = o;
  4913. o = 0;
  4914. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  4915. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  4916. r->dp[9] = l;
  4917. l = h;
  4918. h = o;
  4919. SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
  4920. r->dp[10] = l;
  4921. r->dp[11] = h;
  4922. XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
  4923. r->used = 12;
  4924. sp_clamp(r);
  4925. return MP_OKAY;
  4926. }
  4927. #endif /* SQR_MUL_ASM */
  4928. #endif /* SP_WORD_SIZE == 64 */
  4929. #if SP_WORD_SIZE == 32
  4930. #ifdef SQR_MUL_ASM
  4931. /* Multiply a by b and store in r: r = a * b
  4932. *
  4933. * @param [in] a SP integer to multiply.
  4934. * @param [in] b SP integer to multiply.
  4935. * @param [out] r SP integer result.
  4936. *
  4937. * @return MP_OKAY on success.
  4938. * @return MP_MEM when dynamic memory allocation fails.
  4939. */
  4940. static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r)
  4941. {
  4942. sp_int_digit l = 0;
  4943. sp_int_digit h = 0;
  4944. sp_int_digit o = 0;
  4945. sp_int_digit t[8];
  4946. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  4947. t[0] = h;
  4948. h = 0;
  4949. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  4950. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  4951. t[1] = l;
  4952. l = h;
  4953. h = o;
  4954. o = 0;
  4955. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  4956. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  4957. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  4958. t[2] = l;
  4959. l = h;
  4960. h = o;
  4961. o = 0;
  4962. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  4963. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  4964. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  4965. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  4966. t[3] = l;
  4967. l = h;
  4968. h = o;
  4969. o = 0;
  4970. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  4971. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  4972. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  4973. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  4974. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  4975. t[4] = l;
  4976. l = h;
  4977. h = o;
  4978. o = 0;
  4979. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  4980. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  4981. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  4982. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  4983. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  4984. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  4985. t[5] = l;
  4986. l = h;
  4987. h = o;
  4988. o = 0;
  4989. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  4990. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  4991. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  4992. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  4993. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  4994. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  4995. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  4996. t[6] = l;
  4997. l = h;
  4998. h = o;
  4999. o = 0;
  5000. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  5001. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  5002. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  5003. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  5004. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  5005. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  5006. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  5007. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  5008. t[7] = l;
  5009. l = h;
  5010. h = o;
  5011. o = 0;
  5012. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  5013. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  5014. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  5015. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  5016. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  5017. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  5018. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  5019. r->dp[8] = l;
  5020. l = h;
  5021. h = o;
  5022. o = 0;
  5023. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  5024. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  5025. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  5026. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  5027. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  5028. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  5029. r->dp[9] = l;
  5030. l = h;
  5031. h = o;
  5032. o = 0;
  5033. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  5034. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  5035. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  5036. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  5037. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  5038. r->dp[10] = l;
  5039. l = h;
  5040. h = o;
  5041. o = 0;
  5042. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  5043. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  5044. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  5045. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  5046. r->dp[11] = l;
  5047. l = h;
  5048. h = o;
  5049. o = 0;
  5050. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  5051. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  5052. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  5053. r->dp[12] = l;
  5054. l = h;
  5055. h = o;
  5056. o = 0;
  5057. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  5058. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  5059. r->dp[13] = l;
  5060. l = h;
  5061. h = o;
  5062. SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
  5063. r->dp[14] = l;
  5064. r->dp[15] = h;
  5065. XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
  5066. r->used = 16;
  5067. sp_clamp(r);
  5068. return MP_OKAY;
  5069. }
  5070. #endif /* SQR_MUL_ASM */
  5071. #endif /* SP_WORD_SIZE == 32 */
  5072. #if SP_WORD_SIZE == 32
  5073. #ifdef SQR_MUL_ASM
  5074. /* Multiply a by b and store in r: r = a * b
  5075. *
  5076. * @param [in] a SP integer to multiply.
  5077. * @param [in] b SP integer to multiply.
  5078. * @param [out] r SP integer result.
  5079. *
  5080. * @return MP_OKAY on success.
  5081. * @return MP_MEM when dynamic memory allocation fails.
  5082. */
  5083. static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r)
  5084. {
  5085. sp_int_digit l = 0;
  5086. sp_int_digit h = 0;
  5087. sp_int_digit o = 0;
  5088. sp_int_digit t[12];
  5089. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  5090. t[0] = h;
  5091. h = 0;
  5092. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  5093. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  5094. t[1] = l;
  5095. l = h;
  5096. h = o;
  5097. o = 0;
  5098. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  5099. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  5100. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  5101. t[2] = l;
  5102. l = h;
  5103. h = o;
  5104. o = 0;
  5105. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  5106. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  5107. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  5108. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  5109. t[3] = l;
  5110. l = h;
  5111. h = o;
  5112. o = 0;
  5113. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  5114. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  5115. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  5116. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  5117. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  5118. t[4] = l;
  5119. l = h;
  5120. h = o;
  5121. o = 0;
  5122. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  5123. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  5124. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  5125. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  5126. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  5127. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  5128. t[5] = l;
  5129. l = h;
  5130. h = o;
  5131. o = 0;
  5132. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  5133. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  5134. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  5135. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  5136. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  5137. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  5138. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  5139. t[6] = l;
  5140. l = h;
  5141. h = o;
  5142. o = 0;
  5143. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  5144. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  5145. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  5146. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  5147. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  5148. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  5149. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  5150. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  5151. t[7] = l;
  5152. l = h;
  5153. h = o;
  5154. o = 0;
  5155. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  5156. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  5157. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  5158. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  5159. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  5160. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  5161. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  5162. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  5163. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  5164. t[8] = l;
  5165. l = h;
  5166. h = o;
  5167. o = 0;
  5168. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  5169. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  5170. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  5171. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  5172. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  5173. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  5174. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  5175. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  5176. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  5177. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  5178. t[9] = l;
  5179. l = h;
  5180. h = o;
  5181. o = 0;
  5182. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  5183. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  5184. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  5185. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  5186. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  5187. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  5188. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  5189. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  5190. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  5191. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  5192. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  5193. t[10] = l;
  5194. l = h;
  5195. h = o;
  5196. o = 0;
  5197. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  5198. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  5199. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  5200. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  5201. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  5202. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  5203. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  5204. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  5205. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  5206. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  5207. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  5208. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  5209. t[11] = l;
  5210. l = h;
  5211. h = o;
  5212. o = 0;
  5213. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  5214. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  5215. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  5216. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  5217. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  5218. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  5219. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  5220. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  5221. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  5222. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  5223. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  5224. r->dp[12] = l;
  5225. l = h;
  5226. h = o;
  5227. o = 0;
  5228. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  5229. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  5230. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  5231. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  5232. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  5233. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  5234. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  5235. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  5236. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  5237. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  5238. r->dp[13] = l;
  5239. l = h;
  5240. h = o;
  5241. o = 0;
  5242. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  5243. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  5244. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  5245. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  5246. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  5247. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  5248. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  5249. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  5250. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  5251. r->dp[14] = l;
  5252. l = h;
  5253. h = o;
  5254. o = 0;
  5255. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  5256. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  5257. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  5258. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  5259. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  5260. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  5261. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  5262. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  5263. r->dp[15] = l;
  5264. l = h;
  5265. h = o;
  5266. o = 0;
  5267. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  5268. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  5269. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  5270. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  5271. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  5272. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  5273. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  5274. r->dp[16] = l;
  5275. l = h;
  5276. h = o;
  5277. o = 0;
  5278. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  5279. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  5280. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  5281. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  5282. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  5283. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  5284. r->dp[17] = l;
  5285. l = h;
  5286. h = o;
  5287. o = 0;
  5288. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  5289. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  5290. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  5291. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  5292. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  5293. r->dp[18] = l;
  5294. l = h;
  5295. h = o;
  5296. o = 0;
  5297. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  5298. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  5299. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  5300. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  5301. r->dp[19] = l;
  5302. l = h;
  5303. h = o;
  5304. o = 0;
  5305. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  5306. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  5307. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  5308. r->dp[20] = l;
  5309. l = h;
  5310. h = o;
  5311. o = 0;
  5312. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  5313. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  5314. r->dp[21] = l;
  5315. l = h;
  5316. h = o;
  5317. SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
  5318. r->dp[22] = l;
  5319. r->dp[23] = h;
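/* Copy the buffered low 12 digits into the result now that both operands
 * have been completely read, then trim leading zero digits. */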
  5320. XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
  5321. r->used = 24;
  5322. sp_clamp(r);
  5323. return MP_OKAY;
  5324. }
  5325. #endif /* SQR_MUL_ASM */
  5326. #endif /* SP_WORD_SIZE == 32 */
  5327. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  5328. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  5329. #if SP_INT_DIGITS >= 32
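/* Large fixed-size multipliers, available only with SQR_MUL_ASM and
 * WOLFSSL_SP_INT_LARGE_COMBA, and only when SP_INT_DIGITS can hold the
 * double-width result. They use the same column scheme as the smaller
 * routines but can heap-allocate the temporary digit buffer to keep stack
 * usage down. */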
  5330. /* Multiply a by b and store in r: r = a * b
  5331. *
  5332. * @param [in] a SP integer to multiply.
  5333. * @param [in] b SP integer to multiply.
  5334. * @param [out] r SP integer result.
  5335. *
  5336. * @return MP_OKAY on success.
  5337. * @return MP_MEM when dynamic memory allocation fails.
  5338. */
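/* Implementation note: 16-digit operands, 32-digit result. With
 * WOLFSSL_SMALL_STACK (and malloc not disabled) the temporary t is
 * heap-allocated and MP_MEM is returned if that allocation fails; otherwise
 * t is a 16-digit stack array. */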
  5339. static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r)
  5340. {
  5341. int err = MP_OKAY;
  5342. sp_int_digit l = 0;
  5343. sp_int_digit h = 0;
  5344. sp_int_digit o = 0;
  5345. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  5346. sp_int_digit* t = NULL;
  5347. #else
  5348. sp_int_digit t[16];
  5349. #endif
  5350. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  5351. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
  5352. DYNAMIC_TYPE_BIGINT);
  5353. if (t == NULL) {
  5354. err = MP_MEM;
  5355. }
  5356. #endif
  5357. if (err == MP_OKAY) {
  5358. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  5359. t[0] = h;
  5360. h = 0;
  5361. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  5362. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  5363. t[1] = l;
  5364. l = h;
  5365. h = o;
  5366. o = 0;
  5367. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  5368. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  5369. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  5370. t[2] = l;
  5371. l = h;
  5372. h = o;
  5373. o = 0;
  5374. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  5375. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  5376. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  5377. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  5378. t[3] = l;
  5379. l = h;
  5380. h = o;
  5381. o = 0;
  5382. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  5383. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  5384. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  5385. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  5386. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  5387. t[4] = l;
  5388. l = h;
  5389. h = o;
  5390. o = 0;
  5391. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  5392. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  5393. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  5394. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  5395. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  5396. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  5397. t[5] = l;
  5398. l = h;
  5399. h = o;
  5400. o = 0;
  5401. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  5402. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  5403. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  5404. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  5405. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  5406. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  5407. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  5408. t[6] = l;
  5409. l = h;
  5410. h = o;
  5411. o = 0;
  5412. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  5413. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  5414. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  5415. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  5416. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  5417. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  5418. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  5419. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  5420. t[7] = l;
  5421. l = h;
  5422. h = o;
  5423. o = 0;
  5424. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  5425. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  5426. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  5427. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  5428. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  5429. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  5430. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  5431. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  5432. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  5433. t[8] = l;
  5434. l = h;
  5435. h = o;
  5436. o = 0;
  5437. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  5438. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  5439. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  5440. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  5441. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  5442. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  5443. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  5444. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  5445. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  5446. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  5447. t[9] = l;
  5448. l = h;
  5449. h = o;
  5450. o = 0;
  5451. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  5452. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  5453. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  5454. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  5455. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  5456. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  5457. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  5458. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  5459. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  5460. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  5461. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  5462. t[10] = l;
  5463. l = h;
  5464. h = o;
  5465. o = 0;
  5466. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  5467. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  5468. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  5469. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  5470. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  5471. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  5472. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  5473. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  5474. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  5475. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  5476. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  5477. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  5478. t[11] = l;
  5479. l = h;
  5480. h = o;
  5481. o = 0;
  5482. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  5483. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  5484. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  5485. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  5486. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  5487. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  5488. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  5489. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  5490. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  5491. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  5492. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  5493. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  5494. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  5495. t[12] = l;
  5496. l = h;
  5497. h = o;
  5498. o = 0;
  5499. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  5500. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  5501. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  5502. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  5503. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  5504. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  5505. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  5506. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  5507. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  5508. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  5509. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  5510. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  5511. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  5512. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  5513. t[13] = l;
  5514. l = h;
  5515. h = o;
  5516. o = 0;
  5517. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  5518. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  5519. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  5520. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  5521. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  5522. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  5523. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  5524. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  5525. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  5526. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  5527. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  5528. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  5529. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  5530. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  5531. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  5532. t[14] = l;
  5533. l = h;
  5534. h = o;
  5535. o = 0;
  5536. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  5537. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  5538. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  5539. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  5540. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  5541. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  5542. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  5543. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  5544. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  5545. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  5546. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  5547. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  5548. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  5549. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  5550. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  5551. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  5552. t[15] = l;
  5553. l = h;
  5554. h = o;
  5555. o = 0;
  5556. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  5557. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  5558. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  5559. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  5560. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  5561. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  5562. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  5563. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  5564. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  5565. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  5566. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  5567. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  5568. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  5569. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  5570. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  5571. r->dp[16] = l;
  5572. l = h;
  5573. h = o;
  5574. o = 0;
  5575. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  5576. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  5577. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  5578. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  5579. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  5580. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  5581. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  5582. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  5583. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  5584. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  5585. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  5586. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  5587. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  5588. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  5589. r->dp[17] = l;
  5590. l = h;
  5591. h = o;
  5592. o = 0;
  5593. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  5594. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  5595. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  5596. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  5597. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  5598. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  5599. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  5600. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  5601. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  5602. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  5603. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  5604. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  5605. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  5606. r->dp[18] = l;
  5607. l = h;
  5608. h = o;
  5609. o = 0;
  5610. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  5611. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  5612. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  5613. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  5614. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  5615. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  5616. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  5617. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  5618. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  5619. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  5620. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  5621. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  5622. r->dp[19] = l;
  5623. l = h;
  5624. h = o;
  5625. o = 0;
  5626. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  5627. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  5628. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  5629. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  5630. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  5631. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  5632. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  5633. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  5634. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  5635. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  5636. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  5637. r->dp[20] = l;
  5638. l = h;
  5639. h = o;
  5640. o = 0;
  5641. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  5642. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  5643. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  5644. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  5645. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  5646. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  5647. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  5648. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  5649. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  5650. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  5651. r->dp[21] = l;
  5652. l = h;
  5653. h = o;
  5654. o = 0;
  5655. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  5656. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  5657. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  5658. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  5659. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  5660. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  5661. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  5662. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  5663. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  5664. r->dp[22] = l;
  5665. l = h;
  5666. h = o;
  5667. o = 0;
  5668. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  5669. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  5670. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  5671. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  5672. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  5673. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  5674. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  5675. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  5676. r->dp[23] = l;
  5677. l = h;
  5678. h = o;
  5679. o = 0;
  5680. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  5681. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  5682. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  5683. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  5684. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  5685. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  5686. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  5687. r->dp[24] = l;
  5688. l = h;
  5689. h = o;
  5690. o = 0;
  5691. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  5692. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  5693. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  5694. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  5695. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  5696. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  5697. r->dp[25] = l;
  5698. l = h;
  5699. h = o;
  5700. o = 0;
  5701. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  5702. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  5703. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  5704. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  5705. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  5706. r->dp[26] = l;
  5707. l = h;
  5708. h = o;
  5709. o = 0;
  5710. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  5711. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  5712. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  5713. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  5714. r->dp[27] = l;
  5715. l = h;
  5716. h = o;
  5717. o = 0;
  5718. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  5719. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  5720. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  5721. r->dp[28] = l;
  5722. l = h;
  5723. h = o;
  5724. o = 0;
  5725. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  5726. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  5727. r->dp[29] = l;
  5728. l = h;
  5729. h = o;
  5730. SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
  5731. r->dp[30] = l;
  5732. r->dp[31] = h;
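/* As above: the low 16 digits were buffered in t so that r may alias an
 * operand; copy them into place and trim leading zero digits. */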
  5733. XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
  5734. r->used = 32;
  5735. sp_clamp(r);
  5736. }
  5737. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  5738. if (t != NULL) {
  5739. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  5740. }
  5741. #endif
  5742. return err;
  5743. }
  5744. #endif /* SP_INT_DIGITS >= 32 */
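/* The guard above only exposes _sp_mul_16 when SP_INT_DIGITS can hold its
 * 32-digit result; the generic multiplication path elsewhere in this file is
 * presumably what selects between these fixed-size routines based on the
 * operands' digit counts. */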
  5745. #if SP_INT_DIGITS >= 48
  5746. /* Multiply a by b and store in r: r = a * b
  5747. *
  5748. * @param [in] a SP integer to multiply.
  5749. * @param [in] b SP integer to multiply.
  5750. * @param [out] r SP integer result.
  5751. *
  5752. * @return MP_OKAY on success.
  5753. * @return MP_MEM when dynamic memory allocation fails.
  5754. */
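/* Implementation note: 24-digit operands, 48-digit result, using the same
 * optionally heap-allocated temporary as _sp_mul_16 above. */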
  5755. static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r)
  5756. {
  5757. int err = MP_OKAY;
  5758. sp_int_digit l = 0;
  5759. sp_int_digit h = 0;
  5760. sp_int_digit o = 0;
  5761. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  5762. sp_int_digit* t = NULL;
  5763. #else
  5764. sp_int_digit t[24];
  5765. #endif
  5766. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  5767. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  5768. DYNAMIC_TYPE_BIGINT);
  5769. if (t == NULL) {
  5770. err = MP_MEM;
  5771. }
  5772. #endif
  5773. if (err == MP_OKAY) {
  5774. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  5775. t[0] = h;
  5776. h = 0;
  5777. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  5778. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  5779. t[1] = l;
  5780. l = h;
  5781. h = o;
  5782. o = 0;
  5783. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  5784. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  5785. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  5786. t[2] = l;
  5787. l = h;
  5788. h = o;
  5789. o = 0;
  5790. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  5791. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  5792. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  5793. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  5794. t[3] = l;
  5795. l = h;
  5796. h = o;
  5797. o = 0;
  5798. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  5799. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  5800. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  5801. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  5802. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  5803. t[4] = l;
  5804. l = h;
  5805. h = o;
  5806. o = 0;
  5807. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  5808. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  5809. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  5810. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  5811. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  5812. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  5813. t[5] = l;
  5814. l = h;
  5815. h = o;
  5816. o = 0;
  5817. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  5818. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  5819. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  5820. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  5821. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  5822. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  5823. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  5824. t[6] = l;
  5825. l = h;
  5826. h = o;
  5827. o = 0;
  5828. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  5829. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  5830. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  5831. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  5832. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  5833. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  5834. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  5835. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  5836. t[7] = l;
  5837. l = h;
  5838. h = o;
  5839. o = 0;
  5840. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  5841. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  5842. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  5843. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  5844. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  5845. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  5846. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  5847. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  5848. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  5849. t[8] = l;
  5850. l = h;
  5851. h = o;
  5852. o = 0;
  5853. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  5854. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  5855. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  5856. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  5857. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  5858. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  5859. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  5860. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  5861. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  5862. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  5863. t[9] = l;
  5864. l = h;
  5865. h = o;
  5866. o = 0;
  5867. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  5868. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  5869. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  5870. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  5871. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  5872. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  5873. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  5874. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  5875. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  5876. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  5877. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  5878. t[10] = l;
  5879. l = h;
  5880. h = o;
  5881. o = 0;
  5882. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  5883. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  5884. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  5885. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  5886. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  5887. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  5888. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  5889. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  5890. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  5891. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  5892. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  5893. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  5894. t[11] = l;
  5895. l = h;
  5896. h = o;
  5897. o = 0;
  5898. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  5899. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  5900. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  5901. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  5902. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  5903. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  5904. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  5905. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  5906. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  5907. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  5908. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  5909. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  5910. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  5911. t[12] = l;
  5912. l = h;
  5913. h = o;
  5914. o = 0;
  5915. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  5916. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  5917. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  5918. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  5919. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  5920. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  5921. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  5922. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  5923. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  5924. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  5925. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  5926. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  5927. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  5928. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  5929. t[13] = l;
  5930. l = h;
  5931. h = o;
  5932. o = 0;
  5933. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  5934. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  5935. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  5936. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  5937. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  5938. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  5939. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  5940. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  5941. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  5942. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  5943. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  5944. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  5945. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  5946. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  5947. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  5948. t[14] = l;
  5949. l = h;
  5950. h = o;
  5951. o = 0;
  5952. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  5953. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  5954. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  5955. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  5956. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  5957. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  5958. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  5959. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  5960. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  5961. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  5962. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  5963. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  5964. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  5965. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  5966. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  5967. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  5968. t[15] = l;
  5969. l = h;
  5970. h = o;
  5971. o = 0;
  5972. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
  5973. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  5974. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  5975. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  5976. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  5977. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  5978. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  5979. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  5980. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  5981. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  5982. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  5983. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  5984. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  5985. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  5986. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  5987. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  5988. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
  5989. t[16] = l;
  5990. l = h;
  5991. h = o;
  5992. o = 0;
  5993. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
  5994. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
  5995. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  5996. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  5997. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  5998. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  5999. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  6000. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  6001. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  6002. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  6003. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  6004. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  6005. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  6006. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  6007. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  6008. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  6009. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
  6010. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
  6011. t[17] = l;
  6012. l = h;
  6013. h = o;
  6014. o = 0;
  6015. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
  6016. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
  6017. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
  6018. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  6019. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  6020. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  6021. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  6022. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  6023. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  6024. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  6025. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  6026. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  6027. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  6028. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  6029. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  6030. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  6031. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
  6032. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
  6033. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
  6034. t[18] = l;
  6035. l = h;
  6036. h = o;
  6037. o = 0;
  6038. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
  6039. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
  6040. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
  6041. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
  6042. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  6043. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  6044. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  6045. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  6046. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  6047. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  6048. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  6049. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  6050. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  6051. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  6052. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  6053. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  6054. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
  6055. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
  6056. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
  6057. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
  6058. t[19] = l;
  6059. l = h;
  6060. h = o;
  6061. o = 0;
  6062. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
  6063. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
  6064. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
  6065. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
  6066. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
  6067. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  6068. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  6069. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  6070. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  6071. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  6072. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  6073. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  6074. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  6075. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  6076. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  6077. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  6078. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
  6079. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
  6080. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
  6081. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
  6082. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
  6083. t[20] = l;
  6084. l = h;
  6085. h = o;
  6086. o = 0;
  6087. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
  6088. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
  6089. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
  6090. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
  6091. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
  6092. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
  6093. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  6094. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  6095. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  6096. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  6097. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  6098. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  6099. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  6100. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  6101. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  6102. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  6103. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
  6104. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
  6105. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
  6106. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
  6107. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
  6108. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
  6109. t[21] = l;
  6110. l = h;
  6111. h = o;
  6112. o = 0;
  6113. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
  6114. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
  6115. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
  6116. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
  6117. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
  6118. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
  6119. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
  6120. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  6121. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  6122. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  6123. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  6124. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  6125. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  6126. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  6127. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  6128. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  6129. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
  6130. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
  6131. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
  6132. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
  6133. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
  6134. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
  6135. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
  6136. t[22] = l;
  6137. l = h;
  6138. h = o;
  6139. o = 0;
  6140. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
  6141. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
  6142. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
  6143. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
  6144. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
  6145. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
  6146. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
  6147. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
  6148. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  6149. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  6150. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  6151. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  6152. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  6153. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  6154. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  6155. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  6156. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
  6157. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
  6158. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
  6159. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
  6160. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
  6161. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
  6162. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
  6163. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
  6164. t[23] = l;
  6165. l = h;
  6166. h = o;
  6167. o = 0;
  6168. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
  6169. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
  6170. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
  6171. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
  6172. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
  6173. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
  6174. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
  6175. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
  6176. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  6177. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  6178. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  6179. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  6180. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  6181. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  6182. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  6183. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
  6184. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
  6185. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
  6186. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
  6187. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
  6188. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
  6189. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
  6190. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
  6191. r->dp[24] = l;
  6192. l = h;
  6193. h = o;
  6194. o = 0;
  6195. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
  6196. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
  6197. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
  6198. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
  6199. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
  6200. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
  6201. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
  6202. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
  6203. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  6204. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  6205. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  6206. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  6207. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  6208. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  6209. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
  6210. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
  6211. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
  6212. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
  6213. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
  6214. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
  6215. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
  6216. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
  6217. r->dp[25] = l;
  6218. l = h;
  6219. h = o;
  6220. o = 0;
  6221. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
  6222. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
  6223. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
  6224. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
  6225. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
  6226. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
  6227. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
  6228. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
  6229. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  6230. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  6231. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  6232. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  6233. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  6234. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
  6235. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
  6236. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
  6237. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
  6238. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
  6239. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
  6240. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
  6241. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
  6242. r->dp[26] = l;
  6243. l = h;
  6244. h = o;
  6245. o = 0;
  6246. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
  6247. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
  6248. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
  6249. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
  6250. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
  6251. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
  6252. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
  6253. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
  6254. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  6255. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  6256. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  6257. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  6258. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
  6259. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
  6260. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
  6261. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
  6262. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
  6263. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
  6264. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
  6265. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
  6266. r->dp[27] = l;
  6267. l = h;
  6268. h = o;
  6269. o = 0;
  6270. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
  6271. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
  6272. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
  6273. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
  6274. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
  6275. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
  6276. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
  6277. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
  6278. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  6279. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  6280. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  6281. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
  6282. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
  6283. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
  6284. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
  6285. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
  6286. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
  6287. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
  6288. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
  6289. r->dp[28] = l;
  6290. l = h;
  6291. h = o;
  6292. o = 0;
  6293. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
  6294. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
  6295. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
  6296. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
  6297. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
  6298. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
  6299. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
  6300. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
  6301. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  6302. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  6303. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
  6304. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
  6305. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
  6306. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
  6307. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
  6308. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
  6309. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
  6310. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
  6311. r->dp[29] = l;
  6312. l = h;
  6313. h = o;
  6314. o = 0;
  6315. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
  6316. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
  6317. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
  6318. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
  6319. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
  6320. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
  6321. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
  6322. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
  6323. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
  6324. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
  6325. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
  6326. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
  6327. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
  6328. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
  6329. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
  6330. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
  6331. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
  6332. r->dp[30] = l;
  6333. l = h;
  6334. h = o;
  6335. o = 0;
  6336. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
  6337. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
  6338. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
  6339. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
  6340. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
  6341. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
  6342. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
  6343. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
  6344. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
  6345. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
  6346. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
  6347. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
  6348. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
  6349. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
  6350. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
  6351. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
  6352. r->dp[31] = l;
  6353. l = h;
  6354. h = o;
  6355. o = 0;
  6356. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
  6357. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
  6358. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
  6359. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
  6360. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
  6361. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
  6362. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
  6363. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
  6364. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
  6365. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
  6366. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
  6367. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
  6368. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
  6369. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
  6370. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
  6371. r->dp[32] = l;
  6372. l = h;
  6373. h = o;
  6374. o = 0;
  6375. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
  6376. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
  6377. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
  6378. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
  6379. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
  6380. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
  6381. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
  6382. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
  6383. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
  6384. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
  6385. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
  6386. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
  6387. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
  6388. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
  6389. r->dp[33] = l;
  6390. l = h;
  6391. h = o;
  6392. o = 0;
  6393. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
  6394. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
  6395. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
  6396. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
  6397. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
  6398. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
  6399. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
  6400. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
  6401. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
  6402. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
  6403. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
  6404. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
  6405. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
  6406. r->dp[34] = l;
  6407. l = h;
  6408. h = o;
  6409. o = 0;
  6410. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
  6411. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
  6412. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
  6413. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
  6414. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
  6415. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
  6416. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
  6417. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
  6418. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
  6419. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
  6420. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
  6421. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
  6422. r->dp[35] = l;
  6423. l = h;
  6424. h = o;
  6425. o = 0;
  6426. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
  6427. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
  6428. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
  6429. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
  6430. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
  6431. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
  6432. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
  6433. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
  6434. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
  6435. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
  6436. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
  6437. r->dp[36] = l;
  6438. l = h;
  6439. h = o;
  6440. o = 0;
  6441. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
  6442. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
  6443. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
  6444. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
  6445. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
  6446. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
  6447. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
  6448. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
  6449. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
  6450. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
  6451. r->dp[37] = l;
  6452. l = h;
  6453. h = o;
  6454. o = 0;
  6455. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
  6456. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
  6457. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
  6458. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
  6459. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
  6460. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
  6461. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
  6462. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
  6463. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
  6464. r->dp[38] = l;
  6465. l = h;
  6466. h = o;
  6467. o = 0;
  6468. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
  6469. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
  6470. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
  6471. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
  6472. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
  6473. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
  6474. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
  6475. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
  6476. r->dp[39] = l;
  6477. l = h;
  6478. h = o;
  6479. o = 0;
  6480. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
  6481. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
  6482. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
  6483. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
  6484. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
  6485. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
  6486. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
  6487. r->dp[40] = l;
  6488. l = h;
  6489. h = o;
  6490. o = 0;
  6491. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
  6492. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
  6493. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
  6494. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
  6495. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
  6496. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
  6497. r->dp[41] = l;
  6498. l = h;
  6499. h = o;
  6500. o = 0;
  6501. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
  6502. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
  6503. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
  6504. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
  6505. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
  6506. r->dp[42] = l;
  6507. l = h;
  6508. h = o;
  6509. o = 0;
  6510. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
  6511. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
  6512. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
  6513. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
  6514. r->dp[43] = l;
  6515. l = h;
  6516. h = o;
  6517. o = 0;
  6518. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
  6519. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
  6520. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
  6521. r->dp[44] = l;
  6522. l = h;
  6523. h = o;
  6524. o = 0;
  6525. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
  6526. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
  6527. r->dp[45] = l;
  6528. l = h;
  6529. h = o;
  6530. SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
  6531. r->dp[46] = l;
  6532. r->dp[47] = h;
  6533. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  6534. r->used = 48;
  6535. sp_clamp(r);
  6536. }
  6537. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  6538. if (t != NULL) {
  6539. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  6540. }
  6541. #endif
  6542. return err;
  6543. }
  6544. #endif /* SP_INT_DIGITS >= 48 */
  6545. #if SP_INT_DIGITS >= 64
  6546. /* Multiply a by b and store in r: r = a * b
  6547. *
  6548. * @param [in] a SP integer to multiply.
  6549. * @param [in] b SP integer to multiply.
  6550. * @param [out] r SP integer result.
  6551. *
  6552. * @return MP_OKAY on success.
  6553. * @return MP_MEM when dynamic memory allocation fails.
  6554. */
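/* Note: _sp_mul_32 and the larger multipliers below apply one level of
 * Karatsuba. With R = 2^(16 * SP_WORD_SIZE), split a = a1*R + a0 and
 * b = b1*R + b0; then
 *     a * b = z2*R^2 + (z1 - z0 - z2)*R + z0
 * where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1), so only three
 * half-size multiplications (_sp_mul_16 here) are needed instead of four.
 */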
  6555. static int _sp_mul_32(sp_int* a, sp_int* b, sp_int* r)
  6556. {
  6557. int err = MP_OKAY;
  6558. int i;
  6559. sp_int_digit l;
  6560. sp_int_digit h;
  6561. sp_int* a1;
  6562. sp_int* b1;
  6563. sp_int* z0;
  6564. sp_int* z1;
  6565. sp_int* z2;
  6566. sp_int_digit ca;
  6567. sp_int_digit cb;
  6568. DECL_SP_INT_ARRAY(t, 16, 2);
  6569. DECL_SP_INT_ARRAY(z, 33, 2);
  6570. ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
  6571. ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
  6572. if (err == MP_OKAY) {
  6573. a1 = t[0];
  6574. b1 = t[1];
  6575. z1 = z[0];
  6576. z2 = z[1];
  6577. z0 = r;
  6578. XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
  6579. a1->used = 16;
  6580. XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
  6581. b1->used = 16;
  6582. /* z2 = a1 * b1 */
  6583. err = _sp_mul_16(a1, b1, z2);
  6584. }
  6585. if (err == MP_OKAY) {
  6586. l = a1->dp[0];
  6587. h = 0;
  6588. SP_ASM_ADDC(l, h, a->dp[0]);
  6589. a1->dp[0] = l;
  6590. l = h;
  6591. h = 0;
  6592. for (i = 1; i < 16; i++) {
  6593. SP_ASM_ADDC(l, h, a1->dp[i]);
  6594. SP_ASM_ADDC(l, h, a->dp[i]);
  6595. a1->dp[i] = l;
  6596. l = h;
  6597. h = 0;
  6598. }
  6599. ca = l;
  6600. /* b01 = b0 + b1 */
  6601. l = b1->dp[0];
  6602. h = 0;
  6603. SP_ASM_ADDC(l, h, b->dp[0]);
  6604. b1->dp[0] = l;
  6605. l = h;
  6606. h = 0;
  6607. for (i = 1; i < 16; i++) {
  6608. SP_ASM_ADDC(l, h, b1->dp[i]);
  6609. SP_ASM_ADDC(l, h, b->dp[i]);
  6610. b1->dp[i] = l;
  6611. l = h;
  6612. h = 0;
  6613. }
  6614. cb = l;
  6615. /* z0 = a0 * b0 */
  6616. err = _sp_mul_16(a, b, z0);
  6617. }
  6618. if (err == MP_OKAY) {
  6619. /* z1 = (a0 + a1) * (b0 + b1) */
  6620. err = _sp_mul_16(a1, b1, z1);
  6621. }
  6622. if (err == MP_OKAY) {
6623. /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
  6624. /* r = z0 */
  6625. /* r += (z1 - z0 - z2) << 16 */
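/* (a0 + a1) and (b0 + b1) may each overflow into a 17th digit; ca and cb
 * hold those carry bits. The full z1 is the 32-digit product computed by
 * _sp_mul_16 plus ca*(b0 + b1)*R^16, cb*(a0 + a1)*R^16 and ca*cb*R^32,
 * which the conditional loops below add in before z1 is used.
 */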
  6626. z1->dp[32] = ca & cb;
  6627. l = 0;
  6628. if (ca) {
  6629. h = 0;
  6630. for (i = 0; i < 16; i++) {
  6631. SP_ASM_ADDC(l, h, z1->dp[i + 16]);
  6632. SP_ASM_ADDC(l, h, b1->dp[i]);
  6633. z1->dp[i + 16] = l;
  6634. l = h;
  6635. h = 0;
  6636. }
  6637. }
  6638. z1->dp[32] += l;
  6639. l = 0;
  6640. if (cb) {
  6641. h = 0;
  6642. for (i = 0; i < 16; i++) {
  6643. SP_ASM_ADDC(l, h, z1->dp[i + 16]);
  6644. SP_ASM_ADDC(l, h, a1->dp[i]);
  6645. z1->dp[i + 16] = l;
  6646. l = h;
  6647. h = 0;
  6648. }
  6649. }
  6650. z1->dp[32] += l;
6651. /* z1 = z1 - z0 - z2 */
  6652. l = 0;
  6653. h = 0;
  6654. for (i = 0; i < 32; i++) {
  6655. l += z1->dp[i];
  6656. SP_ASM_SUBC(l, h, z0->dp[i]);
  6657. SP_ASM_SUBC(l, h, z2->dp[i]);
  6658. z1->dp[i] = l;
  6659. l = h;
  6660. h = 0;
  6661. }
  6662. z1->dp[i] += l;
  6663. /* r += z1 << 16 */
  6664. l = 0;
  6665. h = 0;
  6666. for (i = 0; i < 16; i++) {
  6667. SP_ASM_ADDC(l, h, r->dp[i + 16]);
  6668. SP_ASM_ADDC(l, h, z1->dp[i]);
  6669. r->dp[i + 16] = l;
  6670. l = h;
  6671. h = 0;
  6672. }
  6673. for (; i < 33; i++) {
  6674. SP_ASM_ADDC(l, h, z1->dp[i]);
  6675. r->dp[i + 16] = l;
  6676. l = h;
  6677. h = 0;
  6678. }
  6679. /* r += z2 << 32 */
  6680. l = 0;
  6681. h = 0;
  6682. for (i = 0; i < 17; i++) {
  6683. SP_ASM_ADDC(l, h, r->dp[i + 32]);
  6684. SP_ASM_ADDC(l, h, z2->dp[i]);
  6685. r->dp[i + 32] = l;
  6686. l = h;
  6687. h = 0;
  6688. }
  6689. for (; i < 32; i++) {
  6690. SP_ASM_ADDC(l, h, z2->dp[i]);
  6691. r->dp[i + 32] = l;
  6692. l = h;
  6693. h = 0;
  6694. }
  6695. r->used = 64;
  6696. sp_clamp(r);
  6697. }
  6698. FREE_SP_INT_ARRAY(z, NULL);
  6699. FREE_SP_INT_ARRAY(t, NULL);
  6700. return err;
  6701. }
  6702. #endif /* SP_INT_DIGITS >= 64 */
  6703. #if SP_INT_DIGITS >= 96
  6704. /* Multiply a by b and store in r: r = a * b
  6705. *
  6706. * @param [in] a SP integer to multiply.
  6707. * @param [in] b SP integer to multiply.
  6708. * @param [out] r SP integer result.
  6709. *
  6710. * @return MP_OKAY on success.
  6711. * @return MP_MEM when dynamic memory allocation fails.
  6712. */
  6713. static int _sp_mul_48(sp_int* a, sp_int* b, sp_int* r)
  6714. {
  6715. int err = MP_OKAY;
  6716. int i;
  6717. sp_int_digit l;
  6718. sp_int_digit h;
  6719. sp_int* a1;
  6720. sp_int* b1;
  6721. sp_int* z0;
  6722. sp_int* z1;
  6723. sp_int* z2;
  6724. sp_int_digit ca;
  6725. sp_int_digit cb;
  6726. DECL_SP_INT_ARRAY(t, 24, 2);
  6727. DECL_SP_INT_ARRAY(z, 49, 2);
  6728. ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
  6729. ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
  6730. if (err == MP_OKAY) {
  6731. a1 = t[0];
  6732. b1 = t[1];
  6733. z1 = z[0];
  6734. z2 = z[1];
  6735. z0 = r;
  6736. XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
  6737. a1->used = 24;
  6738. XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
  6739. b1->used = 24;
  6740. /* z2 = a1 * b1 */
  6741. err = _sp_mul_24(a1, b1, z2);
  6742. }
  6743. if (err == MP_OKAY) {
  6744. l = a1->dp[0];
  6745. h = 0;
  6746. SP_ASM_ADDC(l, h, a->dp[0]);
  6747. a1->dp[0] = l;
  6748. l = h;
  6749. h = 0;
  6750. for (i = 1; i < 24; i++) {
  6751. SP_ASM_ADDC(l, h, a1->dp[i]);
  6752. SP_ASM_ADDC(l, h, a->dp[i]);
  6753. a1->dp[i] = l;
  6754. l = h;
  6755. h = 0;
  6756. }
  6757. ca = l;
  6758. /* b01 = b0 + b1 */
  6759. l = b1->dp[0];
  6760. h = 0;
  6761. SP_ASM_ADDC(l, h, b->dp[0]);
  6762. b1->dp[0] = l;
  6763. l = h;
  6764. h = 0;
  6765. for (i = 1; i < 24; i++) {
  6766. SP_ASM_ADDC(l, h, b1->dp[i]);
  6767. SP_ASM_ADDC(l, h, b->dp[i]);
  6768. b1->dp[i] = l;
  6769. l = h;
  6770. h = 0;
  6771. }
  6772. cb = l;
  6773. /* z0 = a0 * b0 */
  6774. err = _sp_mul_24(a, b, z0);
  6775. }
  6776. if (err == MP_OKAY) {
  6777. /* z1 = (a0 + a1) * (b0 + b1) */
  6778. err = _sp_mul_24(a1, b1, z1);
  6779. }
  6780. if (err == MP_OKAY) {
6781. /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
  6782. /* r = z0 */
  6783. /* r += (z1 - z0 - z2) << 24 */
  6784. z1->dp[48] = ca & cb;
  6785. l = 0;
  6786. if (ca) {
  6787. h = 0;
  6788. for (i = 0; i < 24; i++) {
  6789. SP_ASM_ADDC(l, h, z1->dp[i + 24]);
  6790. SP_ASM_ADDC(l, h, b1->dp[i]);
  6791. z1->dp[i + 24] = l;
  6792. l = h;
  6793. h = 0;
  6794. }
  6795. }
  6796. z1->dp[48] += l;
  6797. l = 0;
  6798. if (cb) {
  6799. h = 0;
  6800. for (i = 0; i < 24; i++) {
  6801. SP_ASM_ADDC(l, h, z1->dp[i + 24]);
  6802. SP_ASM_ADDC(l, h, a1->dp[i]);
  6803. z1->dp[i + 24] = l;
  6804. l = h;
  6805. h = 0;
  6806. }
  6807. }
  6808. z1->dp[48] += l;
6809. /* z1 = z1 - z0 - z2 */
  6810. l = 0;
  6811. h = 0;
  6812. for (i = 0; i < 48; i++) {
  6813. l += z1->dp[i];
  6814. SP_ASM_SUBC(l, h, z0->dp[i]);
  6815. SP_ASM_SUBC(l, h, z2->dp[i]);
  6816. z1->dp[i] = l;
  6817. l = h;
  6818. h = 0;
  6819. }
  6820. z1->dp[i] += l;
6821. /* r += z1 << 24 */
  6822. l = 0;
  6823. h = 0;
  6824. for (i = 0; i < 24; i++) {
  6825. SP_ASM_ADDC(l, h, r->dp[i + 24]);
  6826. SP_ASM_ADDC(l, h, z1->dp[i]);
  6827. r->dp[i + 24] = l;
  6828. l = h;
  6829. h = 0;
  6830. }
  6831. for (; i < 49; i++) {
  6832. SP_ASM_ADDC(l, h, z1->dp[i]);
  6833. r->dp[i + 24] = l;
  6834. l = h;
  6835. h = 0;
  6836. }
  6837. /* r += z2 << 48 */
  6838. l = 0;
  6839. h = 0;
  6840. for (i = 0; i < 25; i++) {
  6841. SP_ASM_ADDC(l, h, r->dp[i + 48]);
  6842. SP_ASM_ADDC(l, h, z2->dp[i]);
  6843. r->dp[i + 48] = l;
  6844. l = h;
  6845. h = 0;
  6846. }
  6847. for (; i < 48; i++) {
  6848. SP_ASM_ADDC(l, h, z2->dp[i]);
  6849. r->dp[i + 48] = l;
  6850. l = h;
  6851. h = 0;
  6852. }
  6853. r->used = 96;
  6854. sp_clamp(r);
  6855. }
  6856. FREE_SP_INT_ARRAY(z, NULL);
  6857. FREE_SP_INT_ARRAY(t, NULL);
  6858. return err;
  6859. }
  6860. #endif /* SP_INT_DIGITS >= 96 */
  6861. #if SP_INT_DIGITS >= 128
  6862. /* Multiply a by b and store in r: r = a * b
  6863. *
  6864. * @param [in] a SP integer to multiply.
  6865. * @param [in] b SP integer to multiply.
  6866. * @param [out] r SP integer result.
  6867. *
  6868. * @return MP_OKAY on success.
  6869. * @return MP_MEM when dynamic memory allocation fails.
  6870. */
  6871. static int _sp_mul_64(sp_int* a, sp_int* b, sp_int* r)
  6872. {
  6873. int err = MP_OKAY;
  6874. int i;
  6875. sp_int_digit l;
  6876. sp_int_digit h;
  6877. sp_int* a1;
  6878. sp_int* b1;
  6879. sp_int* z0;
  6880. sp_int* z1;
  6881. sp_int* z2;
  6882. sp_int_digit ca;
  6883. sp_int_digit cb;
  6884. DECL_SP_INT_ARRAY(t, 32, 2);
  6885. DECL_SP_INT_ARRAY(z, 65, 2);
  6886. ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
  6887. ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
  6888. if (err == MP_OKAY) {
  6889. a1 = t[0];
  6890. b1 = t[1];
  6891. z1 = z[0];
  6892. z2 = z[1];
  6893. z0 = r;
  6894. XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
  6895. a1->used = 32;
  6896. XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
  6897. b1->used = 32;
  6898. /* z2 = a1 * b1 */
  6899. err = _sp_mul_32(a1, b1, z2);
  6900. }
  6901. if (err == MP_OKAY) {
  6902. l = a1->dp[0];
  6903. h = 0;
  6904. SP_ASM_ADDC(l, h, a->dp[0]);
  6905. a1->dp[0] = l;
  6906. l = h;
  6907. h = 0;
  6908. for (i = 1; i < 32; i++) {
  6909. SP_ASM_ADDC(l, h, a1->dp[i]);
  6910. SP_ASM_ADDC(l, h, a->dp[i]);
  6911. a1->dp[i] = l;
  6912. l = h;
  6913. h = 0;
  6914. }
  6915. ca = l;
  6916. /* b01 = b0 + b1 */
  6917. l = b1->dp[0];
  6918. h = 0;
  6919. SP_ASM_ADDC(l, h, b->dp[0]);
  6920. b1->dp[0] = l;
  6921. l = h;
  6922. h = 0;
  6923. for (i = 1; i < 32; i++) {
  6924. SP_ASM_ADDC(l, h, b1->dp[i]);
  6925. SP_ASM_ADDC(l, h, b->dp[i]);
  6926. b1->dp[i] = l;
  6927. l = h;
  6928. h = 0;
  6929. }
  6930. cb = l;
  6931. /* z0 = a0 * b0 */
  6932. err = _sp_mul_32(a, b, z0);
  6933. }
  6934. if (err == MP_OKAY) {
  6935. /* z1 = (a0 + a1) * (b0 + b1) */
  6936. err = _sp_mul_32(a1, b1, z1);
  6937. }
  6938. if (err == MP_OKAY) {
6939. /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
  6940. /* r = z0 */
  6941. /* r += (z1 - z0 - z2) << 32 */
  6942. z1->dp[64] = ca & cb;
  6943. l = 0;
  6944. if (ca) {
  6945. h = 0;
  6946. for (i = 0; i < 32; i++) {
  6947. SP_ASM_ADDC(l, h, z1->dp[i + 32]);
  6948. SP_ASM_ADDC(l, h, b1->dp[i]);
  6949. z1->dp[i + 32] = l;
  6950. l = h;
  6951. h = 0;
  6952. }
  6953. }
  6954. z1->dp[64] += l;
  6955. l = 0;
  6956. if (cb) {
  6957. h = 0;
  6958. for (i = 0; i < 32; i++) {
  6959. SP_ASM_ADDC(l, h, z1->dp[i + 32]);
  6960. SP_ASM_ADDC(l, h, a1->dp[i]);
  6961. z1->dp[i + 32] = l;
  6962. l = h;
  6963. h = 0;
  6964. }
  6965. }
  6966. z1->dp[64] += l;
6967. /* z1 = z1 - z0 - z2 */
  6968. l = 0;
  6969. h = 0;
  6970. for (i = 0; i < 64; i++) {
  6971. l += z1->dp[i];
  6972. SP_ASM_SUBC(l, h, z0->dp[i]);
  6973. SP_ASM_SUBC(l, h, z2->dp[i]);
  6974. z1->dp[i] = l;
  6975. l = h;
  6976. h = 0;
  6977. }
  6978. z1->dp[i] += l;
6979. /* r += z1 << 32 */
  6980. l = 0;
  6981. h = 0;
  6982. for (i = 0; i < 32; i++) {
  6983. SP_ASM_ADDC(l, h, r->dp[i + 32]);
  6984. SP_ASM_ADDC(l, h, z1->dp[i]);
  6985. r->dp[i + 32] = l;
  6986. l = h;
  6987. h = 0;
  6988. }
  6989. for (; i < 65; i++) {
  6990. SP_ASM_ADDC(l, h, z1->dp[i]);
  6991. r->dp[i + 32] = l;
  6992. l = h;
  6993. h = 0;
  6994. }
  6995. /* r += z2 << 64 */
  6996. l = 0;
  6997. h = 0;
  6998. for (i = 0; i < 33; i++) {
  6999. SP_ASM_ADDC(l, h, r->dp[i + 64]);
  7000. SP_ASM_ADDC(l, h, z2->dp[i]);
  7001. r->dp[i + 64] = l;
  7002. l = h;
  7003. h = 0;
  7004. }
  7005. for (; i < 64; i++) {
  7006. SP_ASM_ADDC(l, h, z2->dp[i]);
  7007. r->dp[i + 64] = l;
  7008. l = h;
  7009. h = 0;
  7010. }
  7011. r->used = 128;
  7012. sp_clamp(r);
  7013. }
  7014. FREE_SP_INT_ARRAY(z, NULL);
  7015. FREE_SP_INT_ARRAY(t, NULL);
  7016. return err;
  7017. }
  7018. #endif /* SP_INT_DIGITS >= 128 */
  7019. #if SP_INT_DIGITS >= 192
  7020. /* Multiply a by b and store in r: r = a * b
  7021. *
  7022. * @param [in] a SP integer to multiply.
  7023. * @param [in] b SP integer to multiply.
  7024. * @param [out] r SP integer result.
  7025. *
  7026. * @return MP_OKAY on success.
  7027. * @return MP_MEM when dynamic memory allocation fails.
  7028. */
  7029. static int _sp_mul_96(sp_int* a, sp_int* b, sp_int* r)
  7030. {
  7031. int err = MP_OKAY;
  7032. int i;
  7033. sp_int_digit l;
  7034. sp_int_digit h;
  7035. sp_int* a1;
  7036. sp_int* b1;
  7037. sp_int* z0;
  7038. sp_int* z1;
  7039. sp_int* z2;
  7040. sp_int_digit ca;
  7041. sp_int_digit cb;
  7042. DECL_SP_INT_ARRAY(t, 48, 2);
  7043. DECL_SP_INT_ARRAY(z, 97, 2);
  7044. ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
  7045. ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
  7046. if (err == MP_OKAY) {
  7047. a1 = t[0];
  7048. b1 = t[1];
  7049. z1 = z[0];
  7050. z2 = z[1];
  7051. z0 = r;
  7052. XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
  7053. a1->used = 48;
  7054. XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
  7055. b1->used = 48;
  7056. /* z2 = a1 * b1 */
  7057. err = _sp_mul_48(a1, b1, z2);
  7058. }
  7059. if (err == MP_OKAY) {
  7060. l = a1->dp[0];
  7061. h = 0;
  7062. SP_ASM_ADDC(l, h, a->dp[0]);
  7063. a1->dp[0] = l;
  7064. l = h;
  7065. h = 0;
  7066. for (i = 1; i < 48; i++) {
  7067. SP_ASM_ADDC(l, h, a1->dp[i]);
  7068. SP_ASM_ADDC(l, h, a->dp[i]);
  7069. a1->dp[i] = l;
  7070. l = h;
  7071. h = 0;
  7072. }
  7073. ca = l;
  7074. /* b01 = b0 + b1 */
  7075. l = b1->dp[0];
  7076. h = 0;
  7077. SP_ASM_ADDC(l, h, b->dp[0]);
  7078. b1->dp[0] = l;
  7079. l = h;
  7080. h = 0;
  7081. for (i = 1; i < 48; i++) {
  7082. SP_ASM_ADDC(l, h, b1->dp[i]);
  7083. SP_ASM_ADDC(l, h, b->dp[i]);
  7084. b1->dp[i] = l;
  7085. l = h;
  7086. h = 0;
  7087. }
  7088. cb = l;
  7089. /* z0 = a0 * b0 */
  7090. err = _sp_mul_48(a, b, z0);
  7091. }
  7092. if (err == MP_OKAY) {
  7093. /* z1 = (a0 + a1) * (b0 + b1) */
  7094. err = _sp_mul_48(a1, b1, z1);
  7095. }
  7096. if (err == MP_OKAY) {
7097. /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
  7098. /* r = z0 */
  7099. /* r += (z1 - z0 - z2) << 48 */
  7100. z1->dp[96] = ca & cb;
  7101. l = 0;
  7102. if (ca) {
  7103. h = 0;
  7104. for (i = 0; i < 48; i++) {
  7105. SP_ASM_ADDC(l, h, z1->dp[i + 48]);
  7106. SP_ASM_ADDC(l, h, b1->dp[i]);
  7107. z1->dp[i + 48] = l;
  7108. l = h;
  7109. h = 0;
  7110. }
  7111. }
  7112. z1->dp[96] += l;
  7113. l = 0;
  7114. if (cb) {
  7115. h = 0;
  7116. for (i = 0; i < 48; i++) {
  7117. SP_ASM_ADDC(l, h, z1->dp[i + 48]);
  7118. SP_ASM_ADDC(l, h, a1->dp[i]);
  7119. z1->dp[i + 48] = l;
  7120. l = h;
  7121. h = 0;
  7122. }
  7123. }
  7124. z1->dp[96] += l;
7125. /* z1 = z1 - z0 - z2 */
  7126. l = 0;
  7127. h = 0;
  7128. for (i = 0; i < 96; i++) {
  7129. l += z1->dp[i];
  7130. SP_ASM_SUBC(l, h, z0->dp[i]);
  7131. SP_ASM_SUBC(l, h, z2->dp[i]);
  7132. z1->dp[i] = l;
  7133. l = h;
  7134. h = 0;
  7135. }
  7136. z1->dp[i] += l;
7137. /* r += z1 << 48 */
  7138. l = 0;
  7139. h = 0;
  7140. for (i = 0; i < 48; i++) {
  7141. SP_ASM_ADDC(l, h, r->dp[i + 48]);
  7142. SP_ASM_ADDC(l, h, z1->dp[i]);
  7143. r->dp[i + 48] = l;
  7144. l = h;
  7145. h = 0;
  7146. }
  7147. for (; i < 97; i++) {
  7148. SP_ASM_ADDC(l, h, z1->dp[i]);
  7149. r->dp[i + 48] = l;
  7150. l = h;
  7151. h = 0;
  7152. }
  7153. /* r += z2 << 96 */
  7154. l = 0;
  7155. h = 0;
  7156. for (i = 0; i < 49; i++) {
  7157. SP_ASM_ADDC(l, h, r->dp[i + 96]);
  7158. SP_ASM_ADDC(l, h, z2->dp[i]);
  7159. r->dp[i + 96] = l;
  7160. l = h;
  7161. h = 0;
  7162. }
  7163. for (; i < 96; i++) {
  7164. SP_ASM_ADDC(l, h, z2->dp[i]);
  7165. r->dp[i + 96] = l;
  7166. l = h;
  7167. h = 0;
  7168. }
  7169. r->used = 192;
  7170. sp_clamp(r);
  7171. }
  7172. FREE_SP_INT_ARRAY(z, NULL);
  7173. FREE_SP_INT_ARRAY(t, NULL);
  7174. return err;
  7175. }
  7176. #endif /* SP_INT_DIGITS >= 192 */
  7177. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  7178. #endif /* !WOLFSSL_SP_SMALL */
  7179. /* Multiply a by b and store in r: r = a * b
  7180. *
  7181. * @param [in] a SP integer to multiply.
  7182. * @param [in] b SP integer to multiply.
  7183. * @param [out] r SP integer result.
  7184. *
  7185. * @return MP_OKAY on success.
7186. * @return MP_VAL when a, b or r is NULL; or the result will be too big for fixed
  7187. * data length.
  7188. * @return MP_MEM when dynamic memory allocation fails.
  7189. */
  7190. int sp_mul(sp_int* a, sp_int* b, sp_int* r)
  7191. {
  7192. int err = MP_OKAY;
  7193. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7194. int sign;
  7195. #endif
  7196. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7197. err = MP_VAL;
  7198. }
  7199. /* Need extra digit during calculation. */
  7200. if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
  7201. err = MP_VAL;
  7202. }
  7203. if (0 && (err == MP_OKAY)) {
  7204. sp_print(a, "a");
  7205. sp_print(b, "b");
  7206. }
  7207. if (err == MP_OKAY) {
  7208. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7209. sign = a->sign ^ b->sign;
  7210. #endif
  7211. if ((a->used == 0) || (b->used == 0)) {
  7212. _sp_zero(r);
  7213. }
  7214. else
  7215. #ifndef WOLFSSL_SP_SMALL
  7216. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  7217. #if SP_WORD_SIZE == 64
  7218. if ((a->used == 4) && (b->used == 4)) {
  7219. err = _sp_mul_4(a, b, r);
  7220. }
  7221. else
  7222. #endif /* SP_WORD_SIZE == 64 */
  7223. #if SP_WORD_SIZE == 64
  7224. #ifdef SQR_MUL_ASM
  7225. if ((a->used == 6) && (b->used == 6)) {
  7226. err = _sp_mul_6(a, b, r);
  7227. }
  7228. else
  7229. #endif /* SQR_MUL_ASM */
  7230. #endif /* SP_WORD_SIZE == 64 */
  7231. #if SP_WORD_SIZE == 32
  7232. #ifdef SQR_MUL_ASM
  7233. if ((a->used == 8) && (b->used == 8)) {
  7234. err = _sp_mul_8(a, b, r);
  7235. }
  7236. else
  7237. #endif /* SQR_MUL_ASM */
  7238. #endif /* SP_WORD_SIZE == 32 */
  7239. #if SP_WORD_SIZE == 32
  7240. #ifdef SQR_MUL_ASM
  7241. if ((a->used == 12) && (b->used == 12)) {
  7242. err = _sp_mul_12(a, b, r);
  7243. }
  7244. else
  7245. #endif /* SQR_MUL_ASM */
  7246. #endif /* SP_WORD_SIZE == 32 */
  7247. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  7248. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  7249. #if SP_INT_DIGITS >= 32
  7250. if ((a->used == 16) && (b->used == 16)) {
  7251. err = _sp_mul_16(a, b, r);
  7252. }
  7253. else
  7254. #endif /* SP_INT_DIGITS >= 32 */
  7255. #if SP_INT_DIGITS >= 48
  7256. if ((a->used == 24) && (b->used == 24)) {
  7257. err = _sp_mul_24(a, b, r);
  7258. }
  7259. else
  7260. #endif /* SP_INT_DIGITS >= 48 */
  7261. #if SP_INT_DIGITS >= 64
  7262. if ((a->used == 32) && (b->used == 32)) {
  7263. err = _sp_mul_32(a, b, r);
  7264. }
  7265. else
  7266. #endif /* SP_INT_DIGITS >= 64 */
  7267. #if SP_INT_DIGITS >= 96
  7268. if ((a->used == 48) && (b->used == 48)) {
  7269. err = _sp_mul_48(a, b, r);
  7270. }
  7271. else
  7272. #endif /* SP_INT_DIGITS >= 96 */
  7273. #if SP_INT_DIGITS >= 128
  7274. if ((a->used == 64) && (b->used == 64)) {
  7275. err = _sp_mul_64(a, b, r);
  7276. }
  7277. else
  7278. #endif /* SP_INT_DIGITS >= 128 */
  7279. #if SP_INT_DIGITS >= 192
  7280. if ((a->used == 96) && (b->used == 96)) {
  7281. err = _sp_mul_96(a, b, r);
  7282. }
  7283. else
  7284. #endif /* SP_INT_DIGITS >= 192 */
  7285. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  7286. #endif /* !WOLFSSL_SP_SMALL */
  7287. #ifdef SQR_MUL_ASM
  7288. if (a->used == b->used) {
  7289. err = _sp_mul_nxn(a, b, r);
  7290. }
  7291. else
  7292. #endif
  7293. {
  7294. err = _sp_mul(a, b, r);
  7295. }
  7296. }
  7297. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7298. if (err == MP_OKAY) {
  7299. r->sign = (r->used == 0) ? MP_ZPOS : sign;
  7300. }
  7301. #endif
  7302. if (0 && (err == MP_OKAY)) {
  7303. sp_print(r, "rmul");
  7304. }
  7305. return err;
  7306. }
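/* Illustrative sketch (not part of the source): multiplying two values with
 * the function above. Assumes a build where stack-allocated sp_ints are
 * acceptable and uses only sp_init/sp_set/sp_mul from this file.
 *
 *     sp_int a, b, r;
 *     sp_init(&a);
 *     sp_init(&b);
 *     sp_init(&r);
 *     sp_set(&a, 12345);
 *     sp_set(&b, 678);
 *     if (sp_mul(&a, &b, &r) == MP_OKAY) {
 *         // r now holds 12345 * 678, with r->used clamped by sp_clamp().
 *     }
 */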
  7307. /* END SP_MUL implementations. */
  7308. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  7309. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  7310. *
  7311. * @param [in] a SP integer to multiply.
  7312. * @param [in] b SP integer to multiply.
  7313. * @param [in] m SP integer that is the modulus.
  7314. * @param [out] r SP integer result.
  7315. *
  7316. * @return MP_OKAY on success.
  7317. * @return MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
  7318. * fixed data length.
  7319. * @return MP_MEM when dynamic memory allocation fails.
  7320. */
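/* Note: the full product is computed into a temporary of a->used + b->used
 * digits and then reduced into r with sp_mod; r->size is also required to
 * be at least that large (checked below).
 */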
  7321. int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
  7322. {
  7323. int err = MP_OKAY;
  7324. DECL_SP_INT(t, ((a == NULL) || (b == NULL)) ? 1 : a->used + b->used);
  7325. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7326. err = MP_VAL;
  7327. }
  7328. if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
  7329. err = MP_VAL;
  7330. }
  7331. ALLOC_SP_INT(t, a->used + b->used, err, NULL);
  7332. if (err == MP_OKAY) {
  7333. err = sp_init_size(t, a->used + b->used);
  7334. }
  7335. if (err == MP_OKAY) {
  7336. err = sp_mul(a, b, t);
  7337. }
  7338. if (err == MP_OKAY) {
  7339. err = sp_mod(t, m, r);
  7340. }
  7341. FREE_SP_INT(t, NULL);
  7342. return err;
  7343. }
  7344. #endif
  7345. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  7346. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7347. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  7348. /* Calculates the multiplicative inverse in the field.
  7349. *
  7350. * @param [in] a SP integer to find inverse of.
7351. * @param [in] m SP integer that is the modulus.
  7352. * @param [out] r SP integer to hold result.
  7353. *
  7354. * @return MP_OKAY on success.
7355. * @return MP_VAL when a, m or r is NULL; a or m is zero; both a and m are even;
7356. * or m is negative.
  7357. * @return MP_MEM when dynamic memory allocation fails.
  7358. */
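/* Implementation note: for odd m the loop below is the binary extended
 * Euclidean algorithm. Starting from u = m, v = a, b = 0, c = 1 it keeps
 * the invariants
 *     b * a == u (mod m)   and   c * a == v (mod m)
 * while halving even values and subtracting the smaller from the larger,
 * so when v reaches 1, c * a == 1 (mod m) and c is the inverse. Even m
 * (with odd a) is handled separately using the identity in the comment
 * further down.
 */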
  7359. int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
  7360. {
  7361. int err = MP_OKAY;
  7362. sp_int* u;
  7363. sp_int* v;
  7364. sp_int* b;
  7365. sp_int* c;
  7366. int used = ((m == NULL) || (a == NULL)) ? 1 :
  7367. ((m->used >= a->used) ? m->used + 1 : a->used + 1);
  7368. DECL_SP_INT_ARRAY(t, used, 4);
  7369. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  7370. err = MP_VAL;
  7371. }
  7372. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7373. if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
  7374. err = MP_VAL;
  7375. }
  7376. #endif
  7377. ALLOC_SP_INT_ARRAY(t, (m == NULL) ? 0 : m->used + 1, 4, err, NULL);
  7378. if (err == MP_OKAY) {
  7379. u = t[0];
  7380. v = t[1];
  7381. b = t[2];
  7382. c = t[3];
  7383. sp_init_size(v, used + 1);
  7384. if (_sp_cmp_abs(a, m) != MP_LT) {
  7385. err = sp_mod(a, m, v);
  7386. a = v;
  7387. }
  7388. }
  7389. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7390. if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
  7391. /* Make 'a' positive */
  7392. err = sp_add(m, a, v);
  7393. a = v;
  7394. }
  7395. #endif
  7396. /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
  7397. if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
  7398. err = MP_VAL;
  7399. }
  7400. /* r*2*x != n*2*y + 1 for integer x,y */
  7401. if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
  7402. err = MP_VAL;
  7403. }
  7404. /* 1*1 = 0*m + 1 */
  7405. if ((err == MP_OKAY) && sp_isone(a)) {
  7406. sp_set(r, 1);
  7407. }
  7408. else if (err != MP_OKAY) {
  7409. }
  7410. else if (sp_iseven(m)) {
  7411. /* a^-1 mod m = m + (1 - m*(m^-1 % a)) / a
  7412. * = m - (m*(m^-1 % a) - 1) / a
  7413. */
  7414. err = sp_invmod(m, a, r);
  7415. if (err == MP_OKAY) {
  7416. err = sp_mul(r, m, r);
  7417. }
  7418. if (err == MP_OKAY) {
  7419. _sp_sub_d(r, 1, r);
  7420. err = sp_div(r, a, r, NULL);
  7421. if (err == MP_OKAY) {
  7422. sp_sub(m, r, r);
  7423. }
  7424. }
  7425. }
  7426. else {
  7427. sp_init_size(u, m->used + 1);
  7428. sp_init_size(b, m->used + 1);
  7429. sp_init_size(c, m->used + 1);
  7430. sp_copy(m, u);
  7431. sp_copy(a, v);
  7432. _sp_zero(b);
  7433. sp_set(c, 1);
  7434. while (!sp_isone(v) && !sp_iszero(u)) {
  7435. if (sp_iseven(u)) {
  7436. sp_div_2(u, u);
  7437. if (sp_isodd(b)) {
  7438. sp_add(b, m, b);
  7439. }
  7440. sp_div_2(b, b);
  7441. }
  7442. else if (sp_iseven(v)) {
  7443. sp_div_2(v, v);
  7444. if (sp_isodd(c)) {
  7445. sp_add(c, m, c);
  7446. }
  7447. sp_div_2(c, c);
  7448. }
  7449. else if (_sp_cmp(u, v) != MP_LT) {
  7450. sp_sub(u, v, u);
  7451. if (_sp_cmp(b, c) == MP_LT) {
  7452. sp_add(b, m, b);
  7453. }
  7454. sp_sub(b, c, b);
  7455. }
  7456. else {
  7457. sp_sub(v, u, v);
  7458. if (_sp_cmp(c, b) == MP_LT) {
  7459. sp_add(c, m, c);
  7460. }
  7461. sp_sub(c, b, c);
  7462. }
  7463. }
  7464. if (sp_iszero(u)) {
  7465. err = MP_VAL;
  7466. }
  7467. else {
  7468. err = sp_copy(c, r);
  7469. }
  7470. }
  7471. FREE_SP_INT_ARRAY(t, NULL);
  7472. return err;
  7473. }
  7474. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA || \
  7475. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7476. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  7477. #define CT_INV_MOD_PRE_CNT 8
  7478. /* Calculates the multiplicative inverse in the field - constant time.
  7479. *
  7480. * Modulus (m) must be a prime and greater than 2.
  7481. *
7482. * @param [in] a SP integer, Montgomery form, to find inverse of.
7483. * @param [in] m SP integer that is the modulus.
  7484. * @param [out] r SP integer to hold result.
  7485. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  7486. *
  7487. * @return MP_OKAY on success.
  7488. * @return MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
  7489. * @return MP_MEM when dynamic memory allocation fails.
  7490. */
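/* Implementation note: m is prime, so by Fermat's little theorem
 * a^-1 == a^(m-2) (mod m). The code below sets e = m - 2 and raises a
 * (already in Montgomery form) to that power using precomputed powers
 * a^1, a^3, a^7, ... (exponents that are runs of 1 bits), so the pattern
 * of squarings and multiplications depends only on the bits of m - 2 and
 * not on the secret value a.
 */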
  7491. int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp)
  7492. {
  7493. int err = MP_OKAY;
  7494. int i;
  7495. int j;
  7496. sp_int* t;
  7497. sp_int* e;
  7498. DECL_SP_INT_ARRAY(pre, (m == NULL) ? 1 : m->used * 2 + 1,
  7499. CT_INV_MOD_PRE_CNT + 2);
  7500. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  7501. err = MP_VAL;
  7502. }
  7503. /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
  7504. if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
  7505. (m->used == 1 && m->dp[0] < 3))) {
  7506. err = MP_VAL;
  7507. }
  7508. ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
  7509. if (err == MP_OKAY) {
  7510. t = pre[CT_INV_MOD_PRE_CNT + 0];
  7511. e = pre[CT_INV_MOD_PRE_CNT + 1];
  7512. sp_init_size(t, m->used * 2 + 1);
  7513. sp_init_size(e, m->used * 2 + 1);
  7514. sp_init_size(pre[0], m->used * 2 + 1);
  7515. err = sp_copy(a, pre[0]);
  7516. for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
  7517. sp_init_size(pre[i], m->used * 2 + 1);
  7518. err = sp_sqr(pre[i-1], pre[i]);
  7519. if (err == MP_OKAY) {
  7520. err = _sp_mont_red(pre[i], m, mp);
  7521. }
  7522. if (err == MP_OKAY) {
  7523. err = sp_mul(pre[i], a, pre[i]);
  7524. }
  7525. if (err == MP_OKAY) {
  7526. err = _sp_mont_red(pre[i], m, mp);
  7527. }
  7528. }
  7529. }
  7530. if (err == MP_OKAY) {
  7531. _sp_sub_d(m, 2, e);
  7532. for (i = sp_count_bits(e)-1, j = 0; i >= 0; i--, j++) {
  7533. if ((!sp_is_bit_set(e, i)) || (j == CT_INV_MOD_PRE_CNT)) {
  7534. break;
  7535. }
  7536. }
  7537. err = sp_copy(pre[j-1], t);
  7538. for (j = 0; (err == MP_OKAY) && (i >= 0); i--) {
  7539. int set = sp_is_bit_set(e, i);
  7540. if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) {
  7541. err = sp_mul(t, pre[j-1], t);
  7542. if (err == MP_OKAY) {
  7543. err = _sp_mont_red(t, m, mp);
  7544. }
  7545. j = 0;
  7546. }
  7547. if (err == MP_OKAY) {
  7548. err = sp_sqr(t, t);
  7549. if (err == MP_OKAY) {
  7550. err = _sp_mont_red(t, m, mp);
  7551. }
  7552. }
  7553. j += set;
  7554. }
  7555. }
  7556. if (err == MP_OKAY) {
  7557. if (j > 0) {
  7558. err = sp_mul(t, pre[j-1], r);
  7559. if (err == MP_OKAY) {
  7560. err = _sp_mont_red(r, m, mp);
  7561. }
  7562. }
  7563. else {
  7564. err = sp_copy(t, r);
  7565. }
  7566. }
  7567. FREE_SP_INT_ARRAY(pre, NULL);
  7568. return err;
  7569. }
  7570. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  7571. /**************************
  7572. * Exponentiation functions
  7573. **************************/
  7574. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7575. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  7576. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  7577. * Process the exponent one bit at a time.
  7578. * Is constant time and can be cache attack resistant.
  7579. *
  7580. * @param [in] b SP integer that is the base.
  7581. * @param [in] e SP integer that is the exponent.
7582. * @param [in] bits Number of bits in exponent to use. May be greater than
7583. * count of bits in e.
  7584. * @param [in] m SP integer that is the modulus.
  7585. * @param [out] r SP integer to hold result.
  7586. *
  7587. * @return MP_OKAY on success.
  7588. * @return MP_MEM when dynamic memory allocation fails.
  7589. */
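/* Implementation note: each exponent bit costs one modular square and one
 * modular multiply. Until the top set bit has been seen, and for 0 bits,
 * the operations target a dummy value (t[0]) instead of the real result
 * (t[1]); in the cache-resistant build the destination is selected with
 * the sp_off_on_addr masks rather than a branch, so the memory access
 * pattern does not reveal the exponent bits.
 */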
  7590. static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r)
  7591. {
  7592. int i;
  7593. int err = MP_OKAY;
  7594. int done = 0;
  7595. int j;
  7596. int y;
  7597. int seenTopBit = 0;
  7598. #ifdef WC_NO_CACHE_RESISTANT
  7599. DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
  7600. #else
  7601. DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
  7602. #endif
  7603. #ifdef WC_NO_CACHE_RESISTANT
  7604. ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
  7605. #else
  7606. ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
  7607. #endif
  7608. if (err == MP_OKAY) {
  7609. sp_init_size(t[0], 2 * m->used + 1);
  7610. sp_init_size(t[1], 2 * m->used + 1);
  7611. #ifndef WC_NO_CACHE_RESISTANT
  7612. sp_init_size(t[2], 2 * m->used + 1);
  7613. #endif
7614. /* Ensure base is less than modulus. */
  7615. if (_sp_cmp(b, m) != MP_LT) {
  7616. err = sp_mod(b, m, t[0]);
  7617. if ((err == MP_OKAY) && sp_iszero(t[0])) {
  7618. sp_set(r, 0);
  7619. done = 1;
  7620. }
  7621. }
  7622. else {
  7623. err = sp_copy(b, t[0]);
  7624. }
  7625. }
  7626. if ((!done) && (err == MP_OKAY)) {
  7627. /* t[0] is dummy value and t[1] is result */
  7628. err = sp_copy(t[0], t[1]);
  7629. for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
  7630. #ifdef WC_NO_CACHE_RESISTANT
  7631. /* Square real result if seen the top bit. */
  7632. err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]);
  7633. if (err == MP_OKAY) {
  7634. y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
  7635. j = y & seenTopBit;
  7636. seenTopBit |= y;
  7637. /* Multiply real result if bit is set and seen the top bit. */
  7638. err = sp_mulmod(t[j], b, m, t[j]);
  7639. }
  7640. #else
  7641. /* Square real result if seen the top bit. */
  7642. sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
  7643. ((size_t)t[1] & sp_off_on_addr[seenTopBit ])),
  7644. t[2]);
  7645. err = sp_sqrmod(t[2], m, t[2]);
  7646. sp_copy(t[2],
  7647. (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
  7648. ((size_t)t[1] & sp_off_on_addr[seenTopBit ])));
  7649. if (err == MP_OKAY) {
  7650. y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
  7651. j = y & seenTopBit;
  7652. seenTopBit |= y;
  7653. /* Multiply real result if bit is set and seen the top bit. */
  7654. sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
  7655. ((size_t)t[1] & sp_off_on_addr[j ])),
  7656. t[2]);
  7657. err = sp_mulmod(t[2], b, m, t[2]);
  7658. sp_copy(t[2],
  7659. (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
  7660. ((size_t)t[1] & sp_off_on_addr[j ])));
  7661. }
  7662. #endif
  7663. }
  7664. }
  7665. if ((!done) && (err == MP_OKAY)) {
  7666. err = sp_copy(t[1], r);
  7667. }
  7668. FREE_SP_INT_ARRAY(t, NULL);
  7669. return err;
  7670. }
  7671. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  7672. * WOLFSSL_HAVE_SP_DH */
  7673. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7674. !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  7675. #ifndef WC_NO_HARDEN
  7676. #if !defined(WC_NO_CACHE_RESISTANT)
  7677. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  7678. * Process the exponent one bit at a time with base in montgomery form.
  7679. * Is constant time and cache attack resistant.
  7680. *
  7681. * @param [in] b SP integer that is the base.
  7682. * @param [in] e SP integer that is the exponent.
7683. * @param [in] bits Number of bits in exponent to use. May be greater than
7684. * count of bits in e.
  7685. * @param [in] m SP integer that is the modulus.
  7686. * @param [out] r SP integer to hold result.
  7687. *
  7688. * @return MP_OKAY on success.
  7689. * @return MP_MEM when dynamic memory allocation fails.
  7690. */
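/* Implementation note: same bit-at-a-time, dummy/real selection as
 * _sp_exptmod_ex above, but the working values are kept in Montgomery
 * form so each step is a plain sp_sqr/sp_mul followed by _sp_mont_red
 * rather than a full modular operation. t[2] holds the Montgomery form
 * of the base and t[3] is the scratch value the masked copies go through.
 */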
  7691. static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
  7692. sp_int* r)
  7693. {
  7694. int i;
  7695. int err = MP_OKAY;
  7696. int done = 0;
  7697. int j;
  7698. int y;
  7699. int seenTopBit = 0;
  7700. sp_int_digit mp;
  7701. DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
  7702. ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
  7703. if (err == MP_OKAY) {
  7704. sp_init_size(t[0], m->used * 2 + 1);
  7705. sp_init_size(t[1], m->used * 2 + 1);
  7706. sp_init_size(t[2], m->used * 2 + 1);
  7707. sp_init_size(t[3], m->used * 2 + 1);
7708. /* Ensure base is less than modulus. */
  7709. if (_sp_cmp(b, m) != MP_LT) {
  7710. err = sp_mod(b, m, t[0]);
  7711. if ((err == MP_OKAY) && sp_iszero(t[0])) {
  7712. sp_set(r, 0);
  7713. done = 1;
  7714. }
  7715. }
  7716. else {
  7717. err = sp_copy(b, t[0]);
  7718. }
  7719. }
  7720. if ((!done) && (err == MP_OKAY)) {
  7721. err = sp_mont_setup(m, &mp);
  7722. if (err == MP_OKAY) {
  7723. err = sp_mont_norm(t[1], m);
  7724. }
  7725. if (err == MP_OKAY) {
  7726. /* Convert to montgomery form. */
  7727. err = sp_mulmod(t[0], t[1], m, t[0]);
  7728. }
  7729. if (err == MP_OKAY) {
  7730. /* t[0] is fake working value and t[1] is real working value. */
  7731. sp_copy(t[0], t[1]);
7732. /* Montgomery form of base to multiply by. */
  7733. sp_copy(t[0], t[2]);
  7734. }
  7735. for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
  7736. /* Square real working value if seen the top bit. */
  7737. sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
  7738. ((size_t)t[1] & sp_off_on_addr[seenTopBit ])),
  7739. t[3]);
  7740. err = sp_sqr(t[3], t[3]);
  7741. if (err == MP_OKAY) {
  7742. err = _sp_mont_red(t[3], m, mp);
  7743. }
  7744. sp_copy(t[3],
  7745. (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
  7746. ((size_t)t[1] & sp_off_on_addr[seenTopBit ])));
  7747. if (err == MP_OKAY) {
  7748. y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
  7749. j = y & seenTopBit;
  7750. seenTopBit |= y;
  7751. /* Multiply real value if bit is set and seen the top bit. */
  7752. sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
  7753. ((size_t)t[1] & sp_off_on_addr[j ])),
  7754. t[3]);
  7755. err = sp_mul(t[3], t[2], t[3]);
  7756. if (err == MP_OKAY) {
  7757. err = _sp_mont_red(t[3], m, mp);
  7758. }
  7759. sp_copy(t[3],
  7760. (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
  7761. ((size_t)t[1] & sp_off_on_addr[j ])));
  7762. }
  7763. }
  7764. if (err == MP_OKAY) {
  7765. /* Convert from montgomery form. */
  7766. err = _sp_mont_red(t[1], m, mp);
  7767. /* Reduction implementation returns number to range < m. */
  7768. }
  7769. }
  7770. if ((!done) && (err == MP_OKAY)) {
  7771. err = sp_copy(t[1], r);
  7772. }
  7773. FREE_SP_INT_ARRAY(t, NULL);
  7774. return err;
  7775. }
  7776. #else
  7777. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  7778. #define SP_ALLOC
  7779. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  7780. * Creates a window of precalculated exponents with base in montgomery form.
  7781. * Is constant time but NOT cache attack resistant.
  7782. *
  7783. * @param [in] b SP integer that is the base.
  7784. * @param [in] e SP integer that is the exponent.
7785. * @param [in] bits Number of bits in exponent to use. May be greater than
7786. * count of bits in e.
  7787. * @param [in] m SP integer that is the modulus.
  7788. * @param [out] r SP integer to hold result.
  7789. *
  7790. * @return MP_OKAY on success.
  7791. * @return MP_MEM when dynamic memory allocation fails.
  7792. */
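/* Implementation note: this is fixed-window (k-ary) exponentiation.
 * t[i] holds the Montgomery form of b^i for i < 2^winBits; the exponent
 * is consumed winBits bits at a time by squaring winBits times and then
 * multiplying by the table entry for the window value. The amount of work
 * depends only on the exponent length, but the table index is secret,
 * which is why this variant is not cache attack resistant.
 */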
  7793. static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
  7794. sp_int* r)
  7795. {
  7796. int i;
  7797. int j;
  7798. int c;
  7799. int y;
  7800. int winBits;
  7801. int preCnt;
  7802. int err = MP_OKAY;
  7803. int done = 0;
  7804. sp_int_digit mp;
  7805. sp_int_digit n;
  7806. sp_int_digit mask;
  7807. sp_int* tr = NULL;
  7808. DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
  7809. if (bits > 450) {
  7810. winBits = 6;
  7811. }
  7812. else if (bits <= 21) {
  7813. winBits = 1;
  7814. }
  7815. else if (bits <= 36) {
  7816. winBits = 3;
  7817. }
  7818. else if (bits <= 140) {
  7819. winBits = 4;
  7820. }
  7821. else {
  7822. winBits = 5;
  7823. }
  7824. preCnt = 1 << winBits;
  7825. mask = preCnt - 1;
  7826. ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
  7827. if (err == MP_OKAY) {
  7828. tr = t[preCnt];
  7829. for (i = 0; i < preCnt; i++) {
  7830. sp_init_size(t[i], m->used * 2 + 1);
  7831. }
  7832. sp_init_size(tr, m->used * 2 + 1);
7833. /* Ensure base is less than modulus. */
  7834. if (_sp_cmp(b, m) != MP_LT) {
  7835. err = sp_mod(b, m, t[1]);
  7836. if ((err == MP_OKAY) && sp_iszero(t[1])) {
  7837. sp_set(r, 0);
  7838. done = 1;
  7839. }
  7840. }
  7841. else {
  7842. err = sp_copy(b, t[1]);
  7843. }
  7844. }
  7845. if ((!done) && (err == MP_OKAY)) {
  7846. err = sp_mont_setup(m, &mp);
  7847. if (err == MP_OKAY) {
  7848. /* Norm value is 1 in montgomery form. */
  7849. err = sp_mont_norm(t[0], m);
  7850. }
  7851. if (err == MP_OKAY) {
  7852. /* Convert base to montgomery form. */
  7853. err = sp_mulmod(t[1], t[0], m, t[1]);
  7854. }
  7855. /* Pre-calculate values */
  7856. for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
  7857. if ((i & 1) == 0) {
  7858. err = sp_sqr(t[i/2], t[i]);
  7859. }
  7860. else {
  7861. err = sp_mul(t[i-1], t[1], t[i]);
  7862. }
  7863. if (err == MP_OKAY) {
  7864. err = _sp_mont_red(t[i], m, mp);
  7865. }
  7866. }
  7867. if (err == MP_OKAY) {
7868. /* Bits from the top of the exponent that don't fill a full window. */
  7869. i = (bits - 1) >> SP_WORD_SHIFT;
  7870. n = e->dp[i--];
  7871. c = bits & (SP_WORD_SIZE - 1);
  7872. if (c == 0) {
  7873. c = SP_WORD_SIZE;
  7874. }
  7875. c -= bits % winBits;
  7876. y = (int)(n >> c);
  7877. n <<= SP_WORD_SIZE - c;
  7878. /* Copy window number for top bits. */
  7879. sp_copy(t[y], tr);
  7880. for (; (i >= 0) || (c >= winBits); ) {
  7881. if (c == 0) {
  7882. /* Bits up to end of digit */
  7883. n = e->dp[i--];
  7884. y = (int)(n >> (SP_WORD_SIZE - winBits));
  7885. n <<= winBits;
  7886. c = SP_WORD_SIZE - winBits;
  7887. }
  7888. else if (c < winBits) {
  7889. /* Bits to end of digit and part of next */
  7890. y = (int)(n >> (SP_WORD_SIZE - winBits));
  7891. n = e->dp[i--];
  7892. c = winBits - c;
  7893. y |= (int)(n >> (SP_WORD_SIZE - c));
  7894. n <<= c;
  7895. c = SP_WORD_SIZE - c;
  7896. }
  7897. else {
  7898. /* Bits from middle of digit */
  7899. y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
  7900. n <<= winBits;
  7901. c -= winBits;
  7902. }
  7903. /* Square for number of bits in window. */
  7904. for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
  7905. err = sp_sqr(tr, tr);
  7906. if (err == MP_OKAY) {
  7907. err = _sp_mont_red(tr, m, mp);
  7908. }
  7909. }
  7910. /* Multiply by window number for next set of bits. */
  7911. if (err == MP_OKAY) {
  7912. err = sp_mul(tr, t[y], tr);
  7913. }
  7914. if (err == MP_OKAY) {
  7915. err = _sp_mont_red(tr, m, mp);
  7916. }
  7917. }
  7918. }
  7919. if (err == MP_OKAY) {
  7920. /* Convert from montgomery form. */
  7921. err = _sp_mont_red(tr, m, mp);
  7922. /* Reduction implementation returns number to range < m. */
  7923. }
  7924. }
  7925. if ((!done) && (err == MP_OKAY)) {
  7926. err = sp_copy(tr, r);
  7927. }
  7928. FREE_SP_INT_ARRAY(t, NULL);
  7929. return err;
  7930. }
  7931. #undef SP_ALLOC
  7932. #endif /* !WC_NO_CACHE_RESISTANT */
  7933. #endif /* !WC_NO_HARDEN */
  7934. #if SP_WORD_SIZE <= 16
  7935. #define EXP2_WINSIZE 2
  7936. #elif SP_WORD_SIZE <= 32
  7937. #define EXP2_WINSIZE 3
  7938. #elif SP_WORD_SIZE <= 64
  7939. #define EXP2_WINSIZE 4
  7940. #elif SP_WORD_SIZE <= 128
  7941. #define EXP2_WINSIZE 5
  7942. #endif
  7943. /* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
  7944. * Is constant time and cache attack resistant.
  7945. *
  7946. * @param [in] e SP integer that is the exponent.
7947. * @param [in] digits Number of digits in exponent to use. May be greater than
7948. * count of digits in e.
  7949. * @param [in] m SP integer that is the modulus.
  7950. * @param [out] r SP integer to hold result.
  7951. *
  7952. * @return MP_OKAY on success.
  7953. * @return MP_MEM when dynamic memory allocation fails.
  7954. */
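/* Implementation note: with a base of 2, multiplying by a window value 2^y
 * is just a left shift (sp_mul_2d), so no table of powers is needed. For
 * multi-digit moduli the working value is kept in Montgomery form and a
 * fixed multiple of m (t = m << (1 << EXP2_WINSIZE)) is added before each
 * sp_mod so the reduction always sees an operand of the same size, keeping
 * the timing even.
 */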
  7955. static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r)
  7956. {
  7957. int i;
  7958. int j;
  7959. int c;
  7960. int y;
  7961. int err = MP_OKAY;
  7962. sp_int* t = NULL;
  7963. sp_int* tr = NULL;
  7964. sp_int_digit mp = 0, n;
  7965. DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
  7966. if (0) {
  7967. sp_print_int(2, "a");
  7968. sp_print(e, "b");
  7969. sp_print(m, "m");
  7970. }
  7971. ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
  7972. if (err == MP_OKAY) {
  7973. t = d[0];
  7974. tr = d[1];
  7975. sp_init_size(t, m->used * 2 + 1);
  7976. sp_init_size(tr, m->used * 2 + 1);
  7977. if (m->used > 1) {
  7978. err = sp_mont_setup(m, &mp);
  7979. if (err == MP_OKAY) {
  7980. /* Norm value is 1 in montgomery form. */
  7981. err = sp_mont_norm(tr, m);
  7982. }
  7983. if (err == MP_OKAY) {
  7984. err = sp_mul_2d(m, 1 << EXP2_WINSIZE, t);
  7985. }
  7986. }
  7987. else {
  7988. err = sp_set(tr, 1);
  7989. }
  7990. if (err == MP_OKAY) {
  7991. /* Bits from the top. */
  7992. i = digits - 1;
  7993. n = e->dp[i--];
  7994. c = SP_WORD_SIZE;
  7995. #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
  7996. c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
  7997. if (c != SP_WORD_SIZE) {
  7998. y = (int)(n >> c);
  7999. n <<= SP_WORD_SIZE - c;
  8000. }
  8001. else
  8002. #endif
  8003. {
  8004. y = 0;
  8005. }
  8006. /* Multiply montgomery representation of 1 by 2 ^ top */
  8007. err = sp_mul_2d(tr, y, tr);
  8008. }
  8009. if ((err == MP_OKAY) && (m->used > 1)) {
  8010. err = sp_add(tr, t, tr);
  8011. }
  8012. if (err == MP_OKAY) {
  8013. err = sp_mod(tr, m, tr);
  8014. }
  8015. if (err == MP_OKAY) {
  8016. for (; (i >= 0) || (c >= EXP2_WINSIZE); ) {
  8017. if (c == 0) {
  8018. /* Bits up to end of digit */
  8019. n = e->dp[i--];
  8020. y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
  8021. n <<= EXP2_WINSIZE;
  8022. c = SP_WORD_SIZE - EXP2_WINSIZE;
  8023. }
  8024. #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
  8025. else if (c < EXP2_WINSIZE) {
  8026. /* Bits to end of digit and part of next */
  8027. y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
  8028. n = e->dp[i--];
  8029. c = EXP2_WINSIZE - c;
  8030. y |= (int)(n >> (SP_WORD_SIZE - c));
  8031. n <<= c;
  8032. c = SP_WORD_SIZE - c;
  8033. }
  8034. #endif
  8035. else {
  8036. /* Bits from middle of digit */
  8037. y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) &
  8038. ((1 << EXP2_WINSIZE) - 1));
  8039. n <<= EXP2_WINSIZE;
  8040. c -= EXP2_WINSIZE;
  8041. }
  8042. /* Square for number of bits in window. */
  8043. for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
  8044. err = sp_sqr(tr, tr);
  8045. if (err != MP_OKAY) {
  8046. break;
  8047. }
  8048. if (m->used > 1) {
  8049. err = _sp_mont_red(tr, m, mp);
  8050. }
  8051. else {
  8052. err = sp_mod(tr, m, tr);
  8053. }
  8054. }
  8055. if (err == MP_OKAY) {
  8056. /* then multiply by 2^y */
  8057. err = sp_mul_2d(tr, y, tr);
  8058. }
  8059. if ((err == MP_OKAY) && (m->used > 1)) {
  8060. /* Add in value to make mod operation take same time */
  8061. err = sp_add(tr, t, tr);
  8062. }
  8063. if (err == MP_OKAY) {
  8064. err = sp_mod(tr, m, tr);
  8065. }
  8066. if (err != MP_OKAY) {
  8067. break;
  8068. }
  8069. }
  8070. }
  8071. if ((err == MP_OKAY) && (m->used > 1)) {
  8072. /* Convert from montgomery form. */
  8073. err = _sp_mont_red(tr, m, mp);
  8074. /* Reduction implementation returns number to range < m. */
  8075. }
  8076. }
  8077. if (err == MP_OKAY) {
  8078. err = sp_copy(tr, r);
  8079. }
  8080. if (0) {
  8081. sp_print(r, "rme");
  8082. }
  8083. FREE_SP_INT_ARRAY(d, NULL);
  8084. return err;
  8085. }
  8086. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  8087. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  8088. defined(WOLFSSL_HAVE_SP_DH)
  8089. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  8090. *
  8091. * @param [in] b SP integer that is the base.
  8092. * @param [in] e SP integer that is the exponent.
8093. * @param [in] digits Number of digits in exponent to use. May be greater than
8094. * count of digits in e.
  8095. * @param [in] m SP integer that is the modulus.
  8096. * @param [out] r SP integer to hold result.
  8097. *
  8098. * @return MP_OKAY on success.
  8099. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  8100. * @return MP_MEM when dynamic memory allocation fails.
  8101. */
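/* Dispatch note: when the fixed-size SP implementations are built in
 * (WOLFSSL_HAVE_SP_RSA/WOLFSSL_HAVE_SP_DH), odd moduli of exactly
 * 1024/1536/2048/3072/4096 bits are handed to the dedicated sp_ModExp_*
 * routines; a base of 2 goes to _sp_exptmod_base_2; otherwise the
 * Montgomery window code or the generic bit-at-a-time implementation is
 * used, depending on build options.
 */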
  8102. int sp_exptmod_ex(sp_int* b, sp_int* e, int digits, sp_int* m, sp_int* r)
  8103. {
  8104. int err = MP_OKAY;
  8105. int done = 0;
  8106. int mBits = sp_count_bits(m);
  8107. int bBits = sp_count_bits(b);
  8108. int eBits = sp_count_bits(e);
  8109. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  8110. err = MP_VAL;
  8111. }
  8112. if (0 && (err == MP_OKAY)) {
  8113. sp_print(b, "a");
  8114. sp_print(e, "b");
  8115. sp_print(m, "m");
  8116. }
  8117. if (err != MP_OKAY) {
  8118. }
  8119. /* Handle special cases. */
  8120. else if (sp_iszero(m)) {
  8121. err = MP_VAL;
  8122. }
  8123. #ifdef WOLFSSL_SP_INT_NEGATIVE
  8124. else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
  8125. err = MP_VAL;
  8126. }
  8127. #endif
  8128. else if (sp_isone(m)) {
  8129. sp_set(r, 0);
  8130. done = 1;
  8131. }
  8132. else if (sp_iszero(e)) {
  8133. sp_set(r, 1);
  8134. done = 1;
  8135. }
  8136. else if (sp_iszero(b)) {
  8137. sp_set(r, 0);
  8138. done = 1;
  8139. }
  8140. /* Ensure SP integers have space for intermediate values. */
  8141. else if (m->used * 2 >= r->size) {
  8142. err = MP_VAL;
  8143. }
  8144. if ((!done) && (err == MP_OKAY)) {
  8145. /* Use code optimized for specific sizes if possible */
  8146. #if defined(WOLFSSL_SP_MATH_ALL) && (defined(WOLFSSL_HAVE_SP_RSA) || \
  8147. defined(WOLFSSL_HAVE_SP_DH))
  8148. #ifndef WOLFSSL_SP_NO_2048
  8149. if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
  8150. (eBits <= 1024)) {
  8151. err = sp_ModExp_1024(b, e, m, r);
  8152. done = 1;
  8153. }
  8154. else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
  8155. (eBits <= 2048)) {
  8156. err = sp_ModExp_2048(b, e, m, r);
  8157. done = 1;
  8158. }
  8159. else
  8160. #endif
  8161. #ifndef WOLFSSL_SP_NO_3072
  8162. if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
  8163. (eBits <= 1536)) {
  8164. err = sp_ModExp_1536(b, e, m, r);
  8165. done = 1;
  8166. }
  8167. else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
  8168. (eBits <= 3072)) {
  8169. err = sp_ModExp_3072(b, e, m, r);
  8170. done = 1;
  8171. }
  8172. else
  8173. #endif
  8174. #ifdef WOLFSSL_SP_4096
  8175. if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
  8176. (eBits <= 4096)) {
  8177. err = sp_ModExp_4096(b, e, m, r);
  8178. done = 1;
  8179. }
  8180. else
  8181. #endif
  8182. #endif
  8183. {
  8184. }
  8185. }
  8186. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  8187. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  8188. !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  8189. if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2)) {
  8190. /* Use the generic base 2 implementation. */
  8191. err = _sp_exptmod_base_2(e, digits, m, r);
  8192. }
  8193. else if ((!done) && (err == MP_OKAY) && (m->used > 1)) {
  8194. #ifndef WC_NO_HARDEN
  8195. err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
  8196. #else
  8197. err = sp_exptmod_nct(b, e, m, r);
  8198. #endif
  8199. }
  8200. else
  8201. #elif defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)
  8202. err = sp_exptmod_nct(b, e, m, r);
  8203. #endif
  8204. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  8205. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
  8206. if ((!done) && (err == MP_OKAY)) {
  8207. /* Otherwise use the generic implementation. */
  8208. err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
  8209. }
  8210. #endif
  8211. #else
  8212. if ((!done) && (err == MP_OKAY)) {
  8213. err = MP_VAL;
  8214. }
  8215. #endif
  8216. (void)mBits;
  8217. (void)bBits;
  8218. (void)eBits;
  8219. (void)digits;
  8220. if (0 && (err == MP_OKAY)) {
  8221. sp_print(r, "rme");
  8222. }
  8223. return err;
  8224. }
  8225. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  8226. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  8227. defined(WOLFSSL_HAVE_SP_DH)
  8228. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  8229. *
  8230. * @param [in] b SP integer that is the base.
  8231. * @param [in] e SP integer that is the exponent.
  8232. * @param [in] m SP integer that is the modulus.
  8233. * @param [out] r SP integer to hold result.
  8234. *
  8235. * @return MP_OKAY on success.
  8236. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  8237. * @return MP_MEM when dynamic memory allocation fails.
  8238. */
  8239. int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
  8240. {
  8241. int err = MP_OKAY;
  8242. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  8243. err = MP_VAL;
  8244. }
  8245. if (err == MP_OKAY) {
  8246. err = sp_exptmod_ex(b, e, e->used, m, r);
  8247. }
  8248. return err;
  8249. }
  8250. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  8251. * WOLFSSL_HAVE_SP_DH */
  8252. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  8253. #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
  8254. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  8255. #define SP_ALLOC
  8256. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  8257. * Creates a window of precalculated exponents with base in montgomery form.
  8258. * Sliding window and is NOT constant time.
  8259. *
  8260. * @param [in] b SP integer that is the base.
  8261. * @param [in] e SP integer that is the exponent.
  8264. * @param [in] m SP integer that is the modulus.
  8265. * @param [out] r SP integer to hold result.
  8266. *
  8267. * @return MP_OKAY on success.
  8268. * @return MP_MEM when dynamic memory allocation fails.
  8269. */
  8270. static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
  8271. {
  8272. int i;
  8273. int j;
  8274. int c;
  8275. int y;
  8276. int bits;
  8277. int winBits;
  8278. int preCnt;
  8279. int err = MP_OKAY;
  8280. int done = 0;
  8281. sp_int* tr = NULL;
  8282. sp_int* bm = NULL;
  8283. sp_int_digit mask;
  8284. /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
  8285. DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
  8286. bits = sp_count_bits(e);
  8287. if (bits > 450) {
  8288. winBits = 6;
  8289. }
  8290. else if (bits <= 21) {
  8291. winBits = 1;
  8292. }
  8293. else if (bits <= 36) {
  8294. winBits = 3;
  8295. }
  8296. else if (bits <= 140) {
  8297. winBits = 4;
  8298. }
  8299. else {
  8300. winBits = 5;
  8301. }
  8302. preCnt = 1 << (winBits - 1);
  8303. mask = preCnt - 1;
  8304. ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL);
  8305. if (err == MP_OKAY) {
  8306. /* Initialize window numbers and temporary result. */
  8307. tr = t[preCnt + 0];
  8308. bm = t[preCnt + 1];
  8309. for (i = 0; i < preCnt; i++) {
  8310. sp_init_size(t[i], m->used * 2 + 1);
  8311. }
  8312. sp_init_size(tr, m->used * 2 + 1);
  8313. sp_init_size(bm, m->used * 2 + 1);
8314. /* Ensure base is less than modulus. */
  8315. if (_sp_cmp(b, m) != MP_LT) {
  8316. err = sp_mod(b, m, bm);
  8317. if ((err == MP_OKAY) && sp_iszero(bm)) {
  8318. sp_set(r, 0);
  8319. done = 1;
  8320. }
  8321. }
  8322. else {
  8323. err = sp_copy(b, bm);
  8324. }
  8325. }
  8326. if ((!done) && (err == MP_OKAY)) {
  8327. sp_int_digit mp;
  8328. sp_int_digit n;
  8329. err = sp_mont_setup(m, &mp);
  8330. if (err == MP_OKAY) {
  8331. err = sp_mont_norm(t[0], m);
  8332. }
  8333. if (err == MP_OKAY) {
  8334. err = sp_mulmod(bm, t[0], m, bm);
  8335. }
  8336. if (err == MP_OKAY) {
  8337. err = sp_copy(bm, t[0]);
  8338. }
  8339. for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
  8340. err = sp_sqr(t[0], t[0]);
  8341. if (err == MP_OKAY) {
  8342. err = _sp_mont_red(t[0], m, mp);
  8343. }
  8344. }
  8345. for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
  8346. err = sp_mul(t[i-1], bm, t[i]);
  8347. if (err == MP_OKAY) {
  8348. err = _sp_mont_red(t[i], m, mp);
  8349. }
  8350. }
  8351. if (err == MP_OKAY) {
  8352. /* Find the top bit. */
  8353. i = (bits - 1) >> SP_WORD_SHIFT;
  8354. n = e->dp[i--];
  8355. c = bits % SP_WORD_SIZE;
  8356. if (c == 0) {
  8357. c = SP_WORD_SIZE;
  8358. }
  8359. /* Put top bit at highest offset in digit. */
  8360. n <<= SP_WORD_SIZE - c;
  8361. if (bits >= winBits) {
  8362. /* Top bit set. Copy from window. */
  8363. if (c < winBits) {
  8364. /* Bits to end of digit and part of next */
  8365. y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
  8366. n = e->dp[i--];
  8367. c = winBits - c;
  8368. y |= (int)(n >> (SP_WORD_SIZE - c));
  8369. n <<= c;
  8370. c = SP_WORD_SIZE - c;
  8371. }
  8372. else {
  8373. /* Bits from middle of digit */
  8374. y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
  8375. n <<= winBits;
  8376. c -= winBits;
  8377. }
  8378. err = sp_copy(t[y], tr);
  8379. }
  8380. else {
  8381. /* 1 in Montgomery form. */
  8382. err = sp_mont_norm(tr, m);
  8383. }
  8384. while (err == MP_OKAY) {
8385. /* Square until we find a bit that is 1 or there's less than a
  8386. * window of bits left.
  8387. */
  8388. while ((i >= 0) || (c >= winBits)) {
  8389. sp_digit n2 = n;
  8390. int c2 = c;
  8391. int i2 = i;
  8392. /* Make sure n2 has bits from the right digit. */
  8393. if (c2 == 0) {
  8394. n2 = e->dp[i2--];
  8395. c2 = SP_WORD_SIZE;
  8396. }
  8397. /* Mask off the next bit. */
  8398. y = (int)((n2 >> (SP_WORD_SIZE - 1)) & 1);
  8399. if (y == 1) {
  8400. break;
  8401. }
  8402. /* Square and update position. */
  8403. err = sp_sqr(tr, tr);
  8404. if (err == MP_OKAY) {
  8405. err = _sp_mont_red(tr, m, mp);
  8406. }
  8407. n = n2 << 1;
  8408. c = c2 - 1;
  8409. i = i2;
  8410. }
  8411. if (err == MP_OKAY) {
  8412. /* Check we have enough bits left for a window. */
  8413. if ((i < 0) && (c < winBits)) {
  8414. break;
  8415. }
  8416. if (c == 0) {
  8417. /* Bits up to end of digit */
  8418. n = e->dp[i--];
  8419. y = (int)(n >> (SP_WORD_SIZE - winBits));
  8420. n <<= winBits;
  8421. c = SP_WORD_SIZE - winBits;
  8422. }
  8423. else if (c < winBits) {
  8424. /* Bits to end of digit and part of next */
  8425. y = (int)(n >> (SP_WORD_SIZE - winBits));
  8426. n = e->dp[i--];
  8427. c = winBits - c;
  8428. y |= (int)(n >> (SP_WORD_SIZE - c));
  8429. n <<= c;
  8430. c = SP_WORD_SIZE - c;
  8431. }
  8432. else {
  8433. /* Bits from middle of digit */
  8434. y = (int)(n >> (SP_WORD_SIZE - winBits));
  8435. n <<= winBits;
  8436. c -= winBits;
  8437. }
  8438. y &= mask;
  8439. }
  8440. /* Square for number of bits in window. */
  8441. for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
  8442. err = sp_sqr(tr, tr);
  8443. if (err == MP_OKAY) {
  8444. err = _sp_mont_red(tr, m, mp);
  8445. }
  8446. }
  8447. /* Multiply by window number for next set of bits. */
  8448. if (err == MP_OKAY) {
  8449. err = sp_mul(tr, t[y], tr);
  8450. }
  8451. if (err == MP_OKAY) {
  8452. err = _sp_mont_red(tr, m, mp);
  8453. }
  8454. }
  8455. if ((err == MP_OKAY) && (c > 0)) {
  8456. /* Handle remaining bits.
  8457. * Window values have top bit set and can't be used. */
  8458. n = e->dp[0];
  8459. for (--c; (err == MP_OKAY) && (c >= 0); c--) {
  8460. err = sp_sqr(tr, tr);
  8461. if (err == MP_OKAY) {
  8462. err = _sp_mont_red(tr, m, mp);
  8463. }
  8464. if ((err == MP_OKAY) && ((n >> c) & 1)) {
  8465. err = sp_mul(tr, bm, tr);
  8466. if (err == MP_OKAY) {
  8467. err = _sp_mont_red(tr, m, mp);
  8468. }
  8469. }
  8470. }
  8471. }
  8472. }
  8473. if (err == MP_OKAY) {
8474. /* Convert from Montgomery form. */
8475. err = _sp_mont_red(tr, m, mp);
8476. /* The reduction implementation returns the number to the range < m. */
  8477. }
  8478. }
  8479. if ((!done) && (err == MP_OKAY)) {
  8480. err = sp_copy(tr, r);
  8481. }
  8482. FREE_SP_INT_ARRAY(t, NULL);
  8483. return err;
  8484. }
  8485. #undef SP_ALLOC
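/* Illustrative sketch (addition, not part of the original sp_int.c): the
 * fixed-top-bit sliding window technique used by _sp_exptmod_nct() above,
 * written for ordinary unsigned longs so the window handling is easier to
 * follow. EXF_WIN and the function name are hypothetical; Montgomery form is
 * omitted and reduction is a plain "% m", so m must be small enough that the
 * product of two residues does not overflow an unsigned long. The block is
 * disabled with #if 0.
 */
#if 0
#define EXF_WIN 3
static unsigned long example_sliding_window_exptmod(unsigned long b,
    unsigned long e, unsigned long m)
{
    /* table[i] = b ^ (2^(EXF_WIN - 1) + i) mod m, like t[] above. */
    unsigned long table[1u << (EXF_WIN - 1)];
    unsigned long r = 1 % m;
    unsigned long x = e;
    int bits = 0;
    int i;
    int j;

    b %= m;
    /* Count the significant bits of the exponent (cf. sp_count_bits()). */
    while (x != 0) {
        x >>= 1;
        bits++;
    }
    /* Build the window table: table[0] = b^(2^(EXF_WIN-1)), then multiply
     * by b for each further entry. */
    table[0] = b;
    for (i = 1; i < EXF_WIN; i++) {
        table[0] = (table[0] * table[0]) % m;
    }
    for (i = 1; i < (1 << (EXF_WIN - 1)); i++) {
        table[i] = (table[i - 1] * b) % m;
    }
    /* Process the exponent from the top bit down. */
    for (i = bits - 1; i >= 0; ) {
        if (((e >> i) & 1) == 0) {
            /* Zero bit: square once and move on (the skip loop above). */
            r = (r * r) % m;
            i--;
        }
        else if (i + 1 < EXF_WIN) {
            /* Too few bits left for a full window: square-and-multiply. */
            r = (r * r) % m;
            r = (r * b) % m;
            i--;
        }
        else {
            /* Top bit of the window is set: consume EXF_WIN bits at once. */
            unsigned long w = (e >> (i - EXF_WIN + 1)) &
                              (((unsigned long)1 << EXF_WIN) - 1);
            for (j = 0; j < EXF_WIN; j++) {
                r = (r * r) % m;
            }
            r = (r * table[w - ((unsigned long)1 << (EXF_WIN - 1))]) % m;
            i -= EXF_WIN;
        }
    }
    return r;
}
#endif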
  8486. #else
  8487. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  8488. * Non-constant time implementation.
  8489. *
  8490. * @param [in] b SP integer that is the base.
  8491. * @param [in] e SP integer that is the exponent.
  8492. * @param [in] m SP integer that is the modulus.
  8493. * @param [out] r SP integer to hold result.
  8494. *
  8495. * @return MP_OKAY on success.
  8496. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  8497. * @return MP_MEM when dynamic memory allocation fails.
  8498. */
  8499. static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
  8500. {
  8501. int i;
  8502. int err = MP_OKAY;
  8503. int done = 0;
  8504. int y;
  8505. int bits = sp_count_bits(e);
  8506. sp_int_digit mp;
  8507. DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
  8508. ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
  8509. if (err == MP_OKAY) {
  8510. sp_init_size(t[0], m->used * 2 + 1);
  8511. sp_init_size(t[1], m->used * 2 + 1);
8512. /* Ensure base is less than modulus. */
  8513. if (_sp_cmp(b, m) != MP_LT) {
  8514. err = sp_mod(b, m, t[0]);
  8515. if ((err == MP_OKAY) && sp_iszero(t[0])) {
  8516. sp_set(r, 0);
  8517. done = 1;
  8518. }
  8519. }
  8520. else {
  8521. err = sp_copy(b, t[0]);
  8522. }
  8523. }
  8524. if ((!done) && (err == MP_OKAY)) {
  8525. err = sp_mont_setup(m, &mp);
  8526. if (err == MP_OKAY) {
  8527. err = sp_mont_norm(t[1], m);
  8528. }
  8529. if (err == MP_OKAY) {
8530. /* Convert to Montgomery form. */
  8531. err = sp_mulmod(t[0], t[1], m, t[0]);
  8532. }
  8533. if (err == MP_OKAY) {
8534. /* Montgomery form of base to multiply by. */
  8535. sp_copy(t[0], t[1]);
  8536. }
  8537. for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
  8538. err = sp_sqr(t[0], t[0]);
  8539. if (err == MP_OKAY) {
  8540. err = _sp_mont_red(t[0], m, mp);
  8541. }
  8542. if (err == MP_OKAY) {
  8543. y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
  8544. if (y != 0) {
  8545. err = sp_mul(t[0], t[1], t[0]);
  8546. if (err == MP_OKAY) {
  8547. err = _sp_mont_red(t[0], m, mp);
  8548. }
  8549. }
  8550. }
  8551. }
  8552. if (err == MP_OKAY) {
8553. /* Convert from Montgomery form. */
8554. err = _sp_mont_red(t[0], m, mp);
8555. /* The reduction implementation returns the number to the range < m. */
  8556. }
  8557. }
  8558. if ((!done) && (err == MP_OKAY)) {
  8559. err = sp_copy(t[0], r);
  8560. }
  8561. FREE_SP_INT_ARRAY(t, NULL);
  8562. return err;
  8563. }
  8564. #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
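/* Worked example of the left-to-right square-and-multiply loop above
 * (illustrative note, not part of the original sp_int.c):
 *   compute 3^13 mod 7, with 13 = 0b1101.
 *   Start with the base for the top bit:           t = 3
 *   bit 1: square, multiply  t = (3^2)*3 = 27  ->  27 mod 7 = 6
 *   bit 0: square            t = 6^2      = 36 ->  36 mod 7 = 1
 *   bit 1: square, multiply  t = (1^2)*3  = 3  ->   3 mod 7 = 3
 *   Result: 3^13 mod 7 = 3.
 * The implementation above does the same thing with the operands kept in
 * Montgomery form and _sp_mont_red() used for the reductions.
 */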
  8565. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  8566. * Non-constant time implementation.
  8567. *
  8568. * @param [in] b SP integer that is the base.
  8569. * @param [in] e SP integer that is the exponent.
  8570. * @param [in] m SP integer that is the modulus.
  8571. * @param [out] r SP integer to hold result.
  8572. *
  8573. * @return MP_OKAY on success.
  8574. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  8575. * @return MP_MEM when dynamic memory allocation fails.
  8576. */
  8577. int sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
  8578. {
  8579. int err = MP_OKAY;
  8580. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  8581. err = MP_VAL;
  8582. }
  8583. if (0 && (err == MP_OKAY)) {
  8584. sp_print(b, "a");
  8585. sp_print(e, "b");
  8586. sp_print(m, "m");
  8587. }
  8588. if (err != MP_OKAY) {
  8589. }
  8590. /* Handle special cases. */
  8591. else if (sp_iszero(m)) {
  8592. err = MP_VAL;
  8593. }
  8594. #ifdef WOLFSSL_SP_INT_NEGATIVE
  8595. else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
  8596. err = MP_VAL;
  8597. }
  8598. #endif
  8599. else if (sp_isone(m)) {
  8600. sp_set(r, 0);
  8601. }
  8602. else if (sp_iszero(e)) {
  8603. sp_set(r, 1);
  8604. }
  8605. else if (sp_iszero(b)) {
  8606. sp_set(r, 0);
  8607. }
  8608. /* Ensure SP integers have space for intermediate values. */
  8609. else if (m->used * 2 >= r->size) {
  8610. err = MP_VAL;
  8611. }
  8612. else {
  8613. err = _sp_exptmod_nct(b, e, m, r);
  8614. }
  8615. if (0 && (err == MP_OKAY)) {
  8616. sp_print(r, "rme");
  8617. }
  8618. return err;
  8619. }
  8620. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
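/* Worked examples of the special cases handled by sp_exptmod_nct() above
 * (illustrative note, not part of the original sp_int.c):
 *   m == 1:  5^3 mod 1 = 0   (every value is congruent to 0 mod 1)
 *   e == 0:  5^0 mod 7 = 1   (empty product)
 *   b == 0:  0^3 mod 7 = 0
 * Only when none of these apply is _sp_exptmod_nct() called, and then r
 * must be large enough for the intermediate values (more than
 * m->used * 2 digits).
 */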
  8621. /***************
  8622. * 2^e functions
  8623. ***************/
  8624. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  8625. /* Divide by 2^e: r = a >> e and rem = bits shifted out
  8626. *
  8627. * @param [in] a SP integer to divide.
  8628. * @param [in] e Exponent bits (dividing by 2^e).
  8630. * @param [out] r SP integer to hold result.
  8631. * @param [out] rem SP integer to hold remainder.
  8632. *
  8633. * @return MP_OKAY on success.
  8634. * @return MP_VAL when a is NULL.
  8635. */
  8636. int sp_div_2d(sp_int* a, int e, sp_int* r, sp_int* rem)
  8637. {
  8638. int err = MP_OKAY;
  8639. if (a == NULL) {
  8640. err = MP_VAL;
  8641. }
  8642. if (err == MP_OKAY) {
  8643. int remBits = sp_count_bits(a) - e;
  8644. if (remBits <= 0) {
8645. /* Shifting down by more bits than are in the number. */
  8646. _sp_zero(r);
  8647. sp_copy(a, rem);
  8648. }
  8649. else {
  8650. if (rem != NULL) {
  8651. /* Copy a in to remainder. */
  8652. err = sp_copy(a, rem);
  8653. }
8654. /* Shift a down by e bits into the result. */
  8655. sp_rshb(a, e, r);
  8656. if (rem != NULL) {
  8657. /* Set used and mask off top digit of remainder. */
  8658. rem->used = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
  8659. e &= SP_WORD_MASK;
  8660. if (e > 0) {
  8661. rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
  8662. }
  8663. sp_clamp(rem);
  8664. #ifdef WOLFSSL_SP_INT_NEGATIVE
  8665. rem->sign = MP_ZPOS;
  8666. #endif
  8667. }
  8668. }
  8669. }
  8670. return err;
  8671. }
  8672. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
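/* Illustrative sketch (addition, not part of the original sp_int.c): the
 * single-word equivalent of sp_div_2d() above. Dividing by 2^e is a right
 * shift and the remainder is the low e bits, so a == (q << e) + rem.
 * The function name is hypothetical and e must satisfy
 * 0 < e < (number of bits in an unsigned long); disabled with #if 0.
 */
#if 0
static void example_div_2d_word(unsigned long a, int e,
                                unsigned long* q, unsigned long* rem)
{
    *q   = a >> e;                              /* quotient:  a / 2^e      */
    *rem = a & (((unsigned long)1 << e) - 1);   /* bits shifted out        */
    /* Example: a = 47 (0x2F), e = 3 -> q = 5, rem = 7, (5 << 3) + 7 = 47. */
}
#endif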
  8673. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  8674. /* The bottom e bits: r = a & ((1 << e) - 1)
  8675. *
  8676. * @param [in] a SP integer to reduce.
  8677. * @param [in] e Modulus bits (modulus equals 2^e).
  8678. * @param [out] r SP integer to hold result.
  8679. *
  8680. * @return MP_OKAY on success.
  8681. * @return MP_VAL when a or r is NULL.
  8682. */
  8683. int sp_mod_2d(sp_int* a, int e, sp_int* r)
  8684. {
  8685. int err = MP_OKAY;
  8686. if ((a == NULL) || (r == NULL)) {
  8687. err = MP_VAL;
  8688. }
  8689. if (err == MP_OKAY) {
  8690. int digits = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
  8691. if (a != r) {
  8692. XMEMCPY(r->dp, a->dp, digits * sizeof(sp_int_digit));
  8693. }
  8694. /* Set used and mask off top digit of result. */
  8695. r->used = digits;
  8696. e &= SP_WORD_MASK;
  8697. if (e > 0) {
  8698. r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
  8699. }
  8700. sp_clamp(r);
  8701. #ifdef WOLFSSL_SP_INT_NEGATIVE
  8702. if (sp_iszero(r)) {
  8703. r->sign = MP_ZPOS;
  8704. }
  8705. else if (a != r) {
  8706. r->sign = a->sign;
  8707. }
  8708. #endif
  8709. }
  8710. return err;
  8711. }
  8712. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
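/* Worked example of the digit and mask calculation in sp_mod_2d() above
 * (illustrative note, not part of the original sp_int.c), assuming a 32-bit
 * digit (SP_WORD_SIZE == 32):
 *   e = 70 -> digits = (70 + 31) >> 5 = 3, so the low 3 digits are kept;
 *   e & SP_WORD_MASK = 70 & 31 = 6, so the top kept digit is masked with
 *   ((sp_int_digit)1 << 6) - 1 = 0x3F, leaving 64 + 6 = 70 bits in total.
 */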
  8713. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  8714. /* Multiply by 2^e: r = a << e
  8715. *
  8716. * @param [in] a SP integer to multiply.
  8717. * @param [in] e Multiplier bits (multiplier equals 2^e).
  8718. * @param [out] r SP integer to hold result.
  8719. *
  8720. * @return MP_OKAY on success.
  8721. * @return MP_VAL when a or r is NULL, or result is too big for fixed data
  8722. * length.
  8723. */
  8724. int sp_mul_2d(sp_int* a, int e, sp_int* r)
  8725. {
  8726. int err = MP_OKAY;
  8727. if ((a == NULL) || (r == NULL)) {
  8728. err = MP_VAL;
  8729. }
  8730. if ((err == MP_OKAY) && (sp_count_bits(a) + e > r->size * SP_WORD_SIZE)) {
  8731. err = MP_VAL;
  8732. }
  8733. if (err == MP_OKAY) {
8734. /* Copy a into r as the left shift function works on the number in place. */
  8735. if (a != r) {
  8736. err = sp_copy(a, r);
  8737. }
  8738. }
  8739. if (err == MP_OKAY) {
  8740. if (0) {
  8741. sp_print(a, "a");
  8742. sp_print_int(e, "n");
  8743. }
  8744. err = sp_lshb(r, e);
  8745. if (0) {
  8746. sp_print(r, "rsl");
  8747. }
  8748. }
  8749. return err;
  8750. }
  8751. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
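/* Worked example of the size check in sp_mul_2d() above (illustrative note,
 * not part of the original sp_int.c), assuming SP_WORD_SIZE == 32:
 *   if a has 100 significant bits and e = 30, the result needs
 *   100 + 30 = 130 bits, so r->size * 32 must be at least 130, i.e. r must
 *   have been created with at least 5 digits ((130 + 31) / 32 = 5);
 *   otherwise MP_VAL is returned before any shifting is done.
 */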
  8752. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  8753. defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  8754. /* START SP_SQR implementations */
  8755. /* This code is generated.
  8756. * To generate:
  8757. * cd scripts/sp/sp_int
  8758. * ./gen.sh
8759. * The generated code is in sp_sqr.c.
  8760. */
  8761. #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
  8762. #ifdef SQR_MUL_ASM
  8763. /* Square a and store in r. r = a * a
  8764. *
  8765. * @param [in] a SP integer to square.
  8766. * @param [out] r SP integer result.
  8767. *
  8768. * @return MP_OKAY on success.
  8769. * @return MP_MEM when dynamic memory allocation fails.
  8770. */
  8771. static int _sp_sqr(sp_int* a, sp_int* r)
  8772. {
  8773. int err = MP_OKAY;
  8774. int i;
  8775. int j;
  8776. int k;
  8777. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8778. sp_int_digit* t = NULL;
  8779. #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  8780. defined(WOLFSSL_SP_SMALL)
  8781. sp_int_digit t[a->used * 2];
  8782. #else
  8783. sp_int_digit t[SP_INT_DIGITS];
  8784. #endif
  8785. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8786. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
  8787. DYNAMIC_TYPE_BIGINT);
  8788. if (t == NULL) {
  8789. err = MP_MEM;
  8790. }
  8791. #endif
  8792. if ((err == MP_OKAY) && (a->used <= 1)) {
  8793. sp_int_digit l, h;
  8794. h = 0;
  8795. l = 0;
  8796. SP_ASM_SQR(h, l, a->dp[0]);
  8797. t[0] = h;
  8798. t[1] = l;
  8799. }
  8800. else if (err == MP_OKAY) {
  8801. sp_int_digit l, h, o;
  8802. h = 0;
  8803. l = 0;
  8804. SP_ASM_SQR(h, l, a->dp[0]);
  8805. t[0] = h;
  8806. h = 0;
  8807. o = 0;
  8808. for (k = 1; k < (a->used + 1) / 2; k++) {
  8809. i = k;
  8810. j = k - 1;
  8811. for (; (j >= 0); i++, j--) {
  8812. SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
  8813. }
  8814. t[k * 2 - 1] = l;
  8815. l = h;
  8816. h = o;
  8817. o = 0;
  8818. SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
  8819. i = k + 1;
  8820. j = k - 1;
  8821. for (; (j >= 0); i++, j--) {
  8822. SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
  8823. }
  8824. t[k * 2] = l;
  8825. l = h;
  8826. h = o;
  8827. o = 0;
  8828. }
  8829. for (; k < a->used; k++) {
  8830. i = k;
  8831. j = k - 1;
  8832. for (; (i < a->used); i++, j--) {
  8833. SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
  8834. }
  8835. t[k * 2 - 1] = l;
  8836. l = h;
  8837. h = o;
  8838. o = 0;
  8839. SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
  8840. i = k + 1;
  8841. j = k - 1;
  8842. for (; (i < a->used); i++, j--) {
  8843. SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
  8844. }
  8845. t[k * 2] = l;
  8846. l = h;
  8847. h = o;
  8848. o = 0;
  8849. }
  8850. t[k * 2 - 1] = l;
  8851. }
  8852. if (err == MP_OKAY) {
  8853. r->used = a->used * 2;
  8854. XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
  8855. sp_clamp(r);
  8856. }
  8857. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8858. if (t != NULL) {
  8859. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  8860. }
  8861. #endif
  8862. return err;
  8863. }
  8864. #else /* !SQR_MUL_ASM */
  8865. /* Square a and store in r. r = a * a
  8866. *
  8867. * @param [in] a SP integer to square.
  8868. * @param [out] r SP integer result.
  8869. *
  8870. * @return MP_OKAY on success.
  8871. * @return MP_MEM when dynamic memory allocation fails.
  8872. */
  8873. static int _sp_sqr(sp_int* a, sp_int* r)
  8874. {
  8875. int err = MP_OKAY;
  8876. int i;
  8877. int j;
  8878. int k;
  8879. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8880. sp_int_digit* t = NULL;
  8881. #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  8882. defined(WOLFSSL_SP_SMALL)
  8883. sp_int_digit t[a->used * 2];
  8884. #else
  8885. sp_int_digit t[SP_INT_DIGITS];
  8886. #endif
  8887. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8888. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
  8889. DYNAMIC_TYPE_BIGINT);
  8890. if (t == NULL) {
  8891. err = MP_MEM;
  8892. }
  8893. #endif
  8894. if (err == MP_OKAY) {
  8895. sp_int_word w;
  8896. sp_int_word l;
  8897. sp_int_word h;
  8898. #ifdef SP_WORD_OVERFLOW
  8899. sp_int_word o;
  8900. #endif
  8901. w = (sp_int_word)a->dp[0] * a->dp[0];
  8902. t[0] = (sp_int_digit)w;
  8903. l = (sp_int_digit)(w >> SP_WORD_SIZE);
  8904. h = 0;
  8905. #ifdef SP_WORD_OVERFLOW
  8906. o = 0;
  8907. #endif
  8908. for (k = 1; k <= (a->used - 1) * 2; k++) {
  8909. i = k / 2;
  8910. j = k - i;
  8911. if (i == j) {
  8912. w = (sp_int_word)a->dp[i] * a->dp[j];
  8913. l += (sp_int_digit)w;
  8914. h += (sp_int_digit)(w >> SP_WORD_SIZE);
  8915. #ifdef SP_WORD_OVERFLOW
  8916. h += (sp_int_digit)(l >> SP_WORD_SIZE);
  8917. l &= SP_MASK;
  8918. o += (sp_int_digit)(h >> SP_WORD_SIZE);
  8919. h &= SP_MASK;
  8920. #endif
  8921. }
  8922. for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
  8923. w = (sp_int_word)a->dp[i] * a->dp[j];
  8924. l += (sp_int_digit)w;
  8925. h += (sp_int_digit)(w >> SP_WORD_SIZE);
  8926. #ifdef SP_WORD_OVERFLOW
  8927. h += (sp_int_digit)(l >> SP_WORD_SIZE);
  8928. l &= SP_MASK;
  8929. o += (sp_int_digit)(h >> SP_WORD_SIZE);
  8930. h &= SP_MASK;
  8931. #endif
  8932. l += (sp_int_digit)w;
  8933. h += (sp_int_digit)(w >> SP_WORD_SIZE);
  8934. #ifdef SP_WORD_OVERFLOW
  8935. h += (sp_int_digit)(l >> SP_WORD_SIZE);
  8936. l &= SP_MASK;
  8937. o += (sp_int_digit)(h >> SP_WORD_SIZE);
  8938. h &= SP_MASK;
  8939. #endif
  8940. }
  8941. t[k] = (sp_int_digit)l;
  8942. l >>= SP_WORD_SIZE;
  8943. l += (sp_int_digit)h;
  8944. h >>= SP_WORD_SIZE;
  8945. #ifdef SP_WORD_OVERFLOW
  8946. h += o & SP_MASK;
  8947. o >>= SP_WORD_SIZE;
  8948. #endif
  8949. }
  8950. t[k] = (sp_int_digit)l;
  8951. r->used = k + 1;
  8952. XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
  8953. sp_clamp(r);
  8954. }
  8955. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8956. if (t != NULL) {
  8957. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  8958. }
  8959. #endif
  8960. return err;
  8961. }
  8962. #endif /* SQR_MUL_ASM */
  8963. #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
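/* Illustrative sketch (addition, not part of the original sp_int.c): the
 * column-by-column (comba) squaring idea used by _sp_sqr() above, shown for
 * a 2-"digit" number with 16-bit digits so the doubling of cross products
 * is visible:  (a1*B + a0)^2 = a1^2*B^2 + 2*a1*a0*B + a0^2, with B = 2^16.
 * With 16-bit digits a 64-bit accumulator never overflows, so the extra
 * overflow word (o) tracked by the real code is not needed here.
 * The function name is hypothetical; the block is disabled with #if 0.
 */
#if 0
static void example_sqr_2_digits(const unsigned short a[2], unsigned short r[4])
{
    unsigned long long w;                      /* double-width accumulator */

    w = (unsigned long long)a[0] * a[0];       /* column 0: a0^2           */
    r[0] = (unsigned short)w;
    w >>= 16;
    w += 2ull * a[0] * a[1];                   /* column 1: 2*a0*a1        */
    r[1] = (unsigned short)w;
    w >>= 16;
    w += (unsigned long long)a[1] * a[1];      /* column 2: a1^2           */
    r[2] = (unsigned short)w;
    r[3] = (unsigned short)(w >> 16);          /* final carry              */
    /* Example: a = {0xFFFF, 0xFFFF} (0xFFFFFFFF)
     *          -> r = {0x0001, 0x0000, 0xFFFE, 0xFFFF} = 0xFFFFFFFE00000001. */
}
#endif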
  8964. #ifndef WOLFSSL_SP_SMALL
  8965. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  8966. #if SP_WORD_SIZE == 64
  8967. #ifndef SQR_MUL_ASM
  8968. /* Square a and store in r. r = a * a
  8969. *
  8970. * @param [in] a SP integer to square.
  8971. * @param [out] r SP integer result.
  8972. *
  8973. * @return MP_OKAY on success.
  8974. * @return MP_MEM when dynamic memory allocation fails.
  8975. */
  8976. static int _sp_sqr_4(sp_int* a, sp_int* r)
  8977. {
  8978. int err = MP_OKAY;
  8979. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8980. sp_int_word* w = NULL;
  8981. #else
  8982. sp_int_word w[10];
  8983. #endif
  8984. sp_int_digit* da = a->dp;
  8985. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8986. w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
  8987. DYNAMIC_TYPE_BIGINT);
  8988. if (w == NULL) {
  8989. err = MP_MEM;
  8990. }
  8991. #endif
  8992. if (err == MP_OKAY) {
  8993. w[0] = (sp_int_word)da[0] * da[0];
  8994. w[1] = (sp_int_word)da[0] * da[1];
  8995. w[2] = (sp_int_word)da[0] * da[2];
  8996. w[3] = (sp_int_word)da[1] * da[1];
  8997. w[4] = (sp_int_word)da[0] * da[3];
  8998. w[5] = (sp_int_word)da[1] * da[2];
  8999. w[6] = (sp_int_word)da[1] * da[3];
  9000. w[7] = (sp_int_word)da[2] * da[2];
  9001. w[8] = (sp_int_word)da[2] * da[3];
  9002. w[9] = (sp_int_word)da[3] * da[3];
  9003. r->dp[0] = w[0];
  9004. w[0] >>= SP_WORD_SIZE;
  9005. w[0] += (sp_int_digit)w[1];
  9006. w[0] += (sp_int_digit)w[1];
  9007. r->dp[1] = w[0];
  9008. w[0] >>= SP_WORD_SIZE;
  9009. w[1] >>= SP_WORD_SIZE;
  9010. w[0] += (sp_int_digit)w[1];
  9011. w[0] += (sp_int_digit)w[1];
  9012. w[0] += (sp_int_digit)w[2];
  9013. w[0] += (sp_int_digit)w[2];
  9014. w[0] += (sp_int_digit)w[3];
  9015. r->dp[2] = w[0];
  9016. w[0] >>= SP_WORD_SIZE;
  9017. w[2] >>= SP_WORD_SIZE;
  9018. w[0] += (sp_int_digit)w[2];
  9019. w[0] += (sp_int_digit)w[2];
  9020. w[3] >>= SP_WORD_SIZE;
  9021. w[0] += (sp_int_digit)w[3];
  9022. w[0] += (sp_int_digit)w[4];
  9023. w[0] += (sp_int_digit)w[4];
  9024. w[0] += (sp_int_digit)w[5];
  9025. w[0] += (sp_int_digit)w[5];
  9026. r->dp[3] = w[0];
  9027. w[0] >>= SP_WORD_SIZE;
  9028. w[4] >>= SP_WORD_SIZE;
  9029. w[0] += (sp_int_digit)w[4];
  9030. w[0] += (sp_int_digit)w[4];
  9031. w[5] >>= SP_WORD_SIZE;
  9032. w[0] += (sp_int_digit)w[5];
  9033. w[0] += (sp_int_digit)w[5];
  9034. w[0] += (sp_int_digit)w[6];
  9035. w[0] += (sp_int_digit)w[6];
  9036. w[0] += (sp_int_digit)w[7];
  9037. r->dp[4] = w[0];
  9038. w[0] >>= SP_WORD_SIZE;
  9039. w[6] >>= SP_WORD_SIZE;
  9040. w[0] += (sp_int_digit)w[6];
  9041. w[0] += (sp_int_digit)w[6];
  9042. w[7] >>= SP_WORD_SIZE;
  9043. w[0] += (sp_int_digit)w[7];
  9044. w[0] += (sp_int_digit)w[8];
  9045. w[0] += (sp_int_digit)w[8];
  9046. r->dp[5] = w[0];
  9047. w[0] >>= SP_WORD_SIZE;
  9048. w[8] >>= SP_WORD_SIZE;
  9049. w[0] += (sp_int_digit)w[8];
  9050. w[0] += (sp_int_digit)w[8];
  9051. w[0] += (sp_int_digit)w[9];
  9052. r->dp[6] = w[0];
  9053. w[0] >>= SP_WORD_SIZE;
  9054. w[9] >>= SP_WORD_SIZE;
  9055. w[0] += (sp_int_digit)w[9];
  9056. r->dp[7] = w[0];
  9057. r->used = 8;
  9058. sp_clamp(r);
  9059. }
  9060. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9061. if (w != NULL) {
  9062. XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
  9063. }
  9064. #endif
  9065. return err;
  9066. }
  9067. #else /* SQR_MUL_ASM */
  9068. /* Square a and store in r. r = a * a
  9069. *
  9070. * @param [in] a SP integer to square.
  9071. * @param [out] r SP integer result.
  9072. *
  9073. * @return MP_OKAY on success.
  9074. * @return MP_MEM when dynamic memory allocation fails.
  9075. */
  9076. static int _sp_sqr_4(sp_int* a, sp_int* r)
  9077. {
  9078. sp_int_digit l = 0;
  9079. sp_int_digit h = 0;
  9080. sp_int_digit o = 0;
  9081. sp_int_digit t[4];
  9082. SP_ASM_SQR(h, l, a->dp[0]);
  9083. t[0] = h;
  9084. h = 0;
  9085. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9086. t[1] = l;
  9087. l = h;
  9088. h = o;
  9089. o = 0;
  9090. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9091. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9092. t[2] = l;
  9093. l = h;
  9094. h = o;
  9095. o = 0;
  9096. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9097. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9098. t[3] = l;
  9099. l = h;
  9100. h = o;
  9101. o = 0;
  9102. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9103. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9104. r->dp[4] = l;
  9105. l = h;
  9106. h = o;
  9107. o = 0;
  9108. SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
  9109. r->dp[5] = l;
  9110. l = h;
  9111. h = o;
  9112. SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
  9113. r->dp[6] = l;
  9114. r->dp[7] = h;
  9115. XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
  9116. r->used = 8;
  9117. sp_clamp(r);
  9118. return MP_OKAY;
  9119. }
  9120. #endif /* SQR_MUL_ASM */
  9121. #endif /* SP_WORD_SIZE == 64 */
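/* Column layout used by _sp_sqr_4() above (illustrative note, not part of
 * the original sp_int.c). Digit k of a^2 accumulates every product
 * a[i]*a[j] with i + j = k, and cross products (i != j) appear twice:
 *   col 0: a0^2
 *   col 1: 2*a0*a1
 *   col 2: 2*a0*a2 + a1^2
 *   col 3: 2*a0*a3 + 2*a1*a2
 *   col 4: 2*a1*a3 + a2^2
 *   col 5: 2*a2*a3
 *   col 6: a3^2
 * which is why the code pairs SP_ASM_MUL_ADD2() calls with a single
 * SP_ASM_SQR_ADD() on the even columns.
 */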
  9122. #if SP_WORD_SIZE == 64
  9123. #ifdef SQR_MUL_ASM
  9124. /* Square a and store in r. r = a * a
  9125. *
  9126. * @param [in] a SP integer to square.
  9127. * @param [out] r SP integer result.
  9128. *
  9129. * @return MP_OKAY on success.
  9130. * @return MP_MEM when dynamic memory allocation fails.
  9131. */
  9132. static int _sp_sqr_6(sp_int* a, sp_int* r)
  9133. {
  9134. sp_int_digit l = 0;
  9135. sp_int_digit h = 0;
  9136. sp_int_digit o = 0;
  9137. sp_int_digit tl = 0;
  9138. sp_int_digit th = 0;
  9139. sp_int_digit to;
  9140. sp_int_digit t[6];
  9141. SP_ASM_SQR(h, l, a->dp[0]);
  9142. t[0] = h;
  9143. h = 0;
  9144. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9145. t[1] = l;
  9146. l = h;
  9147. h = o;
  9148. o = 0;
  9149. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9150. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9151. t[2] = l;
  9152. l = h;
  9153. h = o;
  9154. o = 0;
  9155. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9156. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9157. t[3] = l;
  9158. l = h;
  9159. h = o;
  9160. o = 0;
  9161. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  9162. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9163. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9164. t[4] = l;
  9165. l = h;
  9166. h = o;
  9167. o = 0;
  9168. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  9169. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  9170. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  9171. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9172. t[5] = l;
  9173. l = h;
  9174. h = o;
  9175. o = 0;
  9176. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
  9177. SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
  9178. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  9179. r->dp[6] = l;
  9180. l = h;
  9181. h = o;
  9182. o = 0;
  9183. SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
  9184. SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
  9185. r->dp[7] = l;
  9186. l = h;
  9187. h = o;
  9188. o = 0;
  9189. SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
  9190. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  9191. r->dp[8] = l;
  9192. l = h;
  9193. h = o;
  9194. o = 0;
  9195. SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
  9196. r->dp[9] = l;
  9197. l = h;
  9198. h = o;
  9199. SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
  9200. r->dp[10] = l;
  9201. r->dp[11] = h;
  9202. XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
  9203. r->used = 12;
  9204. sp_clamp(r);
  9205. return MP_OKAY;
  9206. }
  9207. #endif /* SQR_MUL_ASM */
  9208. #endif /* SP_WORD_SIZE == 64 */
  9209. #if SP_WORD_SIZE == 32
  9210. #ifdef SQR_MUL_ASM
  9211. /* Square a and store in r. r = a * a
  9212. *
  9213. * @param [in] a SP integer to square.
  9214. * @param [out] r SP integer result.
  9215. *
  9216. * @return MP_OKAY on success.
  9217. * @return MP_MEM when dynamic memory allocation fails.
  9218. */
  9219. static int _sp_sqr_8(sp_int* a, sp_int* r)
  9220. {
  9221. sp_int_digit l = 0;
  9222. sp_int_digit h = 0;
  9223. sp_int_digit o = 0;
  9224. sp_int_digit tl = 0;
  9225. sp_int_digit th = 0;
  9226. sp_int_digit to;
  9227. sp_int_digit t[8];
  9228. SP_ASM_SQR(h, l, a->dp[0]);
  9229. t[0] = h;
  9230. h = 0;
  9231. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9232. t[1] = l;
  9233. l = h;
  9234. h = o;
  9235. o = 0;
  9236. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9237. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9238. t[2] = l;
  9239. l = h;
  9240. h = o;
  9241. o = 0;
  9242. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9243. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9244. t[3] = l;
  9245. l = h;
  9246. h = o;
  9247. o = 0;
  9248. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  9249. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9250. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9251. t[4] = l;
  9252. l = h;
  9253. h = o;
  9254. o = 0;
  9255. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  9256. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  9257. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  9258. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9259. t[5] = l;
  9260. l = h;
  9261. h = o;
  9262. o = 0;
  9263. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  9264. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  9265. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  9266. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  9267. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9268. t[6] = l;
  9269. l = h;
  9270. h = o;
  9271. o = 0;
  9272. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  9273. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  9274. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  9275. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  9276. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9277. t[7] = l;
  9278. l = h;
  9279. h = o;
  9280. o = 0;
  9281. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
  9282. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  9283. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  9284. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  9285. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9286. r->dp[8] = l;
  9287. l = h;
  9288. h = o;
  9289. o = 0;
  9290. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
  9291. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  9292. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  9293. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9294. r->dp[9] = l;
  9295. l = h;
  9296. h = o;
  9297. o = 0;
  9298. SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
  9299. SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
  9300. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  9301. r->dp[10] = l;
  9302. l = h;
  9303. h = o;
  9304. o = 0;
  9305. SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
  9306. SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
  9307. r->dp[11] = l;
  9308. l = h;
  9309. h = o;
  9310. o = 0;
  9311. SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
  9312. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  9313. r->dp[12] = l;
  9314. l = h;
  9315. h = o;
  9316. o = 0;
  9317. SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
  9318. r->dp[13] = l;
  9319. l = h;
  9320. h = o;
  9321. SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
  9322. r->dp[14] = l;
  9323. r->dp[15] = h;
  9324. XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
  9325. r->used = 16;
  9326. sp_clamp(r);
  9327. return MP_OKAY;
  9328. }
  9329. #endif /* SQR_MUL_ASM */
  9330. #endif /* SP_WORD_SIZE == 32 */
  9331. #if SP_WORD_SIZE == 32
  9332. #ifdef SQR_MUL_ASM
  9333. /* Square a and store in r. r = a * a
  9334. *
  9335. * @param [in] a SP integer to square.
  9336. * @param [out] r SP integer result.
  9337. *
  9338. * @return MP_OKAY on success.
  9339. * @return MP_MEM when dynamic memory allocation fails.
  9340. */
  9341. static int _sp_sqr_12(sp_int* a, sp_int* r)
  9342. {
  9343. sp_int_digit l = 0;
  9344. sp_int_digit h = 0;
  9345. sp_int_digit o = 0;
  9346. sp_int_digit tl = 0;
  9347. sp_int_digit th = 0;
  9348. sp_int_digit to;
  9349. sp_int_digit t[12];
  9350. SP_ASM_SQR(h, l, a->dp[0]);
  9351. t[0] = h;
  9352. h = 0;
  9353. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9354. t[1] = l;
  9355. l = h;
  9356. h = o;
  9357. o = 0;
  9358. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9359. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9360. t[2] = l;
  9361. l = h;
  9362. h = o;
  9363. o = 0;
  9364. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9365. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9366. t[3] = l;
  9367. l = h;
  9368. h = o;
  9369. o = 0;
  9370. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  9371. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9372. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9373. t[4] = l;
  9374. l = h;
  9375. h = o;
  9376. o = 0;
  9377. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  9378. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  9379. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  9380. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9381. t[5] = l;
  9382. l = h;
  9383. h = o;
  9384. o = 0;
  9385. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  9386. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  9387. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  9388. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  9389. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9390. t[6] = l;
  9391. l = h;
  9392. h = o;
  9393. o = 0;
  9394. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  9395. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  9396. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  9397. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  9398. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9399. t[7] = l;
  9400. l = h;
  9401. h = o;
  9402. o = 0;
  9403. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  9404. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  9405. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  9406. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  9407. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  9408. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9409. t[8] = l;
  9410. l = h;
  9411. h = o;
  9412. o = 0;
  9413. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  9414. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  9415. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  9416. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  9417. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  9418. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9419. t[9] = l;
  9420. l = h;
  9421. h = o;
  9422. o = 0;
  9423. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  9424. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  9425. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  9426. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  9427. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  9428. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  9429. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9430. t[10] = l;
  9431. l = h;
  9432. h = o;
  9433. o = 0;
  9434. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  9435. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  9436. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  9437. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  9438. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  9439. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  9440. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9441. t[11] = l;
  9442. l = h;
  9443. h = o;
  9444. o = 0;
  9445. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
  9446. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  9447. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  9448. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  9449. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  9450. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  9451. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9452. r->dp[12] = l;
  9453. l = h;
  9454. h = o;
  9455. o = 0;
  9456. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
  9457. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  9458. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  9459. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  9460. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  9461. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9462. r->dp[13] = l;
  9463. l = h;
  9464. h = o;
  9465. o = 0;
  9466. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
  9467. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  9468. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  9469. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  9470. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  9471. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9472. r->dp[14] = l;
  9473. l = h;
  9474. h = o;
  9475. o = 0;
  9476. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
  9477. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  9478. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  9479. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  9480. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9481. r->dp[15] = l;
  9482. l = h;
  9483. h = o;
  9484. o = 0;
  9485. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
  9486. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  9487. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  9488. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  9489. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9490. r->dp[16] = l;
  9491. l = h;
  9492. h = o;
  9493. o = 0;
  9494. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
  9495. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  9496. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  9497. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9498. r->dp[17] = l;
  9499. l = h;
  9500. h = o;
  9501. o = 0;
  9502. SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
  9503. SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
  9504. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  9505. r->dp[18] = l;
  9506. l = h;
  9507. h = o;
  9508. o = 0;
  9509. SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
  9510. SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
  9511. r->dp[19] = l;
  9512. l = h;
  9513. h = o;
  9514. o = 0;
  9515. SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
  9516. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  9517. r->dp[20] = l;
  9518. l = h;
  9519. h = o;
  9520. o = 0;
  9521. SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
  9522. r->dp[21] = l;
  9523. l = h;
  9524. h = o;
  9525. SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
  9526. r->dp[22] = l;
  9527. r->dp[23] = h;
  9528. XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
  9529. r->used = 24;
  9530. sp_clamp(r);
  9531. return MP_OKAY;
  9532. }
  9533. #endif /* SQR_MUL_ASM */
  9534. #endif /* SP_WORD_SIZE == 32 */
  9535. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  9536. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  9537. #if SP_INT_DIGITS >= 32
  9538. /* Square a and store in r. r = a * a
  9539. *
  9540. * @param [in] a SP integer to square.
  9541. * @param [out] r SP integer result.
  9542. *
  9543. * @return MP_OKAY on success.
  9544. * @return MP_MEM when dynamic memory allocation fails.
  9545. */
  9546. static int _sp_sqr_16(sp_int* a, sp_int* r)
  9547. {
  9548. int err = MP_OKAY;
  9549. sp_int_digit l = 0;
  9550. sp_int_digit h = 0;
  9551. sp_int_digit o = 0;
  9552. sp_int_digit tl = 0;
  9553. sp_int_digit th = 0;
  9554. sp_int_digit to;
  9555. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9556. sp_int_digit* t = NULL;
  9557. #else
  9558. sp_int_digit t[16];
  9559. #endif
  9560. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9561. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
  9562. DYNAMIC_TYPE_BIGINT);
  9563. if (t == NULL) {
  9564. err = MP_MEM;
  9565. }
  9566. #endif
  9567. if (err == MP_OKAY) {
  9568. SP_ASM_SQR(h, l, a->dp[0]);
  9569. t[0] = h;
  9570. h = 0;
  9571. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9572. t[1] = l;
  9573. l = h;
  9574. h = o;
  9575. o = 0;
  9576. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9577. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9578. t[2] = l;
  9579. l = h;
  9580. h = o;
  9581. o = 0;
  9582. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9583. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9584. t[3] = l;
  9585. l = h;
  9586. h = o;
  9587. o = 0;
  9588. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  9589. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9590. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9591. t[4] = l;
  9592. l = h;
  9593. h = o;
  9594. o = 0;
  9595. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  9596. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  9597. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  9598. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9599. t[5] = l;
  9600. l = h;
  9601. h = o;
  9602. o = 0;
  9603. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  9604. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  9605. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  9606. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  9607. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9608. t[6] = l;
  9609. l = h;
  9610. h = o;
  9611. o = 0;
  9612. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  9613. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  9614. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  9615. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  9616. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9617. t[7] = l;
  9618. l = h;
  9619. h = o;
  9620. o = 0;
  9621. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  9622. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  9623. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  9624. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  9625. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  9626. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9627. t[8] = l;
  9628. l = h;
  9629. h = o;
  9630. o = 0;
  9631. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  9632. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  9633. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  9634. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  9635. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  9636. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9637. t[9] = l;
  9638. l = h;
  9639. h = o;
  9640. o = 0;
  9641. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  9642. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  9643. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  9644. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  9645. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  9646. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  9647. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9648. t[10] = l;
  9649. l = h;
  9650. h = o;
  9651. o = 0;
  9652. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  9653. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  9654. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  9655. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  9656. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  9657. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  9658. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9659. t[11] = l;
  9660. l = h;
  9661. h = o;
  9662. o = 0;
  9663. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
  9664. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
  9665. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  9666. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  9667. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  9668. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  9669. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  9670. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9671. t[12] = l;
  9672. l = h;
  9673. h = o;
  9674. o = 0;
  9675. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
  9676. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
  9677. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
  9678. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  9679. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  9680. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  9681. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  9682. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9683. t[13] = l;
  9684. l = h;
  9685. h = o;
  9686. o = 0;
  9687. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
  9688. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
  9689. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
  9690. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
  9691. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  9692. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  9693. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  9694. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  9695. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9696. t[14] = l;
  9697. l = h;
  9698. h = o;
  9699. o = 0;
  9700. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
  9701. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
  9702. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
  9703. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
  9704. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
  9705. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  9706. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  9707. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  9708. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9709. t[15] = l;
  9710. l = h;
  9711. h = o;
  9712. o = 0;
  9713. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
  9714. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
  9715. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
  9716. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
  9717. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
  9718. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  9719. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  9720. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  9721. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9722. r->dp[16] = l;
  9723. l = h;
  9724. h = o;
  9725. o = 0;
  9726. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
  9727. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
  9728. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
  9729. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
  9730. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
  9731. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  9732. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  9733. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9734. r->dp[17] = l;
  9735. l = h;
  9736. h = o;
  9737. o = 0;
  9738. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
  9739. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
  9740. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
  9741. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
  9742. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
  9743. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
  9744. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  9745. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9746. r->dp[18] = l;
  9747. l = h;
  9748. h = o;
  9749. o = 0;
  9750. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
  9751. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
  9752. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
  9753. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
  9754. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
  9755. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
  9756. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9757. r->dp[19] = l;
  9758. l = h;
  9759. h = o;
  9760. o = 0;
  9761. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
  9762. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
  9763. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
  9764. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
  9765. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
  9766. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  9767. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9768. r->dp[20] = l;
  9769. l = h;
  9770. h = o;
  9771. o = 0;
  9772. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
  9773. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
  9774. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
  9775. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
  9776. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
  9777. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9778. r->dp[21] = l;
  9779. l = h;
  9780. h = o;
  9781. o = 0;
  9782. SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
  9783. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
  9784. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
  9785. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
  9786. SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
  9787. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9788. r->dp[22] = l;
  9789. l = h;
  9790. h = o;
  9791. o = 0;
  9792. SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
  9793. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
  9794. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
  9795. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
  9796. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9797. r->dp[23] = l;
  9798. l = h;
  9799. h = o;
  9800. o = 0;
  9801. SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
  9802. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
  9803. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
  9804. SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
  9805. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9806. r->dp[24] = l;
  9807. l = h;
  9808. h = o;
  9809. o = 0;
  9810. SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
  9811. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
  9812. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
  9813. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9814. r->dp[25] = l;
  9815. l = h;
  9816. h = o;
  9817. o = 0;
  9818. SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
  9819. SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
  9820. SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
  9821. r->dp[26] = l;
  9822. l = h;
  9823. h = o;
  9824. o = 0;
  9825. SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
  9826. SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
  9827. r->dp[27] = l;
  9828. l = h;
  9829. h = o;
  9830. o = 0;
  9831. SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
  9832. SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
  9833. r->dp[28] = l;
  9834. l = h;
  9835. h = o;
  9836. o = 0;
  9837. SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
  9838. r->dp[29] = l;
  9839. l = h;
  9840. h = o;
  9841. SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
  9842. r->dp[30] = l;
  9843. r->dp[31] = h;
  9844. XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
  9845. r->used = 32;
  9846. sp_clamp(r);
  9847. }
  9848. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9849. if (t != NULL) {
  9850. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  9851. }
  9852. #endif
  9853. return err;
  9854. }
  9855. #endif /* SP_INT_DIGITS >= 32 */
  9856. #if SP_INT_DIGITS >= 48
  9857. /* Square a and store in r. r = a * a
  9858. *
  9859. * @param [in] a SP integer to square.
  9860. * @param [out] r SP integer result.
  9861. *
  9862. * @return MP_OKAY on success.
  9863. * @return MP_MEM when dynamic memory allocation fails.
  9864. */
  9865. static int _sp_sqr_24(sp_int* a, sp_int* r)
  9866. {
  9867. int err = MP_OKAY;
  9868. sp_int_digit l = 0;
  9869. sp_int_digit h = 0;
  9870. sp_int_digit o = 0;
  9871. sp_int_digit tl = 0;
  9872. sp_int_digit th = 0;
  9873. sp_int_digit to;
  9874. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9875. sp_int_digit* t = NULL;
  9876. #else
  9877. sp_int_digit t[24];
  9878. #endif
  9879. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9880. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  9881. DYNAMIC_TYPE_BIGINT);
  9882. if (t == NULL) {
  9883. err = MP_MEM;
  9884. }
  9885. #endif
  9886. if (err == MP_OKAY) {
  9887. SP_ASM_SQR(h, l, a->dp[0]);
  9888. t[0] = h;
  9889. h = 0;
  9890. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  9891. t[1] = l;
  9892. l = h;
  9893. h = o;
  9894. o = 0;
  9895. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  9896. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  9897. t[2] = l;
  9898. l = h;
  9899. h = o;
  9900. o = 0;
  9901. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  9902. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  9903. t[3] = l;
  9904. l = h;
  9905. h = o;
  9906. o = 0;
  9907. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  9908. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  9909. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  9910. t[4] = l;
  9911. l = h;
  9912. h = o;
  9913. o = 0;
  9914. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  9915. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  9916. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  9917. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9918. t[5] = l;
  9919. l = h;
  9920. h = o;
  9921. o = 0;
  9922. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  9923. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  9924. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  9925. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  9926. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9927. t[6] = l;
  9928. l = h;
  9929. h = o;
  9930. o = 0;
  9931. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  9932. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  9933. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  9934. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  9935. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9936. t[7] = l;
  9937. l = h;
  9938. h = o;
  9939. o = 0;
  9940. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  9941. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  9942. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  9943. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  9944. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  9945. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9946. t[8] = l;
  9947. l = h;
  9948. h = o;
  9949. o = 0;
  9950. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  9951. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  9952. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  9953. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  9954. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  9955. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9956. t[9] = l;
  9957. l = h;
  9958. h = o;
  9959. o = 0;
  9960. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  9961. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  9962. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  9963. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  9964. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  9965. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  9966. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9967. t[10] = l;
  9968. l = h;
  9969. h = o;
  9970. o = 0;
  9971. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  9972. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  9973. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  9974. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  9975. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  9976. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  9977. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9978. t[11] = l;
  9979. l = h;
  9980. h = o;
  9981. o = 0;
  9982. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
  9983. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
  9984. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  9985. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  9986. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  9987. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  9988. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  9989. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  9990. t[12] = l;
  9991. l = h;
  9992. h = o;
  9993. o = 0;
  9994. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
  9995. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
  9996. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
  9997. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  9998. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  9999. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  10000. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  10001. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10002. t[13] = l;
  10003. l = h;
  10004. h = o;
  10005. o = 0;
  10006. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
  10007. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
  10008. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
  10009. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
  10010. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  10011. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  10012. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  10013. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  10014. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10015. t[14] = l;
  10016. l = h;
  10017. h = o;
  10018. o = 0;
  10019. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
  10020. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
  10021. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
  10022. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
  10023. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
  10024. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  10025. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  10026. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  10027. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10028. t[15] = l;
  10029. l = h;
  10030. h = o;
  10031. o = 0;
  10032. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
  10033. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
  10034. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
  10035. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
  10036. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
  10037. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
  10038. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  10039. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  10040. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  10041. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10042. t[16] = l;
  10043. l = h;
  10044. h = o;
  10045. o = 0;
  10046. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
  10047. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
  10048. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
  10049. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
  10050. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
  10051. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
  10052. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
  10053. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  10054. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  10055. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10056. t[17] = l;
  10057. l = h;
  10058. h = o;
  10059. o = 0;
  10060. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
  10061. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
  10062. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
  10063. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
  10064. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
  10065. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
  10066. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
  10067. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
  10068. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
  10069. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  10070. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10071. t[18] = l;
  10072. l = h;
  10073. h = o;
  10074. o = 0;
  10075. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
  10076. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
  10077. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
  10078. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
  10079. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
  10080. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
  10081. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
  10082. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
  10083. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
  10084. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
  10085. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10086. t[19] = l;
  10087. l = h;
  10088. h = o;
  10089. o = 0;
  10090. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
  10091. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
  10092. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
  10093. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
  10094. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
  10095. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
  10096. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
  10097. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
  10098. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
  10099. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
  10100. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  10101. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10102. t[20] = l;
  10103. l = h;
  10104. h = o;
  10105. o = 0;
  10106. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
  10107. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
  10108. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
  10109. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
  10110. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
  10111. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
  10112. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
  10113. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
  10114. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
  10115. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
  10116. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
  10117. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10118. t[21] = l;
  10119. l = h;
  10120. h = o;
  10121. o = 0;
  10122. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
  10123. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
  10124. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
  10125. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
  10126. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
  10127. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
  10128. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
  10129. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
  10130. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
  10131. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
  10132. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
  10133. SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
  10134. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10135. t[22] = l;
  10136. l = h;
  10137. h = o;
  10138. o = 0;
  10139. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
  10140. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
  10141. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
  10142. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
  10143. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
  10144. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
  10145. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
  10146. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
  10147. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
  10148. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
  10149. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
  10150. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
  10151. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10152. t[23] = l;
  10153. l = h;
  10154. h = o;
  10155. o = 0;
  10156. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
  10157. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
  10158. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
  10159. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
  10160. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
  10161. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
  10162. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
  10163. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
  10164. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
  10165. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
  10166. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
  10167. SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
  10168. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10169. r->dp[24] = l;
  10170. l = h;
  10171. h = o;
  10172. o = 0;
  10173. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
  10174. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
  10175. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
  10176. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
  10177. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
  10178. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
  10179. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
  10180. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
  10181. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
  10182. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
  10183. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
  10184. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10185. r->dp[25] = l;
  10186. l = h;
  10187. h = o;
  10188. o = 0;
  10189. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
  10190. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
  10191. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
  10192. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
  10193. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
  10194. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
  10195. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
  10196. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
  10197. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
  10198. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
  10199. SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
  10200. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10201. r->dp[26] = l;
  10202. l = h;
  10203. h = o;
  10204. o = 0;
  10205. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
  10206. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
  10207. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
  10208. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
  10209. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
  10210. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
  10211. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
  10212. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
  10213. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
  10214. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
  10215. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10216. r->dp[27] = l;
  10217. l = h;
  10218. h = o;
  10219. o = 0;
  10220. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
  10221. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
  10222. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
  10223. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
  10224. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
  10225. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
  10226. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
  10227. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
  10228. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
  10229. SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
  10230. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10231. r->dp[28] = l;
  10232. l = h;
  10233. h = o;
  10234. o = 0;
  10235. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
  10236. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
  10237. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
  10238. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
  10239. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
  10240. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
  10241. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
  10242. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
  10243. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
  10244. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10245. r->dp[29] = l;
  10246. l = h;
  10247. h = o;
  10248. o = 0;
  10249. SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
  10250. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
  10251. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
  10252. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
  10253. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
  10254. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
  10255. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
  10256. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
  10257. SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
  10258. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10259. r->dp[30] = l;
  10260. l = h;
  10261. h = o;
  10262. o = 0;
  10263. SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
  10264. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
  10265. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
  10266. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
  10267. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
  10268. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
  10269. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
  10270. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
  10271. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10272. r->dp[31] = l;
  10273. l = h;
  10274. h = o;
  10275. o = 0;
  10276. SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
  10277. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
  10278. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
  10279. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
  10280. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
  10281. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
  10282. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
  10283. SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
  10284. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10285. r->dp[32] = l;
  10286. l = h;
  10287. h = o;
  10288. o = 0;
  10289. SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
  10290. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
  10291. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
  10292. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
  10293. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
  10294. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
  10295. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
  10296. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10297. r->dp[33] = l;
  10298. l = h;
  10299. h = o;
  10300. o = 0;
  10301. SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
  10302. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
  10303. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
  10304. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
  10305. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
  10306. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
  10307. SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
  10308. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10309. r->dp[34] = l;
  10310. l = h;
  10311. h = o;
  10312. o = 0;
  10313. SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
  10314. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
  10315. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
  10316. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
  10317. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
  10318. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
  10319. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10320. r->dp[35] = l;
  10321. l = h;
  10322. h = o;
  10323. o = 0;
  10324. SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
  10325. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
  10326. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
  10327. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
  10328. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
  10329. SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
  10330. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10331. r->dp[36] = l;
  10332. l = h;
  10333. h = o;
  10334. o = 0;
  10335. SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
  10336. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
  10337. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
  10338. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
  10339. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
  10340. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10341. r->dp[37] = l;
  10342. l = h;
  10343. h = o;
  10344. o = 0;
  10345. SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
  10346. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
  10347. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
  10348. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
  10349. SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
  10350. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10351. r->dp[38] = l;
  10352. l = h;
  10353. h = o;
  10354. o = 0;
  10355. SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
  10356. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
  10357. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
  10358. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
  10359. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10360. r->dp[39] = l;
  10361. l = h;
  10362. h = o;
  10363. o = 0;
  10364. SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
  10365. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
  10366. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
  10367. SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
  10368. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10369. r->dp[40] = l;
  10370. l = h;
  10371. h = o;
  10372. o = 0;
  10373. SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
  10374. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
  10375. SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
  10376. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  10377. r->dp[41] = l;
  10378. l = h;
  10379. h = o;
  10380. o = 0;
  10381. SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
  10382. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
  10383. SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
  10384. r->dp[42] = l;
  10385. l = h;
  10386. h = o;
  10387. o = 0;
  10388. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
  10389. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
  10390. r->dp[43] = l;
  10391. l = h;
  10392. h = o;
  10393. o = 0;
  10394. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
  10395. SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
  10396. r->dp[44] = l;
  10397. l = h;
  10398. h = o;
  10399. o = 0;
  10400. SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
  10401. r->dp[45] = l;
  10402. l = h;
  10403. h = o;
  10404. SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
  10405. r->dp[46] = l;
  10406. r->dp[47] = h;
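/* Editor's note: the low 24 result digits were accumulated in the temporary
 * array t rather than written straight to r->dp. This appears to be so the
 * squaring still works when r aliases a - the low digits of a are needed
 * until all partial products are formed. They are copied into place below. */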
  10407. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  10408. r->used = 48;
  10409. sp_clamp(r);
  10410. }
  10411. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  10412. if (t != NULL) {
  10413. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  10414. }
  10415. #endif
  10416. return err;
  10417. }
  10418. #endif /* SP_INT_DIGITS >= 48 */
  10419. #if SP_INT_DIGITS >= 64
  10420. /* Square a and store in r. r = a * a
  10421. *
  10422. * @param [in] a SP integer to square.
  10423. * @param [out] r SP integer result.
  10424. *
  10425. * @return MP_OKAY on success.
  10426. * @return MP_MEM when dynamic memory allocation fails.
  10427. */
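/* Implementation note (editor's sketch of the approach used below): a is
 * split into two 16-digit halves, a = a1*R + a0 with R = 2^(16*SP_WORD_SIZE),
 * and squared Karatsuba-style:
 *   z0 = a0^2, z2 = a1^2, z1 = (a0 + a1)^2
 *   a^2 = z2*R^2 + (z1 - z0 - z2)*R + z0
 * so only three half-size squarings (_sp_sqr_16) are needed. The carry out of
 * a0 + a1 is tracked separately in ca. _sp_sqr_48, _sp_sqr_64 and _sp_sqr_96
 * below follow the same pattern with 24-, 32- and 48-digit halves. */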
  10428. static int _sp_sqr_32(sp_int* a, sp_int* r)
  10429. {
  10430. int err = MP_OKAY;
  10431. int i;
  10432. sp_int_digit l;
  10433. sp_int_digit h;
  10434. sp_int* z0;
  10435. sp_int* z1;
  10436. sp_int* z2;
  10437. sp_int_digit ca;
  10438. DECL_SP_INT(a1, 16);
  10439. DECL_SP_INT_ARRAY(z, 33, 2);
  10440. ALLOC_SP_INT(a1, 16, err, NULL);
  10441. ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
  10442. if (err == MP_OKAY) {
  10443. z1 = z[0];
  10444. z2 = z[1];
  10445. z0 = r;
  10446. XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
  10447. a1->used = 16;
  10448. /* z2 = a1 ^ 2 */
  10449. err = _sp_sqr_16(a1, z2);
  10450. }
  10451. if (err == MP_OKAY) {
  10452. l = 0;
  10453. h = 0;
  10454. for (i = 0; i < 16; i++) {
  10455. SP_ASM_ADDC(l, h, a1->dp[i]);
  10456. SP_ASM_ADDC(l, h, a->dp[i]);
  10457. a1->dp[i] = l;
  10458. l = h;
  10459. h = 0;
  10460. }
  10461. ca = l;
  10462. /* z0 = a0 ^ 2 */
  10463. err = _sp_sqr_16(a, z0);
  10464. }
  10465. if (err == MP_OKAY) {
  10466. /* z1 = (a0 + a1) ^ 2 */
  10467. err = _sp_sqr_16(a1, z1);
  10468. }
  10469. if (err == MP_OKAY) {
10470. /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
  10471. /* r = z0 */
  10472. /* r += (z1 - z0 - z2) << 16 */
  10473. z1->dp[32] = ca;
  10474. l = 0;
  10475. if (ca) {
  10476. l = z1->dp[0 + 16];
  10477. h = 0;
  10478. SP_ASM_ADDC(l, h, a1->dp[0]);
  10479. SP_ASM_ADDC(l, h, a1->dp[0]);
  10480. z1->dp[0 + 16] = l;
  10481. l = h;
  10482. h = 0;
  10483. for (i = 1; i < 16; i++) {
  10484. SP_ASM_ADDC(l, h, z1->dp[i + 16]);
  10485. SP_ASM_ADDC(l, h, a1->dp[i]);
  10486. SP_ASM_ADDC(l, h, a1->dp[i]);
  10487. z1->dp[i + 16] = l;
  10488. l = h;
  10489. h = 0;
  10490. }
  10491. }
  10492. z1->dp[32] += l;
10493. /* z1 = z1 - z0 - z2 */
  10494. l = z1->dp[0];
  10495. h = 0;
  10496. SP_ASM_SUBC(l, h, z0->dp[0]);
  10497. SP_ASM_SUBC(l, h, z2->dp[0]);
  10498. z1->dp[0] = l;
  10499. l = h;
  10500. h = 0;
  10501. for (i = 1; i < 32; i++) {
  10502. l += z1->dp[i];
  10503. SP_ASM_SUBC(l, h, z0->dp[i]);
  10504. SP_ASM_SUBC(l, h, z2->dp[i]);
  10505. z1->dp[i] = l;
  10506. l = h;
  10507. h = 0;
  10508. }
  10509. z1->dp[i] += l;
  10510. /* r += z1 << 16 */
  10511. l = 0;
  10512. h = 0;
  10513. for (i = 0; i < 16; i++) {
  10514. SP_ASM_ADDC(l, h, r->dp[i + 16]);
  10515. SP_ASM_ADDC(l, h, z1->dp[i]);
  10516. r->dp[i + 16] = l;
  10517. l = h;
  10518. h = 0;
  10519. }
  10520. for (; i < 33; i++) {
  10521. SP_ASM_ADDC(l, h, z1->dp[i]);
  10522. r->dp[i + 16] = l;
  10523. l = h;
  10524. h = 0;
  10525. }
  10526. /* r += z2 << 32 */
  10527. l = 0;
  10528. h = 0;
  10529. for (i = 0; i < 17; i++) {
  10530. SP_ASM_ADDC(l, h, r->dp[i + 32]);
  10531. SP_ASM_ADDC(l, h, z2->dp[i]);
  10532. r->dp[i + 32] = l;
  10533. l = h;
  10534. h = 0;
  10535. }
  10536. for (; i < 32; i++) {
  10537. SP_ASM_ADDC(l, h, z2->dp[i]);
  10538. r->dp[i + 32] = l;
  10539. l = h;
  10540. h = 0;
  10541. }
  10542. r->used = 64;
  10543. sp_clamp(r);
  10544. }
  10545. FREE_SP_INT_ARRAY(z, NULL);
  10546. FREE_SP_INT(a1, NULL);
  10547. return err;
  10548. }
  10549. #endif /* SP_INT_DIGITS >= 64 */
  10550. #if SP_INT_DIGITS >= 96
  10551. /* Square a and store in r. r = a * a
  10552. *
  10553. * @param [in] a SP integer to square.
  10554. * @param [out] r SP integer result.
  10555. *
  10556. * @return MP_OKAY on success.
  10557. * @return MP_MEM when dynamic memory allocation fails.
  10558. */
  10559. static int _sp_sqr_48(sp_int* a, sp_int* r)
  10560. {
  10561. int err = MP_OKAY;
  10562. int i;
  10563. sp_int_digit l;
  10564. sp_int_digit h;
  10565. sp_int* z0;
  10566. sp_int* z1;
  10567. sp_int* z2;
  10568. sp_int_digit ca;
  10569. DECL_SP_INT(a1, 24);
  10570. DECL_SP_INT_ARRAY(z, 49, 2);
  10571. ALLOC_SP_INT(a1, 24, err, NULL);
  10572. ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
  10573. if (err == MP_OKAY) {
  10574. z1 = z[0];
  10575. z2 = z[1];
  10576. z0 = r;
  10577. XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
  10578. a1->used = 24;
  10579. /* z2 = a1 ^ 2 */
  10580. err = _sp_sqr_24(a1, z2);
  10581. }
  10582. if (err == MP_OKAY) {
  10583. l = 0;
  10584. h = 0;
  10585. for (i = 0; i < 24; i++) {
  10586. SP_ASM_ADDC(l, h, a1->dp[i]);
  10587. SP_ASM_ADDC(l, h, a->dp[i]);
  10588. a1->dp[i] = l;
  10589. l = h;
  10590. h = 0;
  10591. }
  10592. ca = l;
  10593. /* z0 = a0 ^ 2 */
  10594. err = _sp_sqr_24(a, z0);
  10595. }
  10596. if (err == MP_OKAY) {
  10597. /* z1 = (a0 + a1) ^ 2 */
  10598. err = _sp_sqr_24(a1, z1);
  10599. }
  10600. if (err == MP_OKAY) {
10601. /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
  10602. /* r = z0 */
  10603. /* r += (z1 - z0 - z2) << 24 */
  10604. z1->dp[48] = ca;
  10605. l = 0;
  10606. if (ca) {
  10607. l = z1->dp[0 + 24];
  10608. h = 0;
  10609. SP_ASM_ADDC(l, h, a1->dp[0]);
  10610. SP_ASM_ADDC(l, h, a1->dp[0]);
  10611. z1->dp[0 + 24] = l;
  10612. l = h;
  10613. h = 0;
  10614. for (i = 1; i < 24; i++) {
  10615. SP_ASM_ADDC(l, h, z1->dp[i + 24]);
  10616. SP_ASM_ADDC(l, h, a1->dp[i]);
  10617. SP_ASM_ADDC(l, h, a1->dp[i]);
  10618. z1->dp[i + 24] = l;
  10619. l = h;
  10620. h = 0;
  10621. }
  10622. }
  10623. z1->dp[48] += l;
10624. /* z1 = z1 - z0 - z2 */
  10625. l = z1->dp[0];
  10626. h = 0;
  10627. SP_ASM_SUBC(l, h, z0->dp[0]);
  10628. SP_ASM_SUBC(l, h, z2->dp[0]);
  10629. z1->dp[0] = l;
  10630. l = h;
  10631. h = 0;
  10632. for (i = 1; i < 48; i++) {
  10633. l += z1->dp[i];
  10634. SP_ASM_SUBC(l, h, z0->dp[i]);
  10635. SP_ASM_SUBC(l, h, z2->dp[i]);
  10636. z1->dp[i] = l;
  10637. l = h;
  10638. h = 0;
  10639. }
  10640. z1->dp[i] += l;
10641. /* r += z1 << 24 */
  10642. l = 0;
  10643. h = 0;
  10644. for (i = 0; i < 24; i++) {
  10645. SP_ASM_ADDC(l, h, r->dp[i + 24]);
  10646. SP_ASM_ADDC(l, h, z1->dp[i]);
  10647. r->dp[i + 24] = l;
  10648. l = h;
  10649. h = 0;
  10650. }
  10651. for (; i < 49; i++) {
  10652. SP_ASM_ADDC(l, h, z1->dp[i]);
  10653. r->dp[i + 24] = l;
  10654. l = h;
  10655. h = 0;
  10656. }
  10657. /* r += z2 << 48 */
  10658. l = 0;
  10659. h = 0;
  10660. for (i = 0; i < 25; i++) {
  10661. SP_ASM_ADDC(l, h, r->dp[i + 48]);
  10662. SP_ASM_ADDC(l, h, z2->dp[i]);
  10663. r->dp[i + 48] = l;
  10664. l = h;
  10665. h = 0;
  10666. }
  10667. for (; i < 48; i++) {
  10668. SP_ASM_ADDC(l, h, z2->dp[i]);
  10669. r->dp[i + 48] = l;
  10670. l = h;
  10671. h = 0;
  10672. }
  10673. r->used = 96;
  10674. sp_clamp(r);
  10675. }
  10676. FREE_SP_INT_ARRAY(z, NULL);
  10677. FREE_SP_INT(a1, NULL);
  10678. return err;
  10679. }
  10680. #endif /* SP_INT_DIGITS >= 96 */
  10681. #if SP_INT_DIGITS >= 128
  10682. /* Square a and store in r. r = a * a
  10683. *
  10684. * @param [in] a SP integer to square.
  10685. * @param [out] r SP integer result.
  10686. *
  10687. * @return MP_OKAY on success.
  10688. * @return MP_MEM when dynamic memory allocation fails.
  10689. */
  10690. static int _sp_sqr_64(sp_int* a, sp_int* r)
  10691. {
  10692. int err = MP_OKAY;
  10693. int i;
  10694. sp_int_digit l;
  10695. sp_int_digit h;
  10696. sp_int* z0;
  10697. sp_int* z1;
  10698. sp_int* z2;
  10699. sp_int_digit ca;
  10700. DECL_SP_INT(a1, 32);
  10701. DECL_SP_INT_ARRAY(z, 65, 2);
  10702. ALLOC_SP_INT(a1, 32, err, NULL);
  10703. ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
  10704. if (err == MP_OKAY) {
  10705. z1 = z[0];
  10706. z2 = z[1];
  10707. z0 = r;
  10708. XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
  10709. a1->used = 32;
  10710. /* z2 = a1 ^ 2 */
  10711. err = _sp_sqr_32(a1, z2);
  10712. }
  10713. if (err == MP_OKAY) {
  10714. l = 0;
  10715. h = 0;
  10716. for (i = 0; i < 32; i++) {
  10717. SP_ASM_ADDC(l, h, a1->dp[i]);
  10718. SP_ASM_ADDC(l, h, a->dp[i]);
  10719. a1->dp[i] = l;
  10720. l = h;
  10721. h = 0;
  10722. }
  10723. ca = l;
  10724. /* z0 = a0 ^ 2 */
  10725. err = _sp_sqr_32(a, z0);
  10726. }
  10727. if (err == MP_OKAY) {
  10728. /* z1 = (a0 + a1) ^ 2 */
  10729. err = _sp_sqr_32(a1, z1);
  10730. }
  10731. if (err == MP_OKAY) {
10732. /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
  10733. /* r = z0 */
  10734. /* r += (z1 - z0 - z2) << 32 */
  10735. z1->dp[64] = ca;
  10736. l = 0;
  10737. if (ca) {
  10738. l = z1->dp[0 + 32];
  10739. h = 0;
  10740. SP_ASM_ADDC(l, h, a1->dp[0]);
  10741. SP_ASM_ADDC(l, h, a1->dp[0]);
  10742. z1->dp[0 + 32] = l;
  10743. l = h;
  10744. h = 0;
  10745. for (i = 1; i < 32; i++) {
  10746. SP_ASM_ADDC(l, h, z1->dp[i + 32]);
  10747. SP_ASM_ADDC(l, h, a1->dp[i]);
  10748. SP_ASM_ADDC(l, h, a1->dp[i]);
  10749. z1->dp[i + 32] = l;
  10750. l = h;
  10751. h = 0;
  10752. }
  10753. }
  10754. z1->dp[64] += l;
10755. /* z1 = z1 - z0 - z2 */
  10756. l = z1->dp[0];
  10757. h = 0;
  10758. SP_ASM_SUBC(l, h, z0->dp[0]);
  10759. SP_ASM_SUBC(l, h, z2->dp[0]);
  10760. z1->dp[0] = l;
  10761. l = h;
  10762. h = 0;
  10763. for (i = 1; i < 64; i++) {
  10764. l += z1->dp[i];
  10765. SP_ASM_SUBC(l, h, z0->dp[i]);
  10766. SP_ASM_SUBC(l, h, z2->dp[i]);
  10767. z1->dp[i] = l;
  10768. l = h;
  10769. h = 0;
  10770. }
  10771. z1->dp[i] += l;
10772. /* r += z1 << 32 */
  10773. l = 0;
  10774. h = 0;
  10775. for (i = 0; i < 32; i++) {
  10776. SP_ASM_ADDC(l, h, r->dp[i + 32]);
  10777. SP_ASM_ADDC(l, h, z1->dp[i]);
  10778. r->dp[i + 32] = l;
  10779. l = h;
  10780. h = 0;
  10781. }
  10782. for (; i < 65; i++) {
  10783. SP_ASM_ADDC(l, h, z1->dp[i]);
  10784. r->dp[i + 32] = l;
  10785. l = h;
  10786. h = 0;
  10787. }
  10788. /* r += z2 << 64 */
  10789. l = 0;
  10790. h = 0;
  10791. for (i = 0; i < 33; i++) {
  10792. SP_ASM_ADDC(l, h, r->dp[i + 64]);
  10793. SP_ASM_ADDC(l, h, z2->dp[i]);
  10794. r->dp[i + 64] = l;
  10795. l = h;
  10796. h = 0;
  10797. }
  10798. for (; i < 64; i++) {
  10799. SP_ASM_ADDC(l, h, z2->dp[i]);
  10800. r->dp[i + 64] = l;
  10801. l = h;
  10802. h = 0;
  10803. }
  10804. r->used = 128;
  10805. sp_clamp(r);
  10806. }
  10807. FREE_SP_INT_ARRAY(z, NULL);
  10808. FREE_SP_INT(a1, NULL);
  10809. return err;
  10810. }
  10811. #endif /* SP_INT_DIGITS >= 128 */
  10812. #if SP_INT_DIGITS >= 192
  10813. /* Square a and store in r. r = a * a
  10814. *
  10815. * @param [in] a SP integer to square.
  10816. * @param [out] r SP integer result.
  10817. *
  10818. * @return MP_OKAY on success.
  10819. * @return MP_MEM when dynamic memory allocation fails.
  10820. */
  10821. static int _sp_sqr_96(sp_int* a, sp_int* r)
  10822. {
  10823. int err = MP_OKAY;
  10824. int i;
  10825. sp_int_digit l;
  10826. sp_int_digit h;
  10827. sp_int* z0;
  10828. sp_int* z1;
  10829. sp_int* z2;
  10830. sp_int_digit ca;
  10831. DECL_SP_INT(a1, 48);
  10832. DECL_SP_INT_ARRAY(z, 97, 2);
  10833. ALLOC_SP_INT(a1, 48, err, NULL);
  10834. ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
  10835. if (err == MP_OKAY) {
  10836. z1 = z[0];
  10837. z2 = z[1];
  10838. z0 = r;
  10839. XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
  10840. a1->used = 48;
  10841. /* z2 = a1 ^ 2 */
  10842. err = _sp_sqr_48(a1, z2);
  10843. }
  10844. if (err == MP_OKAY) {
  10845. l = 0;
  10846. h = 0;
  10847. for (i = 0; i < 48; i++) {
  10848. SP_ASM_ADDC(l, h, a1->dp[i]);
  10849. SP_ASM_ADDC(l, h, a->dp[i]);
  10850. a1->dp[i] = l;
  10851. l = h;
  10852. h = 0;
  10853. }
  10854. ca = l;
  10855. /* z0 = a0 ^ 2 */
  10856. err = _sp_sqr_48(a, z0);
  10857. }
  10858. if (err == MP_OKAY) {
  10859. /* z1 = (a0 + a1) ^ 2 */
  10860. err = _sp_sqr_48(a1, z1);
  10861. }
  10862. if (err == MP_OKAY) {
10863. /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
  10864. /* r = z0 */
  10865. /* r += (z1 - z0 - z2) << 48 */
  10866. z1->dp[96] = ca;
  10867. l = 0;
  10868. if (ca) {
  10869. l = z1->dp[0 + 48];
  10870. h = 0;
  10871. SP_ASM_ADDC(l, h, a1->dp[0]);
  10872. SP_ASM_ADDC(l, h, a1->dp[0]);
  10873. z1->dp[0 + 48] = l;
  10874. l = h;
  10875. h = 0;
  10876. for (i = 1; i < 48; i++) {
  10877. SP_ASM_ADDC(l, h, z1->dp[i + 48]);
  10878. SP_ASM_ADDC(l, h, a1->dp[i]);
  10879. SP_ASM_ADDC(l, h, a1->dp[i]);
  10880. z1->dp[i + 48] = l;
  10881. l = h;
  10882. h = 0;
  10883. }
  10884. }
  10885. z1->dp[96] += l;
10886. /* z1 = z1 - z0 - z2 */
  10887. l = z1->dp[0];
  10888. h = 0;
  10889. SP_ASM_SUBC(l, h, z0->dp[0]);
  10890. SP_ASM_SUBC(l, h, z2->dp[0]);
  10891. z1->dp[0] = l;
  10892. l = h;
  10893. h = 0;
  10894. for (i = 1; i < 96; i++) {
  10895. l += z1->dp[i];
  10896. SP_ASM_SUBC(l, h, z0->dp[i]);
  10897. SP_ASM_SUBC(l, h, z2->dp[i]);
  10898. z1->dp[i] = l;
  10899. l = h;
  10900. h = 0;
  10901. }
  10902. z1->dp[i] += l;
10903. /* r += z1 << 48 */
  10904. l = 0;
  10905. h = 0;
  10906. for (i = 0; i < 48; i++) {
  10907. SP_ASM_ADDC(l, h, r->dp[i + 48]);
  10908. SP_ASM_ADDC(l, h, z1->dp[i]);
  10909. r->dp[i + 48] = l;
  10910. l = h;
  10911. h = 0;
  10912. }
  10913. for (; i < 97; i++) {
  10914. SP_ASM_ADDC(l, h, z1->dp[i]);
  10915. r->dp[i + 48] = l;
  10916. l = h;
  10917. h = 0;
  10918. }
  10919. /* r += z2 << 96 */
  10920. l = 0;
  10921. h = 0;
  10922. for (i = 0; i < 49; i++) {
  10923. SP_ASM_ADDC(l, h, r->dp[i + 96]);
  10924. SP_ASM_ADDC(l, h, z2->dp[i]);
  10925. r->dp[i + 96] = l;
  10926. l = h;
  10927. h = 0;
  10928. }
  10929. for (; i < 96; i++) {
  10930. SP_ASM_ADDC(l, h, z2->dp[i]);
  10931. r->dp[i + 96] = l;
  10932. l = h;
  10933. h = 0;
  10934. }
  10935. r->used = 192;
  10936. sp_clamp(r);
  10937. }
  10938. FREE_SP_INT_ARRAY(z, NULL);
  10939. FREE_SP_INT(a1, NULL);
  10940. return err;
  10941. }
  10942. #endif /* SP_INT_DIGITS >= 192 */
  10943. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  10944. #endif /* !WOLFSSL_SP_SMALL */
  10945. /* Square a and store in r. r = a * a
  10946. *
  10947. * @param [in] a SP integer to square.
  10948. * @param [out] r SP integer result.
  10949. *
  10950. * @return MP_OKAY on success.
  10951. * @return MP_VAL when a or r is NULL, or the result will be too big for fixed
  10952. * data length.
  10953. * @return MP_MEM when dynamic memory allocation fails.
  10954. */
  10955. int sp_sqr(sp_int* a, sp_int* r)
  10956. {
  10957. #if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
  10958. return sp_mul(a, a, r);
  10959. #else
  10960. int err = MP_OKAY;
  10961. if ((a == NULL) || (r == NULL)) {
  10962. err = MP_VAL;
  10963. }
  10964. /* Need extra digit during calculation. */
  10965. if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
  10966. err = MP_VAL;
  10967. }
  10968. if (0 && (err == MP_OKAY)) {
  10969. sp_print(a, "a");
  10970. }
  10971. if (err == MP_OKAY) {
  10972. if (a->used == 0) {
  10973. _sp_zero(r);
  10974. }
  10975. else
  10976. #ifndef WOLFSSL_SP_SMALL
  10977. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  10978. #if SP_WORD_SIZE == 64
  10979. if (a->used == 4) {
  10980. err = _sp_sqr_4(a, r);
  10981. }
  10982. else
  10983. #endif /* SP_WORD_SIZE == 64 */
  10984. #if SP_WORD_SIZE == 64
  10985. #ifdef SQR_MUL_ASM
  10986. if (a->used == 6) {
  10987. err = _sp_sqr_6(a, r);
  10988. }
  10989. else
  10990. #endif /* SQR_MUL_ASM */
  10991. #endif /* SP_WORD_SIZE == 64 */
  10992. #if SP_WORD_SIZE == 32
  10993. #ifdef SQR_MUL_ASM
  10994. if (a->used == 8) {
  10995. err = _sp_sqr_8(a, r);
  10996. }
  10997. else
  10998. #endif /* SQR_MUL_ASM */
  10999. #endif /* SP_WORD_SIZE == 32 */
  11000. #if SP_WORD_SIZE == 32
  11001. #ifdef SQR_MUL_ASM
  11002. if (a->used == 12) {
  11003. err = _sp_sqr_12(a, r);
  11004. }
  11005. else
  11006. #endif /* SQR_MUL_ASM */
  11007. #endif /* SP_WORD_SIZE == 32 */
  11008. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  11009. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  11010. #if SP_INT_DIGITS >= 32
  11011. if (a->used == 16) {
  11012. err = _sp_sqr_16(a, r);
  11013. }
  11014. else
  11015. #endif /* SP_INT_DIGITS >= 32 */
  11016. #if SP_INT_DIGITS >= 48
  11017. if (a->used == 24) {
  11018. err = _sp_sqr_24(a, r);
  11019. }
  11020. else
  11021. #endif /* SP_INT_DIGITS >= 48 */
  11022. #if SP_INT_DIGITS >= 64
  11023. if (a->used == 32) {
  11024. err = _sp_sqr_32(a, r);
  11025. }
  11026. else
  11027. #endif /* SP_INT_DIGITS >= 64 */
  11028. #if SP_INT_DIGITS >= 96
  11029. if (a->used == 48) {
  11030. err = _sp_sqr_48(a, r);
  11031. }
  11032. else
  11033. #endif /* SP_INT_DIGITS >= 96 */
  11034. #if SP_INT_DIGITS >= 128
  11035. if (a->used == 64) {
  11036. err = _sp_sqr_64(a, r);
  11037. }
  11038. else
  11039. #endif /* SP_INT_DIGITS >= 128 */
  11040. #if SP_INT_DIGITS >= 192
  11041. if (a->used == 96) {
  11042. err = _sp_sqr_96(a, r);
  11043. }
  11044. else
  11045. #endif /* SP_INT_DIGITS >= 192 */
  11046. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  11047. #endif /* !WOLFSSL_SP_SMALL */
  11048. {
  11049. err = _sp_sqr(a, r);
  11050. }
  11051. }
  11052. #ifdef WOLFSSL_SP_INT_NEGATIVE
  11053. if (err == MP_OKAY) {
  11054. r->sign = MP_ZPOS;
  11055. }
  11056. #endif
  11057. if (0 && (err == MP_OKAY)) {
  11058. sp_print(r, "rsqr");
  11059. }
  11060. return err;
  11061. #endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
  11062. }
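/* Usage sketch for sp_sqr() (illustrative only; uses the DECL/ALLOC/FREE
 * SP_INT macros as seen elsewhere in this file - exact setup depends on the
 * build configuration):
 *
 *   int err = MP_OKAY;
 *   DECL_SP_INT(x, 8);
 *   DECL_SP_INT(xx, 16);
 *   ALLOC_SP_INT(x, 8, err, NULL);
 *   ALLOC_SP_INT(xx, 16, err, NULL);
 *   if (err == MP_OKAY)
 *       err = sp_read_radix(x, "1f3b", 16);
 *   if (err == MP_OKAY)
 *       err = sp_sqr(x, xx);    // xx = x * x
 *   FREE_SP_INT(xx, NULL);
 *   FREE_SP_INT(x, NULL);
 */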
  11063. /* END SP_SQR implementations */
  11064. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  11065. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  11066. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  11067. /* Square a mod m and store in r: r = (a * a) mod m
  11068. *
  11069. * @param [in] a SP integer to square.
  11070. * @param [in] m SP integer that is the modulus.
  11071. * @param [out] r SP integer result.
  11072. *
  11073. * @return MP_OKAY on success.
  11074. * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
  11075. * for fixed data length.
  11076. * @return MP_MEM when dynamic memory allocation fails.
  11077. */
  11078. int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
  11079. {
  11080. int err = MP_OKAY;
  11081. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  11082. err = MP_VAL;
  11083. }
  11084. if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
  11085. err = MP_VAL;
  11086. }
  11087. if (err == MP_OKAY) {
  11088. err = sp_sqr(a, r);
  11089. }
  11090. if (err == MP_OKAY) {
  11091. err = sp_mod(r, m, r);
  11092. }
  11093. return err;
  11094. }
  11095. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  11096. /**********************
11097. * Montgomery functions
  11098. **********************/
  11099. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
11100. /* Reduce a number in Montgomery form.
  11101. *
  11102. * Assumes a and m are not NULL and m is not 0.
  11103. *
  11104. * @param [in,out] a SP integer to Montgomery reduce.
  11105. * @param [in] m SP integer that is the modulus.
  11106. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  11107. *
  11108. * @return MP_OKAY on success.
  11109. */
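/* Outline of the word-by-word reduction performed below (n = m->used,
 * w = SP_WORD_SIZE, bits = sp_count_bits(m)):
 *   for i = 0 .. n-1:
 *     mu = mp * a->dp[i] (mod 2^w); on the last iteration mu is masked down
 *          to the top partial word of m so that exactly 'bits' bits of a are
 *          eliminated in total
 *     a += mu * m * 2^(i*w)
 *   a >>= bits                      (sp_rshb)
 *   if (a >= m) a -= m
 * Each step cancels low bits of a, so the final shift drops only zero bits
 * and the result is congruent to a * 2^(-bits) (mod m). */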
  11110. static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
  11111. {
  11112. #if !defined(SQR_MUL_ASM)
  11113. int i;
  11114. int bits;
  11115. sp_int_word w;
  11116. sp_int_digit mu;
  11117. if (0) {
  11118. sp_print(a, "a");
  11119. sp_print(m, "m");
  11120. }
  11121. bits = sp_count_bits(m);
  11122. for (i = a->used; i < m->used * 2; i++) {
  11123. a->dp[i] = 0;
  11124. }
  11125. if (m->used == 1) {
  11126. mu = mp * a->dp[0];
  11127. w = a->dp[0];
  11128. w += (sp_int_word)mu * m->dp[0];
  11129. a->dp[0] = (sp_int_digit)w;
  11130. w >>= SP_WORD_SIZE;
  11131. w += a->dp[1];
  11132. a->dp[1] = (sp_int_digit)w;
  11133. w >>= SP_WORD_SIZE;
  11134. a->dp[2] = (sp_int_digit)w;
  11135. a->used = 3;
  11136. /* mp is SP_WORD_SIZE */
  11137. bits = SP_WORD_SIZE;
  11138. }
  11139. else {
  11140. sp_int_digit mask = (sp_int_digit)
  11141. ((1UL << (bits & (SP_WORD_SIZE - 1))) - 1);
  11142. sp_int_word o = 0;
  11143. for (i = 0; i < m->used; i++) {
  11144. int j;
  11145. mu = mp * a->dp[i];
  11146. if ((i == m->used - 1) && (mask != 0)) {
  11147. mu &= mask;
  11148. }
  11149. w = a->dp[i];
  11150. w += (sp_int_word)mu * m->dp[0];
  11151. a->dp[i] = (sp_int_digit)w;
  11152. w >>= SP_WORD_SIZE;
  11153. for (j = 1; j < m->used - 1; j++) {
  11154. w += a->dp[i + j];
  11155. w += (sp_int_word)mu * m->dp[j];
  11156. a->dp[i + j] = (sp_int_digit)w;
  11157. w >>= SP_WORD_SIZE;
  11158. }
  11159. w += o;
  11160. w += a->dp[i + j];
  11161. o = (sp_int_digit)(w >> SP_WORD_SIZE);
  11162. w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
  11163. a->dp[i + j] = (sp_int_digit)w;
  11164. w >>= SP_WORD_SIZE;
  11165. o += w;
  11166. }
  11167. o += a->dp[m->used * 2 - 1];
  11168. a->dp[m->used * 2 - 1] = (sp_int_digit)o;
  11169. o >>= SP_WORD_SIZE;
  11170. a->dp[m->used * 2] = (sp_int_digit)o;
  11171. a->used = m->used * 2 + 1;
  11172. }
  11173. sp_clamp(a);
  11174. sp_rshb(a, bits, a);
  11175. if (_sp_cmp(a, m) != MP_LT) {
  11176. _sp_sub_off(a, m, a, 0);
  11177. }
  11178. if (0) {
  11179. sp_print(a, "rr");
  11180. }
  11181. return MP_OKAY;
  11182. #else /* !SQR_MUL_ASM */
  11183. int i;
  11184. int j;
  11185. int bits;
  11186. sp_int_digit mu;
  11187. sp_int_digit o;
  11188. sp_int_digit mask;
  11189. bits = sp_count_bits(m);
  11190. mask = (1UL << (bits & (SP_WORD_SIZE - 1))) - 1;
  11191. for (i = a->used; i < m->used * 2; i++) {
  11192. a->dp[i] = 0;
  11193. }
  11194. if (m->used <= 1) {
  11195. sp_int_word w;
  11196. mu = mp * a->dp[0];
  11197. w = a->dp[0];
  11198. w += (sp_int_word)mu * m->dp[0];
  11199. a->dp[0] = w;
  11200. w >>= SP_WORD_SIZE;
  11201. w += a->dp[1];
  11202. a->dp[1] = w;
  11203. w >>= SP_WORD_SIZE;
  11204. a->dp[2] = w;
  11205. a->used = m->used * 2 + 1;
  11206. /* mp is SP_WORD_SIZE */
  11207. bits = SP_WORD_SIZE;
  11208. }
  11209. #ifndef WOLFSSL_HAVE_SP_ECC
  11210. #if SP_WORD_SIZE == 64
  11211. else if (m->used == 4) {
  11212. sp_int_digit l;
  11213. sp_int_digit h;
  11214. l = 0;
  11215. h = 0;
  11216. o = 0;
  11217. for (i = 0; i < 4; i++) {
  11218. mu = mp * a->dp[i];
  11219. if ((i == 3) && (mask != 0)) {
  11220. mu &= mask;
  11221. }
  11222. l = a->dp[i];
  11223. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
  11224. a->dp[i] = l;
  11225. l = h;
  11226. h = 0;
  11227. SP_ASM_ADDC(l, h, a->dp[i + 1]);
  11228. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
  11229. a->dp[i + 1] = l;
  11230. l = h;
  11231. h = 0;
  11232. SP_ASM_ADDC(l, h, a->dp[i + 2]);
  11233. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
  11234. a->dp[i + 2] = l;
  11235. l = h;
  11236. h = 0;
  11237. SP_ASM_ADDC_REG(l, h, o);
  11238. SP_ASM_ADDC(l, h, a->dp[i + 3]);
  11239. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
  11240. a->dp[i + 3] = l;
  11241. o = h;
  11242. l = h;
  11243. h = 0;
  11244. }
  11245. SP_ASM_ADDC(l, h, a->dp[7]);
  11246. a->dp[7] = l;
  11247. a->dp[8] = h;
  11248. a->used = 9;
  11249. }
  11250. else if (m->used == 6) {
  11251. sp_int_digit l;
  11252. sp_int_digit h;
  11253. l = 0;
  11254. h = 0;
  11255. o = 0;
  11256. for (i = 0; i < 6; i++) {
  11257. mu = mp * a->dp[i];
  11258. if ((i == 5) && (mask != 0)) {
  11259. mu &= mask;
  11260. }
  11261. l = a->dp[i];
  11262. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
  11263. a->dp[i] = l;
  11264. l = h;
  11265. h = 0;
  11266. SP_ASM_ADDC(l, h, a->dp[i + 1]);
  11267. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
  11268. a->dp[i + 1] = l;
  11269. l = h;
  11270. h = 0;
  11271. SP_ASM_ADDC(l, h, a->dp[i + 2]);
  11272. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
  11273. a->dp[i + 2] = l;
  11274. l = h;
  11275. h = 0;
  11276. SP_ASM_ADDC(l, h, a->dp[i + 3]);
  11277. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
  11278. a->dp[i + 3] = l;
  11279. l = h;
  11280. h = 0;
  11281. SP_ASM_ADDC(l, h, a->dp[i + 4]);
  11282. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
  11283. a->dp[i + 4] = l;
  11284. l = h;
  11285. h = 0;
  11286. SP_ASM_ADDC_REG(l, h, o);
  11287. SP_ASM_ADDC(l, h, a->dp[i + 5]);
  11288. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[5]);
  11289. a->dp[i + 5] = l;
  11290. o = h;
  11291. l = h;
  11292. h = 0;
  11293. }
  11294. SP_ASM_ADDC(l, h, a->dp[11]);
  11295. a->dp[11] = l;
  11296. a->dp[12] = h;
  11297. a->used = 13;
  11298. }
  11299. #endif /* SP_WORD_SIZE == 64 */
  11300. #endif /* WOLFSSL_HAVE_SP_ECC */
  11301. else {
  11302. sp_int_digit l;
  11303. sp_int_digit h;
  11304. sp_int_digit o2;
  11305. sp_int_digit* ad;
  11306. sp_int_digit* md;
  11307. o = 0;
  11308. o2 = 0;
  11309. ad = a->dp;
  11310. for (i = 0; i < m->used; i++, ad++) {
  11311. md = m->dp;
  11312. mu = mp * ad[0];
  11313. if ((i == m->used - 1) && (mask != 0)) {
  11314. mu &= mask;
  11315. }
  11316. l = ad[0];
  11317. h = 0;
  11318. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  11319. ad[0] = l;
  11320. l = h;
  11321. for (j = 1; j + 1 < m->used - 1; j += 2) {
  11322. h = 0;
  11323. SP_ASM_ADDC(l, h, ad[j + 0]);
  11324. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  11325. ad[j + 0] = l;
  11326. l = 0;
  11327. SP_ASM_ADDC(h, l, ad[j + 1]);
  11328. SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
  11329. ad[j + 1] = h;
  11330. }
  11331. for (; j < m->used - 1; j++) {
  11332. h = 0;
  11333. SP_ASM_ADDC(l, h, ad[j]);
  11334. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  11335. ad[j] = l;
  11336. l = h;
  11337. }
  11338. h = o2;
  11339. o2 = 0;
  11340. SP_ASM_ADDC_REG(l, h, o);
  11341. SP_ASM_ADDC(l, h, ad[j]);
  11342. SP_ASM_MUL_ADD(l, h, o2, mu, *md);
  11343. ad[j] = l;
  11344. o = h;
  11345. }
  11346. l = o;
  11347. h = o2;
  11348. SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
  11349. a->dp[m->used * 2 - 1] = l;
  11350. a->dp[m->used * 2] = h;
  11351. a->used = m->used * 2 + 1;
  11352. }
  11353. sp_clamp(a);
  11354. sp_rshb(a, bits, a);
  11355. if (_sp_cmp(a, m) != MP_LT) {
  11356. sp_sub(a, m, a);
  11357. }
  11358. return MP_OKAY;
  11359. #endif /* !SQR_MUL_ASM */
  11360. }
  11361. #ifndef WOLFSSL_RSA_VERIFY_ONLY
11362. /* Reduce a number in Montgomery form.
  11363. *
  11364. * @param [in,out] a SP integer to Montgomery reduce.
  11365. * @param [in] m SP integer that is the modulus.
  11366. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  11367. *
  11368. * @return MP_OKAY on success.
  11369. * @return MP_VAL when a or m is NULL or m is zero.
  11370. */
  11371. int sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
  11372. {
  11373. int err;
  11374. if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
  11375. err = MP_VAL;
  11376. }
  11377. else if (a->size < m->used * 2 + 1) {
  11378. err = MP_VAL;
  11379. }
  11380. else {
  11381. err = _sp_mont_red(a, m, mp);
  11382. }
  11383. return err;
  11384. }
  11385. #endif
  11386. /* Calculate the bottom digit of the inverse of negative m.
  11387. *
  11388. * Used when performing Montgomery Reduction.
  11389. *
  11390. * @param [in] m SP integer that is the modulus.
11391. * @param [out] rho SP integer digit that is the bottom digit of inv(-m).
  11392. *
  11393. * @return MP_OKAY on success.
  11394. * @return MP_VAL when m or rho is NULL.
  11395. */
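/* Note on the computation below: it finds x = b^-1 mod 2^SP_WORD_SIZE for
 * b = m->dp[0] (m must be odd) by Hensel lifting / Newton iteration. The
 * seed x = (((b + 2) & 4) << 1) + b satisfies x*b == 1 (mod 2^4) for any odd
 * b, and each step x = x*(2 - b*x) doubles the number of correct low bits:
 * if x*b = 1 + t*2^k then x*(2 - b*x)*b = 1 - t^2*2^(2k). rho is then the
 * negation, -(m^-1) mod 2^SP_WORD_SIZE, as required by Montgomery reduction.
 */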
  11396. int sp_mont_setup(sp_int* m, sp_int_digit* rho)
  11397. {
  11398. int err = MP_OKAY;
  11399. if ((m == NULL) || (rho == NULL)) {
  11400. err = MP_VAL;
  11401. }
  11402. if ((err == MP_OKAY) && !sp_isodd(m)) {
  11403. err = MP_VAL;
  11404. }
  11405. if (err == MP_OKAY) {
  11406. sp_int_digit x;
  11407. sp_int_digit b;
  11408. b = m->dp[0];
11409. x = (((b + 2) & 4) << 1) + b; /* here x*b==1 mod 2**4 */
11410. x *= 2 - b * x; /* here x*b==1 mod 2**8 */
11411. #if SP_WORD_SIZE >= 16
11412. x *= 2 - b * x; /* here x*b==1 mod 2**16 */
11413. #if SP_WORD_SIZE >= 32
11414. x *= 2 - b * x; /* here x*b==1 mod 2**32 */
11415. #if SP_WORD_SIZE >= 64
11416. x *= 2 - b * x; /* here x*b==1 mod 2**64 */
  11417. #endif /* SP_WORD_SIZE >= 64 */
  11418. #endif /* SP_WORD_SIZE >= 32 */
  11419. #endif /* SP_WORD_SIZE >= 16 */
  11420. /* rho = -1/m mod b */
  11421. *rho = -x;
  11422. }
  11423. return err;
  11424. }
  11425. /* Calculate the normalization value of m.
  11426. * norm = 2^k - m, where k is the number of bits in m
  11427. *
  11428. * @param [out] norm SP integer that normalises numbers into Montgomery
  11429. * form.
  11430. * @param [in] m SP integer that is the modulus.
  11431. *
  11432. * @return MP_OKAY on success.
11433. * @return MP_VAL when norm or m is NULL, or number of bits in m is maximal.
  11434. */
  11435. int sp_mont_norm(sp_int* norm, sp_int* m)
  11436. {
  11437. int err = MP_OKAY;
  11438. int bits = 0;
  11439. if ((norm == NULL) || (m == NULL)) {
  11440. err = MP_VAL;
  11441. }
  11442. if (err == MP_OKAY) {
  11443. bits = sp_count_bits(m);
  11444. if (bits == m->size * SP_WORD_SIZE) {
  11445. err = MP_VAL;
  11446. }
  11447. }
  11448. if (err == MP_OKAY) {
  11449. if (bits < SP_WORD_SIZE) {
  11450. bits = SP_WORD_SIZE;
  11451. }
  11452. _sp_zero(norm);
  11453. sp_set_bit(norm, bits);
  11454. err = sp_sub(norm, m, norm);
  11455. }
  11456. if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
  11457. norm->dp[0] %= m->dp[0];
  11458. }
  11459. if (err == MP_OKAY) {
  11460. sp_clamp(norm);
  11461. }
  11462. return err;
  11463. }
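/* Putting the Montgomery helpers together (illustrative sketch only; assumes
 * the sp_mulmod()/sp_mul() routines defined elsewhere in this file and that
 * all sp_ints involved are large enough for the size checks above):
 *
 *   sp_int_digit rho;
 *   err = sp_mont_setup(m, &rho);         // rho = -(m^-1) mod 2^SP_WORD_SIZE
 *   if (err == MP_OKAY)
 *       err = sp_mont_norm(norm, m);      // norm == R mod m, R = 2^bits(m)
 *   if (err == MP_OKAY)
 *       err = sp_mulmod(a, norm, m, am);  // am = a*R mod m (Montgomery form)
 *   if (err == MP_OKAY)
 *       err = sp_mul(am, am, t);          // t = (a*R)^2
 *   if (err == MP_OKAY)
 *       err = sp_mont_red(t, m, rho);     // t = a^2*R mod m
 *   // One more sp_mont_red() on t would convert back out of Montgomery form.
 */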
  11464. #endif
  11465. /*********************************
  11466. * To and from binary and strings.
  11467. *********************************/
  11468. /* Calculate the number of 8-bit values required to represent the
  11469. * multi-precision number.
  11470. *
11471. * When a is NULL, returns 0.
  11472. *
  11473. * @param [in] a SP integer.
  11474. *
  11475. * @return The count of 8-bit values.
  11476. */
  11477. int sp_unsigned_bin_size(sp_int* a)
  11478. {
  11479. int cnt = 0;
  11480. if (a != NULL) {
  11481. cnt = (sp_count_bits(a) + 7) / 8;
  11482. }
  11483. return cnt;
  11484. }
  11485. /* Convert a number as an array of bytes in big-endian format to a
  11486. * multi-precision number.
  11487. *
  11488. * @param [out] a SP integer.
  11489. * @param [in] in Array of bytes.
  11490. * @param [in] inSz Number of data bytes in array.
  11491. *
  11492. * @return MP_OKAY on success.
  11493. * @return MP_VAL when the number is too big to fit in an SP.
  11494. */
  11495. int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
  11496. {
  11497. int err = MP_OKAY;
  11498. if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
  11499. err = MP_VAL;
  11500. }
  11501. /* Extra digit added to SP_INT_DIGITS to be used in calculations. */
  11502. if ((err == MP_OKAY) && (inSz > ((word32)a->size - 1) * SP_WORD_SIZEOF)) {
  11503. err = MP_VAL;
  11504. }
  11505. #ifndef LITTLE_ENDIAN_ORDER
  11506. if (err == MP_OKAY) {
  11507. int i;
  11508. int j;
  11509. int s;
  11510. for (i = inSz-1,j = 0; i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF,j++) {
  11511. a->dp[j] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
  11512. }
  11513. a->dp[j] = 0;
  11514. for (s = 0; i >= 0; i--,s += 8) {
  11515. a->dp[j] |= ((sp_int_digit)in[i]) << s;
  11516. }
  11517. a->used = j + 1;
  11518. sp_clamp(a);
  11519. }
  11520. #else
  11521. if (err == MP_OKAY) {
  11522. int i;
  11523. int j;
  11524. a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
  11525. for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
  11526. a->dp[j] = ((sp_int_digit)in[i - 0] << 0);
  11527. #if SP_WORD_SIZE >= 16
  11528. a->dp[j] |= ((sp_int_digit)in[i - 1] << 8);
  11529. #endif
  11530. #if SP_WORD_SIZE >= 32
  11531. a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
  11532. ((sp_int_digit)in[i - 3] << 24);
  11533. #endif
  11534. #if SP_WORD_SIZE >= 64
  11535. a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
  11536. ((sp_int_digit)in[i - 5] << 40) |
  11537. ((sp_int_digit)in[i - 6] << 48) |
  11538. ((sp_int_digit)in[i - 7] << 56);
  11539. #endif
  11540. j++;
  11541. }
  11542. a->dp[j] = 0;
  11543. #if SP_WORD_SIZE >= 16
  11544. if (i >= 0) {
  11545. byte *d = (byte*)a->dp;
  11546. a->dp[a->used - 1] = 0;
  11547. switch (i) {
  11548. case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
  11549. case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
  11550. case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
  11551. case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
  11552. case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
  11553. case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
  11554. case 0: d[inSz - 1 - 0] = in[0];
  11555. }
  11556. }
  11557. #endif
  11558. sp_clamp(a);
  11559. }
  11560. #endif /* LITTLE_ENDIAN_ORDER */
  11561. return err;
  11562. }
  11563. #if (!defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
  11564. defined(WOLFSSL_RSA_PUBLIC_ONLY)) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  11565. /* Convert the multi-precision number to an array of bytes in big-endian format.
  11566. *
11567. * The array must be large enough for the encoded number - use mp_unsigned_bin_size
11568. * to calculate the number of bytes required.
  11569. *
  11570. * @param [in] a SP integer.
  11571. * @param [out] out Array to put encoding into.
  11572. *
  11573. * @return MP_OKAY on success.
  11574. * @return MP_VAL when a or out is NULL.
  11575. */
  11576. int sp_to_unsigned_bin(sp_int* a, byte* out)
  11577. {
  11578. return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
  11579. }
  11580. #endif /* (!NO_DH || HAVE_ECC || WC_RSA_BLINDING || WOLFSSL_RSA_PUBLIC_ONLY)
  11581. && !WOLFSSL_RSA_VERIFY_ONLY */
  11582. /* Convert the multi-precision number to an array of bytes in big-endian format.
  11583. *
11584. * The array must be large enough for the encoded number - use mp_unsigned_bin_size
11585. * to calculate the number of bytes required.
11586. * Front-pads the output array with zeros to make the number the size of the array.
  11587. *
  11588. * @param [in] a SP integer.
  11589. * @param [out] out Array to put encoding into.
  11590. * @param [in] outSz Size of the array in bytes.
  11591. *
  11592. * @return MP_OKAY on success.
  11593. * @return MP_VAL when a or out is NULL.
  11594. */
  11595. int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
  11596. {
  11597. int err = MP_OKAY;
  11598. if ((a == NULL) || (out == NULL)) {
  11599. err = MP_VAL;
  11600. }
  11601. if (err == MP_OKAY) {
  11602. int j = outSz - 1;
  11603. if (!sp_iszero(a)) {
  11604. int i;
  11605. for (i = 0; (j >= 0) && (i < a->used); i++) {
  11606. int b;
  11607. for (b = 0; b < SP_WORD_SIZE; b += 8) {
  11608. out[j--] = a->dp[i] >> b;
  11609. if (j < 0) {
  11610. break;
  11611. }
  11612. }
  11613. }
  11614. }
  11615. for (; j >= 0; j--) {
  11616. out[j] = 0;
  11617. }
  11618. }
  11619. return err;
  11620. }
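/* Example: for a == 0x012345 and outSz == 4 the output is the big-endian,
 * zero front-padded buffer { 0x00, 0x01, 0x23, 0x45 }. If outSz is smaller
 * than the full encoding, only the least significant outSz bytes are written.
 */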
  11621. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
11622. /* Store the number in big-endian format in the array at an offset.
11623. * The array must be large enough for the encoded number - use mp_unsigned_bin_size
11624. * to calculate the number of bytes required.
11625. *
11626. * @param [in] o Offset into the array to start encoding.
  11627. * @param [in] a SP integer.
  11628. * @param [out] out Array to put encoding into.
  11629. *
  11630. * @return Index of next byte after data.
  11631. * @return MP_VAL when a or out is NULL.
  11632. */
  11633. int sp_to_unsigned_bin_at_pos(int o, sp_int*a, unsigned char* out)
  11634. {
  11635. int ret = sp_to_unsigned_bin(a, out + o);
  11636. if (ret == MP_OKAY) {
  11637. ret = o + sp_unsigned_bin_size(a);
  11638. }
  11639. return ret;
  11640. }
  11641. #endif /* WOLFSSL_SP_MATH_ALL */
  11642. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  11643. defined(HAVE_ECC)
  11644. /* Convert hexadecimal number as string in big-endian format to a
  11645. * multi-precision number.
  11646. *
  11647. * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
  11648. *
  11649. * @param [out] a SP integer.
  11650. * @param [in] in NUL terminated string.
  11651. *
  11652. * @return MP_OKAY on success.
  11653. * @return MP_VAL when radix not supported, value is negative, or a character
  11654. * is not valid.
  11655. */
  11656. static int _sp_read_radix_16(sp_int* a, const char* in)
  11657. {
  11658. int err = MP_OKAY;
  11659. int i;
  11660. int s = 0;
  11661. int j = 0;
  11662. #ifdef WOLFSSL_SP_INT_NEGATIVE
  11663. if (*in == '-') {
  11664. a->sign = MP_NEG;
  11665. in++;
  11666. }
  11667. #endif
  11668. while (*in == '0') {
  11669. in++;
  11670. }
  11671. a->dp[0] = 0;
  11672. for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
  11673. char ch = in[i];
  11674. if ((ch >= '0') && (ch <= '9')) {
  11675. ch -= '0';
  11676. }
  11677. else if ((ch >= 'A') && (ch <= 'F')) {
  11678. ch -= 'A' - 10;
  11679. }
  11680. else if ((ch >= 'a') && (ch <= 'f')) {
  11681. ch -= 'a' - 10;
  11682. }
  11683. else {
  11684. err = MP_VAL;
  11685. break;
  11686. }
  11687. if (s == SP_WORD_SIZE) {
  11688. j++;
  11689. if (j >= a->size) {
  11690. err = MP_VAL;
  11691. break;
  11692. }
  11693. s = 0;
  11694. a->dp[j] = 0;
  11695. }
  11696. a->dp[j] |= ((sp_int_digit)ch) << s;
  11697. s += 4;
  11698. }
  11699. if (err == MP_OKAY) {
  11700. a->used = j + 1;
  11701. sp_clamp(a);
  11702. #ifdef WOLFSSL_SP_INT_NEGATIVE
  11703. if (sp_iszero(a)) {
  11704. a->sign = MP_ZPOS;
  11705. }
  11706. #endif
  11707. }
  11708. return err;
  11709. }
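/* Example: reading "0001fA3" skips the leading zeros and parses from the end
 * of the string, 4 bits per character (upper or lower case), giving
 * a == 0x1fa3. */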
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */

#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* Convert decimal number as string in big-endian format to a multi-precision
 * number.
 *
 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
 *
 * @param [out] a SP integer.
 * @param [in] in NUL terminated string.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when radix not supported, value is negative, or a character
 *         is not valid.
 */
static int _sp_read_radix_10(sp_int* a, const char* in)
{
    int err = MP_OKAY;
    int i;
    int len;
    char ch;

    _sp_zero(a);
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (*in == '-') {
        a->sign = MP_NEG;
        in++;
    }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    while (*in == '0') {
        in++;
    }

    len = (int)XSTRLEN(in);
    for (i = 0; i < len; i++) {
        ch = in[i];
        if ((ch >= '0') && (ch <= '9')) {
            ch -= '0';
        }
        else {
            err = MP_VAL;
            break;
        }
        if (a->used + 1 > a->size) {
            err = MP_VAL;
            break;
        }
        _sp_mul_d(a, 10, a, 0);
        (void)_sp_add_d(a, ch, a);
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && sp_iszero(a)) {
        a->sign = MP_ZPOS;
    }
#endif

    return err;
}
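/* Illustrative example (editorial addition): the loop above is Horner's
 * method in base 10, multiplying the accumulator by 10 and adding each digit.
 * For the string "123":
 *
 *     a = 0
 *     a = a * 10 + 1   ->   1
 *     a = a * 10 + 2   ->  12
 *     a = a * 10 + 3   -> 123
 */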
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */

#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(HAVE_ECC)
/* Convert a number as string in big-endian format to a big number.
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
 *
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
 *
 * @param [out] a SP integer.
 * @param [in] in NUL terminated string.
 * @param [in] radix Radix of the encoding. Valid values: 16, 10.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
 *         or a character is not valid.
 */
int sp_read_radix(sp_int* a, const char* in, int radix)
{
    int err = MP_OKAY;

    if ((a == NULL) || (in == NULL)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#ifndef WOLFSSL_SP_INT_NEGATIVE
        if (*in == '-') {
            err = MP_VAL;
        }
        else
#endif
        if (radix == 16) {
            err = _sp_read_radix_16(a, in);
        }
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
        else if (radix == 10) {
            err = _sp_read_radix_10(a, in);
        }
#endif
        else {
            err = MP_VAL;
        }
    }

    return err;
}
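/* Minimal usage sketch (editorial addition; the variable name is illustrative
 * and error handling is condensed):
 *
 *     sp_int x;
 *     if ((sp_init(&x) == MP_OKAY) &&
 *         (sp_read_radix(&x, "2A", 16) == MP_OKAY)) {
 *         // x now holds the value 42
 *     }
 *
 * Decimal input (radix 10) is only available when WOLFSSL_SP_MATH_ALL is
 * defined and WOLFSSL_RSA_VERIFY_ONLY is not.
 */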
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */

#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(WC_MP_TO_RADIX)
/* Hex string characters. */
static const char sp_hex_char[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
/* Put the big-endian, hex string encoding of a into str.
 *
 * Assumes str is large enough for result.
 * Use sp_radix_size() to calculate required length.
 *
 * @param [in] a SP integer to convert.
 * @param [out] str String to hold hex string result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or str is NULL.
 */
int sp_tohex(sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;

    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* quick out if it's zero */
        if (sp_iszero(a) == MP_YES) {
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            *str++ = '0';
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            *str++ = '0';
            *str = '\0';
        }
        else {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                *str = '-';
                str++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
            i = a->used - 1;
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            for (j = SP_WORD_SIZE - 8; j >= 0; j -= 8) {
                if (((a->dp[i] >> j) & 0xff) != 0) {
                    break;
                }
                else if (j == 0) {
                    j = SP_WORD_SIZE - 8;
                    --i;
                }
            }
            j += 4;
#else
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                if (((a->dp[i] >> j) & 0xf) != 0) {
                    break;
                }
                else if (j == 0) {
                    j = SP_WORD_SIZE - 4;
                    --i;
                }
            }
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            for (; j >= 0; j -= 4) {
                *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
            }
            for (--i; i >= 0; i--) {
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                    *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
                }
            }
            *str = '\0';
        }
    }

    return err;
}
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
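/* Minimal usage sketch (editorial addition; names are illustrative and error
 * handling is condensed).  sp_radix_size() reports the required number of
 * characters including the NUL terminator, so the fixed buffer below is an
 * assumption about the size of x:
 *
 *     char str[260];
 *     int  sz = 0;
 *     if ((sp_radix_size(&x, MP_RADIX_HEX, &sz) == MP_OKAY) &&
 *         (sz <= (int)sizeof(str))) {
 *         (void)sp_tohex(&x, str);
 *     }
 */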
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
/* Put the big-endian, decimal string encoding of a into str.
 *
 * Assumes str is large enough for result.
 * Use sp_radix_size() to calculate required length.
 *
 * @param [in] a SP integer to convert.
 * @param [out] str String to hold decimal string result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or str is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_todecimal(sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;
    sp_int_digit d;

    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    /* quick out if it's zero */
    else if (sp_iszero(a) == MP_YES) {
        *str++ = '0';
        *str = '\0';
    }
    else {
        DECL_SP_INT(t, a->used + 1);

        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
        if (err == MP_OKAY) {
            err = sp_copy(a, t);
        }
        if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                *str = '-';
                str++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            i = 0;
            while (!sp_iszero(t)) {
                sp_div_d(t, 10, t, &d);
                str[i++] = '0' + d;
            }
            str[i] = '\0';

            for (j = 0; j <= (i - 1) / 2; j++) {
                int c = str[j];
                str[j] = str[i - 1 - j];
                str[i - 1 - j] = c;
            }
        }
        FREE_SP_INT(t, NULL);
    }

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
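/* Illustrative example (editorial addition): sp_todecimal() extracts digits
 * least-significant first by repeated division by 10 and then reverses the
 * string in place.  For the value 1234:
 *
 *     div/mod 10 -> '4', '3', '2', '1'   (str = "4321")
 *     reverse    ->                       (str = "1234")
 */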
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* Put the string version, big-endian, of a in str using the given radix.
 *
 * @param [in] a SP integer to convert.
 * @param [out] str String to hold the string result.
 * @param [in] radix Radix to encode in.
 *                   Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or str is NULL, or radix not supported.
 */
int sp_toradix(sp_int* a, char* str, int radix)
{
    int err = MP_OKAY;

    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    else if (radix == MP_RADIX_HEX) {
        err = sp_tohex(a, str);
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    else if (radix == MP_RADIX_DEC) {
        err = sp_todecimal(a, str);
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        err = MP_VAL;
    }

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* Calculate the length of the string version, big-endian, of a using the given
 * radix.
 *
 * @param [in] a SP integer to convert.
 * @param [in] radix Radix to encode in.
 *                   Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
 * @param [out] size The number of characters in encoding.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or size is NULL, or radix not supported.
 */
int sp_radix_size(sp_int* a, int radix, int* size)
{
    int err = MP_OKAY;

    if ((a == NULL) || (size == NULL)) {
        err = MP_VAL;
    }
    else if (radix == MP_RADIX_HEX) {
        if (a->used == 0) {
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* "00" and '\0' */
            *size = 2 + 1;
#else
            /* Zero and '\0' */
            *size = 1 + 1;
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
        }
        else {
            int nibbles = (sp_count_bits(a) + 3) / 4;
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                nibbles++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            if (nibbles & 1) {
                nibbles++;
            }
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            /* One more for '\0' */
            *size = nibbles + 1;
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    else if (radix == MP_RADIX_DEC) {
        int i;
        sp_int_digit d;

        /* quick out if it's zero */
        if (sp_iszero(a) == MP_YES) {
            /* Zero and '\0' */
            *size = 1 + 1;
        }
        else {
            DECL_SP_INT(t, a->used + 1);

            ALLOC_SP_INT(t, a->used + 1, err, NULL);
            if (err == MP_OKAY) {
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
                t->size = a->used + 1;
#endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */
                err = sp_copy(a, t);
            }
            if (err == MP_OKAY) {
                for (i = 0; !sp_iszero(t); i++) {
                    sp_div_d(t, 10, t, &d);
                }
#ifdef WOLFSSL_SP_INT_NEGATIVE
                if (a->sign == MP_NEG) {
                    i++;
                }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
                /* One more for '\0' */
                *size = i + 1;
            }
            FREE_SP_INT(t, NULL);
        }
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        err = MP_VAL;
    }

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
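/* Worked example (editorial addition): for a 521-bit value the hex size is
 * computed as
 *
 *     nibbles = (521 + 3) / 4 = 131
 *     131 is odd, so zero padding (when enabled) rounds it up to 132
 *     size    = 132 + 1 = 133   (one extra character for the NUL terminator)
 */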
/***************************************
 * Prime number generation and checking.
 ***************************************/

#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_DH) || !defined(NO_DSA)) && \
    !defined(WC_NO_RNG)
/* Generate a random prime for RSA only.
 *
 * @param [out] r SP integer to hold result.
 * @param [in] len Number of bytes in prime.
 * @param [in] rng Random number generator.
 * @param [in] heap Heap hint. Unused.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when r or rng is NULL, length is not supported or random
 *         number generator fails.
 */
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
{
    static const int USE_BBS = 1;
    int err = MP_OKAY;
    int type = 0;
    int isPrime = MP_NO;
#ifdef WOLFSSL_SP_MATH_ALL
    int bits = 0;
#endif /* WOLFSSL_SP_MATH_ALL */

    (void)heap;

    /* Check for NULL parameters; 0 bytes is invalid as 0 is not prime. */
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* get type */
        if (len < 0) {
            type = USE_BBS;
            len = -len;
        }

#ifndef WOLFSSL_SP_MATH_ALL
        /* For minimal maths, support only what's in SP and needed for DH. */
#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
        if (len == 32) {
        }
        else
#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
        /* Generate RSA primes that are half the modulus length. */
#ifndef WOLFSSL_SP_NO_3072
        if ((len != 128) && (len != 192))
#else
        if (len != 128)
#endif /* WOLFSSL_SP_NO_3072 */
        {
            err = MP_VAL;
        }
#endif /* !WOLFSSL_SP_MATH_ALL */

#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        r->used = (len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
#ifdef WOLFSSL_SP_MATH_ALL
        bits = (len * 8) & SP_WORD_MASK;
#endif /* WOLFSSL_SP_MATH_ALL */
    }

    /* Assume the candidate is probably prime and then test until
     * it is proven composite. */
    while (err == MP_OKAY && isPrime == MP_NO) {
#ifdef SHOW_GEN
        printf(".");
        fflush(stdout);
#endif /* SHOW_GEN */
        /* generate value */
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
        if (err != 0) {
            err = MP_VAL;
            break;
        }

#ifndef LITTLE_ENDIAN_ORDER
        if (((len * 8) & SP_WORD_MASK) != 0) {
            r->dp[r->used-1] >>= SP_WORD_SIZE - ((len * 8) & SP_WORD_MASK);
        }
#endif /* LITTLE_ENDIAN_ORDER */
#ifdef WOLFSSL_SP_MATH_ALL
        if (bits > 0) {
            r->dp[r->used - 1] &= (1L << bits) - 1;
        }
#endif /* WOLFSSL_SP_MATH_ALL */

        /* munge bits */
#ifndef LITTLE_ENDIAN_ORDER
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
#else
        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
#endif /* LITTLE_ENDIAN_ORDER */
        r->dp[0] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);

        /* test */
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
         * of a 1024-bit candidate being a false positive, when it is our
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
         * Using 8 because we've always used 8 */
        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
    }

    return err;
}
#endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
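/* Minimal usage sketch (editorial addition; error handling condensed and the
 * variable names are illustrative).  A negative length requests a prime that
 * is congruent to 3 mod 4 (the USE_BBS path above):
 *
 *     WC_RNG rng;
 *     sp_int p;
 *     if ((wc_InitRng(&rng) == 0) && (sp_init(&p) == MP_OKAY)) {
 *         (void)sp_rand_prime(&p, 128, &rng, NULL);  // 1024-bit prime
 *         wc_FreeRng(&rng);
 *     }
 */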
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Miller-Rabin test of "a" to the base of "b" as described in
 * HAC pp. 139 Algorithm 4.24.
 *
 * Sets result to 0 if definitely composite or 1 if probably prime.
 * For a random base the chance of error is no more than 1/4 and often
 * very much lower.
 *
 * @param [in] a SP integer to check.
 * @param [in] b SP integer that is a small prime.
 * @param [out] result MP_YES when number is likely prime.
 *                     MP_NO otherwise.
 * @param [in] n1 SP integer temporary.
 * @param [in] y SP integer temporary.
 * @param [in] r SP integer temporary.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result,
                                    sp_int* n1, sp_int* y, sp_int* r)
{
    int s;
    int j;
    int err = MP_OKAY;

    /* default */
    *result = MP_NO;

    /* ensure b > 1 */
    if (sp_cmp_d(b, 1) == MP_GT) {
        /* get n1 = a - 1 */
        (void)sp_copy(a, n1);
        _sp_sub_d(n1, 1, n1);
        /* set 2**s * r = n1 */
        (void)sp_copy(n1, r);

        /* count the number of least significant bits
         * which are zero
         */
        s = sp_cnt_lsb(r);

        /* now divide n - 1 by 2**s */
        sp_rshb(r, s, r);

        /* compute y = b**r mod a */
        err = sp_exptmod(b, r, a, y);

        if (err == MP_OKAY) {
            /* probably prime until shown otherwise */
            *result = MP_YES;

            /* if y != 1 and y != n1 do */
            if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
                j = 1;
                /* while j <= s-1 and y != n1 */
                while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
                    err = sp_sqrmod(y, a, y);
                    if (err != MP_OKAY) {
                        break;
                    }

                    /* if y == 1 then composite */
                    if (sp_cmp_d(y, 1) == MP_EQ) {
                        *result = MP_NO;
                        break;
                    }
                    ++j;
                }

                /* if y != n1 then composite */
                if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
                    *result = MP_NO;
                }
            }
        }
    }

    return err;
}
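/* Illustrative example (editorial addition): the decomposition used above for
 * a = 97 and base b = 2 is
 *
 *     n1 = a - 1 = 96 = 2^5 * 3,  so s = 5 and r = 3
 *     y  = b^r mod a = 2^3 mod 97 = 8
 *
 * Since y is neither 1 nor n1, y is squared modulo a up to s - 1 times and
 * must reach n1 (96) for a to remain "probably prime", which it does here:
 * 8^2 = 64, 64^2 mod 97 = 22, 22^2 mod 97 = 96.
 */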
/* Miller-Rabin test of "a" to the base of "b" as described in
 * HAC pp. 139 Algorithm 4.24.
 *
 * Sets result to 0 if definitely composite or 1 if probably prime.
 * For a random base the chance of error is no more than 1/4 and often
 * very much lower.
 *
 * @param [in] a SP integer to check.
 * @param [in] b SP integer that is a small prime.
 * @param [out] result MP_YES when number is likely prime.
 *                     MP_NO otherwise.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result)
{
    int err = MP_OKAY;
    sp_int *n1;
    sp_int *y;
    sp_int *r;
    DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3);

    ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL);
    if (err == MP_OKAY) {
        n1 = t[0];
        y  = t[1];
        r  = t[2];

        /* Only 'y' needs to be twice as big. */
        sp_init_size(n1, a->used * 2 + 1);
        sp_init_size(y, a->used * 2 + 1);
        sp_init_size(r, a->used * 2 + 1);

        err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);

        sp_clear(n1);
        sp_clear(y);
        sp_clear(r);
    }

    FREE_SP_INT_ARRAY(t, NULL);

    return err;
}
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE 54

static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE 256

/* The first 256 primes. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
/* Check whether a is prime.
 * Checks against a number of small primes and does t iterations of
 * Miller-Rabin.
 *
 * @param [in] a SP integer to check.
 * @param [in] t Number of iterations of Miller-Rabin test to perform.
 * @param [out] result MP_YES when number is prime.
 *                     MP_NO otherwise.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or result is NULL, or t is out of range.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_prime_is_prime(sp_int* a, int t, int* result)
{
    int err = MP_OKAY;
    int i;
    int haveRes = 0;
    sp_int_digit d;
    DECL_SP_INT(b, 2);

    if ((a == NULL) || (result == NULL)) {
        if (result != NULL) {
            *result = MP_NO;
        }
        err = MP_VAL;
    }

    if ((err == MP_OKAY) && ((t <= 0) || (t > SP_PRIME_SIZE))) {
        *result = MP_NO;
        err = MP_VAL;
    }

    if ((err == MP_OKAY) && sp_isone(a)) {
        *result = MP_NO;
        haveRes = 1;
    }

    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
        /* check against primes table */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
                *result = MP_YES;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        /* do trial division */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            err = sp_mod_d(a, sp_primes[i], &d);
            if ((err != MP_OKAY) || (d == 0)) {
                *result = MP_NO;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        ALLOC_SP_INT(b, 1, err, NULL);
        if (err == MP_OKAY) {
            /* now do 't' miller rabins */
            sp_init_size(b, 1);
            for (i = 0; i < t; i++) {
                sp_set(b, sp_primes[i]);
                err = sp_prime_miller_rabin(a, b, result);
                if ((err != MP_OKAY) || (*result == MP_NO)) {
                    break;
                }
            }
        }
    }

    FREE_SP_INT(b, NULL);

    return err;
}
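/* Minimal usage sketch (editorial addition; names are illustrative):
 *
 *     int isPrime = MP_NO;
 *     if (sp_prime_is_prime(&p, 8, &isPrime) == MP_OKAY) {
 *         // isPrime is MP_YES or MP_NO
 *     }
 *
 * t selects how many of the small primes in sp_primes[] are used as
 * Miller-Rabin bases, so it must be between 1 and SP_PRIME_SIZE.
 */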
/* Check whether a is prime.
 * Checks against a number of small primes and does t iterations of
 * Miller-Rabin.
 *
 * @param [in] a SP integer to check.
 * @param [in] t Number of iterations of Miller-Rabin test to perform.
 * @param [out] result MP_YES when number is prime.
 *                     MP_NO otherwise.
 * @param [in] rng Random number generator for Miller-Rabin testing.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, result or rng is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
{
    int err = MP_OKAY;
    int ret = MP_YES;
    int haveRes = 0;
    int i;
#ifndef WC_NO_RNG
    sp_int *b = NULL;
    sp_int *c = NULL;
    sp_int *n1 = NULL;
    sp_int *y = NULL;
    sp_int *r = NULL;
#endif /* WC_NO_RNG */

    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
        err = MP_VAL;
    }

    if ((err == MP_OKAY) && sp_isone(a)) {
        ret = MP_NO;
        haveRes = 1;
    }

    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
        /* check against primes table */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
                ret = MP_YES;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        sp_int_digit d;

        /* do trial division */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            err = sp_mod_d(a, sp_primes[i], &d);
            if ((err != MP_OKAY) || (d == 0)) {
                ret = MP_NO;
                haveRes = 1;
                break;
            }
        }
    }

#ifndef WC_NO_RNG
    /* now do a miller rabin with up to t random numbers, this should
     * give a (1/4)^t chance of a false prime. */
    if ((err == MP_OKAY) && (!haveRes)) {
        int bits = sp_count_bits(a);
        word32 baseSz = (bits + 7) / 8;
        DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 5);

        ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 5, err, NULL);
        if (err == MP_OKAY) {
            b  = d[0];
            c  = d[1];
            n1 = d[2];
            y  = d[3];
            r  = d[4];

            /* Only 'y' needs to be twice as big. */
            sp_init_size(b , a->used * 2 + 1);
            sp_init_size(c , a->used * 2 + 1);
            sp_init_size(n1, a->used * 2 + 1);
            sp_init_size(y , a->used * 2 + 1);
            sp_init_size(r , a->used * 2 + 1);

            _sp_sub_d(a, 2, c);

            bits &= SP_WORD_MASK;

            while (t > 0) {
                err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
                if (err != MP_OKAY) {
                    break;
                }
                b->used = a->used;

                /* Ensure the top word has no more bits than necessary. */
                if (bits > 0) {
                    b->dp[b->used - 1] &= (1L << bits) - 1;
                }

                if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
                    continue;
                }

                err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
                if ((err != MP_OKAY) || (ret == MP_NO)) {
                    break;
                }

                t--;
            }

            sp_clear(n1);
            sp_clear(y);
            sp_clear(r);
            sp_clear(b);
            sp_clear(c);
        }

        FREE_SP_INT_ARRAY(d, NULL);
    }
#else
    (void)t;
#endif /* !WC_NO_RNG */

    if (result != NULL) {
        *result = ret;
    }

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
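/* Minimal usage sketch (editorial addition; error handling condensed, rng and
 * p assumed to be initialised as in the sp_rand_prime() sketch above).  The
 * _ex variant draws random Miller-Rabin bases from rng instead of using the
 * fixed small-prime table:
 *
 *     int isPrime = MP_NO;
 *     (void)sp_prime_is_prime_ex(&p, 8, &isPrime, &rng);
 */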
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
 *
 * @param [in] a SP integer of first operand.
 * @param [in] b SP integer of second operand.
 * @param [out] r SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, b or r is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    else if (sp_iszero(a)) {
        /* GCD of 0 and 0 is undefined as all integers divide 0. */
        if (sp_iszero(b)) {
            err = MP_VAL;
        }
        else {
            err = sp_copy(b, r);
        }
    }
    else if (sp_iszero(b)) {
        err = sp_copy(a, r);
    }
    else {
        sp_int* u = NULL;
        sp_int* v = NULL;
        sp_int* t = NULL;
        int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
        DECL_SP_INT_ARRAY(d, used, 3);

        ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
        if (err == MP_OKAY) {
            u = d[0];
            v = d[1];
            t = d[2];
            sp_init_size(u, used);
            sp_init_size(v, used);
            sp_init_size(t, used);

            if (_sp_cmp(a, b) != MP_LT) {
                sp_copy(b, u);
                /* First iteration - u = a, v = b */
                if (b->used == 1) {
                    err = sp_mod_d(a, b->dp[0], &v->dp[0]);
                    if (err == MP_OKAY) {
                        v->used = (v->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(a, b, v);
                }
            }
            else {
                sp_copy(a, u);
                /* First iteration - u = b, v = a */
                if (a->used == 1) {
                    err = sp_mod_d(b, a->dp[0], &v->dp[0]);
                    if (err == MP_OKAY) {
                        v->used = (v->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(b, a, v);
                }
            }
        }

        if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            u->sign = MP_ZPOS;
            v->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            while (!sp_iszero(v)) {
                if (v->used == 1) {
                    err = sp_mod_d(u, v->dp[0], &t->dp[0]);
                    if (err == MP_OKAY) {
                        t->used = (t->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(u, v, t);
                }
                if (err != MP_OKAY) {
                    break;
                }
                sp_copy(v, u);
                sp_copy(t, v);
            }
            if (err == MP_OKAY)
                err = sp_copy(u, r);
        }

        FREE_SP_INT_ARRAY(d, NULL);
    }

    return err;
}
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
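/* Worked example (editorial addition): the Euclidean loop above for
 * gcd(48, 18):
 *
 *     initial:  u = 18, v = 48 mod 18 = 12
 *     loop:     t = 18 mod 12 = 6;  u = 12, v = 6
 *               t = 12 mod  6 = 0;  u =  6, v = 0
 *     result:   r = u = 6
 */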
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
 *
 * @param [in] a SP integer of first operand.
 * @param [in] b SP integer of second operand.
 * @param [out] r SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, b or r is NULL; or a or b is zero.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int used = ((a == NULL) || (b == NULL)) ? 1 :
                   (a->used >= b->used ? a->used + 1 : b->used + 1);
    DECL_SP_INT_ARRAY(t, used, 2);

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

    /* LCM of 0 and any number is undefined as 0 is not in the set of values
     * being used.
     */
    if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
        err = MP_VAL;
    }

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        sp_init_size(t[0], used);
        sp_init_size(t[1], used);

        err = sp_gcd(a, b, t[0]);
        if (err == MP_OKAY) {
            if (_sp_cmp_abs(a, b) == MP_GT) {
                err = sp_div(a, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    err = sp_mul(b, t[1], r);
                }
            }
            else {
                err = sp_div(b, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    err = sp_mul(a, t[1], r);
                }
            }
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
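/* Worked example (editorial addition): sp_lcm() divides the operand with the
 * larger absolute value by the GCD before multiplying, which keeps the
 * intermediate result small.  For a = 12, b = 18:
 *
 *     gcd(12, 18) = 6
 *     18 / 6      = 3
 *     12 * 3      = 36 = lcm(12, 18)
 */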
/* Returns the run time settings.
 *
 * @return Settings value.
 */
word32 CheckRunTimeSettings(void)
{
    return CTC_SETTINGS;
}

/* Returns the fast math settings.
 *
 * @return Setting - number of bits in a digit.
 */
word32 CheckRunTimeFastMath(void)
{
    return SP_WORD_SIZE;
}

#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */