sp_int.c 652 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
942219423194241942519426194271942819429194301943119432194331943419435194361943719438194391944019441194421944319444194451944619447194481944919450194511945219453194541945519456194571945819459194601946119462194631946419465194661946719468194691947019471194721947319474194751947619477194781947919480194811948219483194841948519486194871948819489194901949119492194931949419495194961949719498194991950019501195021950319504195051950619507195081950919510195111951219513195141951519516195171951819519195201952119522195231952419525195261952719528195291953019531195321953319534195351953619537195381953919540195411954219543195441954519546195471954819549195501955119552195531955419555195561955719558195591956019561195621956319564195651956619567195681956919570195711957219573195741957519576195771957819579195801958119582195831958419585195861958719588195891959019591195921959319594195951959619597195981959919600
  1. /* sp_int.c
  2. *
  3. * Copyright (C) 2006-2023 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Implementation by Sean Parkinson. */
  22. /*
  23. DESCRIPTION
  24. This library provides single precision (SP) integer math functions.
  25. */
  26. #ifdef HAVE_CONFIG_H
  27. #include <config.h>
  28. #endif
  29. #include <wolfssl/wolfcrypt/settings.h>
  30. #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
  31. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  32. defined(WOLFSSL_SP_NO_MALLOC)
  33. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  34. !defined(WOLFSSL_SP_NO_DYN_STACK)
  35. #pragma GCC diagnostic push
  36. /* We are statically declaring a variable smaller than sp_int.
  37. * We track available memory in the 'size' field.
  38. * Disable warnings of sp_int being partly outside array bounds of variable.
  39. */
  40. #pragma GCC diagnostic ignored "-Warray-bounds"
  41. #endif
  42. #endif
  43. #ifdef NO_INLINE
  44. #include <wolfssl/wolfcrypt/misc.h>
  45. #else
  46. #define WOLFSSL_MISC_INCLUDED
  47. #include <wolfcrypt/src/misc.c>
  48. #endif
  49. /* SP Build Options:
  50. * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
  51. * WOLFSSL_HAVE_SP_DH: Enable SP DH support
  52. * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
  53. * WOLFSSL_SP_MATH: Use only single precision math and algorithms
  54. * it supports (no fastmath tfm.c or normal integer.c)
  55. * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
  56. * (replacement for tfm.c and integer.c)
  57. * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
  58. * stack variables
  59. * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
  60. * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
  61. * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
  62. * WOLFSSL_SP_4096 Enable RSA/DH 4096-bit support
  63. * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
  64. * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
  65. * WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
  66. * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
  67. * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
  68. * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
  69. * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
  70. * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
  71. * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
  72. * (used with -mthumb)
  73. * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
  74. * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
  75. * WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
  76. * WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
  77. * WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
  78. * WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
  79. * (explicitly uses register 'r7')
  80. * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
  81. * WOLFSSL_SP_PPC Enable PPC assembly speedups
  82. * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
  83. * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
  84. * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
  85. * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
  86. * WOLFSSL_SP_S390X Enable S390X assembly speedups
  87. * SP_WORD_SIZE Force 32 or 64 bit mode
  88. * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
  89. * will return FP_WOULDBLOCK for long operations and function must be
  90. * called again until complete.
  91. * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
  92. * exponentiation implementation.
  93. * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
  94. * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
  95. * pointer is not allowed.
  96. * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
  97. * Dynamic arrays used when not small stack.
  98. * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
  99. * WOLFSSL_SP_LOW_MEM Use algorithms that use less memory.
  100. */
  101. /* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
  102. #if defined(__clang__) && defined(__clang_major__) && \
  103. (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
  104. #undef WOLFSSL_SP_SMALL
  105. #endif
  106. #include <wolfssl/wolfcrypt/sp_int.h>
  107. /* DECL_SP_INT: Declare one variable of type 'sp_int'. */
  108. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  109. !defined(WOLFSSL_SP_NO_MALLOC)
/* Heap build: pointer starts NULL and is later assigned by ALLOC_SP_INT;
 * the size parameter 's' is unused at declaration time. */
  110. /* Declare a variable that will be assigned a value on XMALLOC. */
  111. #define DECL_SP_INT(n, s) \
  112. sp_int* n = NULL
  113. #else
  114. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  115. !defined(WOLFSSL_SP_NO_DYN_STACK)
/* C99 build: declare a byte array of exactly MP_INT_SIZEOF(s) bytes on the
 * stack and alias it as an sp_int - smaller than a full sp_int when 's' is
 * less than SP_INT_DIGITS (see the -Warray-bounds pragma near top of file). */
  116. /* Declare a variable on the stack with the required data size. */
  117. #define DECL_SP_INT(n, s) \
  118. byte n##d[MP_INT_SIZEOF(s)]; \
  119. sp_int* (n) = (sp_int*)n##d
  120. #else
/* Fallback: declare a full-size sp_int on the stack; 's' is ignored.
 * Declared as a one-element array so 'n' usable as a pointer like above. */
  121. /* Declare a variable on the stack. */
  122. #define DECL_SP_INT(n, s) \
  123. sp_int n[1]
  124. #endif
  125. #endif
  126. /* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
  127. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  128. !defined(WOLFSSL_SP_NO_MALLOC)
/* Heap build: validate 's' against SP_INT_DIGITS, then XMALLOC exactly
 * MP_INT_SIZEOF(s) bytes from heap 'h'. Sets (err) to MP_VAL on a too-large
 * size and MP_MEM on allocation failure; does nothing if (err) != MP_OKAY
 * on entry. */
  129. /* Dynamically allocate just enough data to support size. */
  130. #define ALLOC_SP_INT(n, s, err, h) \
  131. do { \
  132. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  133. (err) = MP_VAL; \
  134. } \
  135. if ((err) == MP_OKAY) { \
  136. (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), \
  137. DYNAMIC_TYPE_BIGINT); \
  138. if ((n) == NULL) { \
  139. (err) = MP_MEM; \
  140. } \
  141. } \
  142. } \
  143. while (0)
  144. /* Dynamically allocate just enough data to support size - and set size. */
/* Cast matches the stack variant below and avoids an implicit
 * signed-to-unsigned conversion warning when 's' is a signed expression. */
  145. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  146. do { \
  147. ALLOC_SP_INT(n, s, err, h); \
  148. if ((err) == MP_OKAY) { \
  149. (n)->size = (unsigned int)(s); \
  150. } \
  151. } \
  152. while (0)
  153. #else
/* Stack build: storage already declared by DECL_SP_INT, so only the size
 * bound needs checking here. */
  154. /* Array declared on stack - check size is valid. */
  155. #define ALLOC_SP_INT(n, s, err, h) \
  156. do { \
  157. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  158. (err) = MP_VAL; \
  159. } \
  160. } \
  161. while (0)
  162. /* Array declared on stack - set the size field. */
  163. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  164. do { \
  165. ALLOC_SP_INT(n, s, err, h); \
  166. if ((err) == MP_OKAY) { \
  167. (n)->size = (unsigned int)(s); \
  168. } \
  169. } \
  170. while (0)
  171. #endif
  172. /* FREE_SP_INT: Free an 'sp_int' variable. */
  173. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  174. !defined(WOLFSSL_SP_NO_MALLOC)
  175. /* Free dynamically allocated data. */
/* NOTE(review): NULL guard kept - (n) stays NULL when ALLOC_SP_INT failed,
 * and XFREE is presumably not guaranteed NULL-safe in every build
 * configuration; confirm before removing. */
  176. #define FREE_SP_INT(n, h) \
  177. do { \
  178. if ((n) != NULL) { \
  179. XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
  180. } \
  181. } \
  182. while (0)
  183. #else
/* Stack build: storage is automatic, nothing to release. */
  184. /* Nothing to do as declared on stack. */
  185. #define FREE_SP_INT(n, h) WC_DO_NOTHING
  186. #endif
  187. /* Declare a variable that will be assigned a value on XMALLOC. */
/* n##d is the single backing allocation; (n)[] holds 'c' pointers carved out
 * of it by ALLOC_DYN_SP_INT_ARRAY. */
  188. #define DECL_DYN_SP_INT_ARRAY(n, s, c) \
  189. sp_int* n##d = NULL; \
  190. sp_int* (n)[c] = { NULL, }
  191. /* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
  192. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  193. !defined(WOLFSSL_SP_NO_MALLOC)
  194. /* Declare a variable that will be assigned a value on XMALLOC. */
  195. #define DECL_SP_INT_ARRAY(n, s, c) \
  196. DECL_DYN_SP_INT_ARRAY(n, s, c)
  197. #else
  198. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  199. !defined(WOLFSSL_SP_NO_DYN_STACK)
/* C99 build: one stack byte array big enough for 'c' sp_ints of 's' digits;
 * pointer array is filled in by ALLOC_SP_INT_ARRAY. */
  200. /* Declare a variable on the stack with the required data size. */
  201. #define DECL_SP_INT_ARRAY(n, s, c) \
  202. byte n##d[MP_INT_SIZEOF(s) * (c)]; \
  203. sp_int* (n)[c] = { NULL, }
  204. #else
/* Fallback: 'c' full-size sp_ints on the stack; 's' is unused here. */
  205. /* Declare a variable on the stack. */
  206. #define DECL_SP_INT_ARRAY(n, s, c) \
  207. sp_int n##d[c]; \
  208. sp_int* (n)[c]
  209. #endif
  210. #endif
  211. /* Dynamically allocate just enough data to support multiple sp_ints of the
  212. * required size. Use pointers into data to make up array and set sizes.
  213. */
/* Validates 's' against SP_INT_DIGITS (MP_VAL), XMALLOCs one block for all
 * 'c' entries (MP_MEM on failure), then slices it: (n)[i] is advanced with
 * MP_INT_NEXT so each entry owns MP_INT_SIZEOF(s) bytes, and every entry's
 * 'size' field is set to 's'. No-op when (err) != MP_OKAY on entry. */
  214. #define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h) \
  215. do { \
  216. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  217. (err) = MP_VAL; \
  218. } \
  219. if ((err) == MP_OKAY) { \
  220. n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h), \
  221. DYNAMIC_TYPE_BIGINT); \
  222. if (n##d == NULL) { \
  223. (err) = MP_MEM; \
  224. } \
  225. else { \
  226. int n##ii; \
  227. (n)[0] = n##d; \
  228. (n)[0]->size = (s); \
  229. for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
  230. (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
  231. (n)[n##ii]->size = (s); \
  232. } \
  233. } \
  234. } \
  235. } \
  236. while (0)
  237. /* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
  238. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  239. !defined(WOLFSSL_SP_NO_MALLOC)
  240. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  241. ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
  242. #else
  243. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  244. !defined(WOLFSSL_SP_NO_DYN_STACK)
  245. /* Data declared on stack that supports multiple sp_ints of the
  246. * required size. Use pointers into data to make up array and set sizes.
  247. */
  248. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  249. do { \
  250. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  251. (err) = MP_VAL; \
  252. } \
  253. if ((err) == MP_OKAY) { \
  254. int n##ii; \
  255. (n)[0] = (sp_int*)n##d; \
  256. ((sp_int_minimal*)(n)[0])->size = (s); \
  257. for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
  258. (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
  259. ((sp_int_minimal*)(n)[n##ii])->size = (s); \
  260. } \
  261. } \
  262. } \
  263. while (0)
  264. #else
  265. /* Data declared on stack that supports multiple sp_ints of the
  266. * required size. Set into array and set sizes.
  267. */
  268. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  269. do { \
  270. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  271. (err) = MP_VAL; \
  272. } \
  273. if ((err) == MP_OKAY) { \
  274. int n##ii; \
  275. for (n##ii = 0; n##ii < (int)(c); n##ii++) { \
  276. (n)[n##ii] = &n##d[n##ii]; \
  277. (n)[n##ii]->size = (s); \
  278. } \
  279. } \
  280. } \
  281. while (0)
  282. #endif
  283. #endif
  284. /* Free data variable that was dynamically allocated. */
  285. #define FREE_DYN_SP_INT_ARRAY(n, h) \
  286. do { \
  287. if (n##d != NULL) { \
  288. XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
  289. } \
  290. } \
  291. while (0)
  292. /* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
  293. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  294. !defined(WOLFSSL_SP_NO_MALLOC)
  295. #define FREE_SP_INT_ARRAY(n, h) \
  296. FREE_DYN_SP_INT_ARRAY(n, h)
  297. #else
  298. /* Nothing to do as data declared on stack. */
  299. #define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
  300. #endif
#ifndef WOLFSSL_NO_ASM
/* Map the GCC-style asm keywords used below onto the spellings accepted by
 * other toolchains. */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif
#if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
/*
 * CPU: x86_64
 */
#ifndef _MSC_VER
/* GCC/Clang style compilers: primitives implemented with extended inline
 * asm. mulq produces rdx:rax, which is why rax/rdx appear in the clobber
 * lists; "cc" marks the flags register as clobbered. */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq $0 , %[o] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Index of highest bit set. */
/* NOTE(review): bsr leaves the destination undefined when va == 0 -
 * presumably callers guarantee a non-zero argument; confirm at call sites. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
    __asm__ __volatile__ ( \
        "bsr %[a], %[i] \n\t" \
        : [i] "=r" (vi) \
        : [a] "r" (va) \
        : "cc" \
    )
#else
/* MSVC on x64: no inline asm - use compiler intrinsics from <intrin.h>
 * (_umul128 for 64x64->128 multiply, _addcarry_u64/_subborrow_u64 for
 * carry/borrow chains). */
#include <intrin.h>
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    vl = _umul128(va, vb, &vh)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
do { \
    vl = _umul128(va, vb, &vh); \
    vo = 0; \
} \
while (0)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    vl = _umul128(va, va, &vh)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
do { \
    unsigned char c; \
    c = _subborrow_u64(0, vl, va, &vl); \
    _subborrow_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
} \
while (0)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
do { \
    unsigned long idx; \
    _BitScanReverse64(&idx, va); \
    vi = idx; \
} \
while (0)
#endif
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
    _MSC_VER >= 1920)
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divq instruction on Intel x64.
 *
 * NOTE(review): divq raises #DE if the quotient does not fit in 64 bits -
 * presumably callers guarantee hi < d; confirm at call sites.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
#ifndef _MSC_VER
    /* divq divides rdx:rax by the operand; quotient is left in rax (lo). */
    __asm__ __volatile__ (
        "divq %2"
        : "+a" (lo)
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
#elif defined(_MSC_VER) && _MSC_VER >= 1920
    /* _udiv128 (VS2019+) divides hi:lo by d.
     * NOTE(review): remainder out-pointer is NULL - verify _udiv128 accepts
     * NULL when the remainder is not wanted. */
    return _udiv128(hi, lo, d, NULL);
#endif
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
/*
 * CPU: x86
 */
/* mull produces edx:eax; eax/edx therefore appear in the clobber lists.
 * Some macros use "rm"/"r" constraints instead of "m" - presumably to give
 * the register-starved 32-bit allocator more freedom; keep as-is. */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "eax", "edx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl $0 , %[o] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
  808. /* Index of highest bit set. */
  809. #define SP_ASM_HI_BIT_SET_IDX(va, vi) \
  810. __asm__ __volatile__ ( \
  811. "bsr %[a], %[i] \n\t" \
  812. : [i] "=r" (vi) \
  813. : [a] "r" (va) \
  814. : "cC" \
  815. )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divl instruction on Intel x86 (32-bit).
 *
 * NOTE(review): divl raises #DE if the quotient does not fit in 32 bits -
 * presumably callers guarantee hi < d; confirm at call sites.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    /* divl divides edx:eax by the operand; quotient is left in eax (lo). */
    __asm__ __volatile__ (
        "divl %2"
        : "+a" (lo)
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
/*
 * CPU: Aarch64
 */
/* mul gives the low 64 bits of the product and umulh the high 64 bits.
 * x8/x9 are used as scratch registers and are listed as clobbers. */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        "mov %[l], x8 \n\t" \
        "mov %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "umulh %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "clz %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * The asm works in two phases:
 *   1. Normalize: if the top 16 bits of d are zero, shift d, hi and lo left
 *      by 16 (csel picks a shift of 16 or 0). The lo bits shifted out are
 *      or-ed into hi via x4 = 63 - shift and an extra "lsr 1", which avoids
 *      an undefined shift by 64 when the shift amount is 0.
 *   2. Estimate: divide by x5 = (d >> 32) + 1 to produce successive 32-bit
 *      quotient chunks (accumulated in x6), subtracting q*d from hi:lo each
 *      round, with a final udiv by d itself to pick up the last correction.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        "lsr x3, %[d], 48\n\t"
        "mov x5, 16\n\t"
        "cmp x3, 0\n\t"
        "mov x4, 63\n\t"
        "csel x3, x5, xzr, eq\n\t"
        "sub x4, x4, x3\n\t"
        "lsl %[d], %[d], x3\n\t"
        "lsl %[hi], %[hi], x3\n\t"
        "lsr x5, %[lo], x4\n\t"
        "lsl %[lo], %[lo], x3\n\t"
        "orr %[hi], %[hi], x5, lsr 1\n\t"
        "lsr x5, %[d], 32\n\t"
        "add x5, x5, 1\n\t"
        "udiv x3, %[hi], x5\n\t"
        "lsl x6, x3, 32\n\t"
        "mul x4, %[d], x6\n\t"
        "umulh x3, %[d], x6\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "udiv x3, %[hi], x5\n\t"
        "lsl x3, x3, 32\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "sub %[lo], %[lo], x4\n\t"
        "udiv x3, %[lo], %[d]\n\t"
        "add %[hi], x6, x3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "x3", "x4", "x5", "x6", "cc"
    );
    return hi;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
  1057. #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
  1058. SP_WORD_SIZE == 32
  1059. /*
  1060. * CPU: ARM32 or Cortex-M4 and similar
  1061. */
  1062. /* Multiply va by vb and store double size result in: vh | vl */
  1063. #define SP_ASM_MUL(vl, vh, va, vb) \
  1064. __asm__ __volatile__ ( \
  1065. "umull %[l], %[h], %[a], %[b] \n\t" \
  1066. : [h] "+r" (vh), [l] "+r" (vl) \
  1067. : [a] "r" (va), [b] "r" (vb) \
  1068. : "memory" \
  1069. )
  1070. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1071. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1072. __asm__ __volatile__ ( \
  1073. "umull %[l], %[h], %[a], %[b] \n\t" \
  1074. "mov %[o], #0 \n\t" \
  1075. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1076. : [a] "r" (va), [b] "r" (vb) \
  1077. : \
  1078. )
  1079. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1080. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1081. __asm__ __volatile__ ( \
  1082. "umull r8, r9, %[a], %[b] \n\t" \
  1083. "adds %[l], %[l], r8 \n\t" \
  1084. "adcs %[h], %[h], r9 \n\t" \
  1085. "adc %[o], %[o], #0 \n\t" \
  1086. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1087. : [a] "r" (va), [b] "r" (vb) \
  1088. : "r8", "r9", "cc" \
  1089. )
  1090. /* Multiply va by vb and add double size result into: vh | vl */
  1091. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1092. __asm__ __volatile__ ( \
  1093. "umlal %[l], %[h], %[a], %[b] \n\t" \
  1094. : [l] "+r" (vl), [h] "+r" (vh) \
  1095. : [a] "r" (va), [b] "r" (vb) \
  1096. : \
  1097. )
  1098. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1099. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1100. __asm__ __volatile__ ( \
  1101. "umull r8, r9, %[a], %[b] \n\t" \
  1102. "adds %[l], %[l], r8 \n\t" \
  1103. "adcs %[h], %[h], r9 \n\t" \
  1104. "adc %[o], %[o], #0 \n\t" \
  1105. "adds %[l], %[l], r8 \n\t" \
  1106. "adcs %[h], %[h], r9 \n\t" \
  1107. "adc %[o], %[o], #0 \n\t" \
  1108. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1109. : [a] "r" (va), [b] "r" (vb) \
  1110. : "r8", "r9", "cc" \
  1111. )
  1112. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1113. * Assumes first add will not overflow vh | vl
  1114. */
  1115. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1116. __asm__ __volatile__ ( \
  1117. "umull r8, r9, %[a], %[b] \n\t" \
  1118. "adds %[l], %[l], r8 \n\t" \
  1119. "adc %[h], %[h], r9 \n\t" \
  1120. "adds %[l], %[l], r8 \n\t" \
  1121. "adcs %[h], %[h], r9 \n\t" \
  1122. "adc %[o], %[o], #0 \n\t" \
  1123. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1124. : [a] "r" (va), [b] "r" (vb) \
  1125. : "r8", "r9", "cc" \
  1126. )
/* Square va and store double size result in: vh | vl
 * UMULL writes both halves directly; no accumulation is performed.
 * NOTE(review): the "memory" clobber looks stronger than needed for a pure
 * register operation - presumably kept as a scheduling barrier; confirm
 * before removing.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "umull %[l], %[h], %[a], %[a] \n\t"              \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va)                                   \
        : "memory"                                       \
    )
/* Square va and add double size result into: vo | vh | vl
 * Product is built in r9|r8, then accumulated with carry up into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "umull r8, r9, %[a], %[a] \n\t"                  \
        "adds %[l], %[l], r8 \n\t"                       \
        "adcs %[h], %[h], r9 \n\t"                       \
        "adc %[o], %[o], #0 \n\t"                        \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va)                                   \
        : "r8", "r9", "cc"                               \
    )
/* Square va and add double size result into: vh | vl
 * Single UMLAL accumulate; any carry out of vh is discarded (caller must
 * guarantee no overflow).
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "umlal %[l], %[h], %[a], %[a] \n\t"              \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* Add va into: vh | vl
 * Carry out of the low word is propagated into vh with ADC.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a] \n\t"                     \
        "adc %[h], %[h], #0 \n\t"                        \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* Sub va from: vh | vl
 * Borrow out of the low word is propagated into vh with SBC.
 */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subs %[l], %[l], %[a] \n\t"                     \
        "sbc %[h], %[h], #0 \n\t"                        \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* Add two times vc | vb | va into vo | vh | vl
 * The triple-word value is accumulated twice with full carry chains.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a] \n\t"                     \
        "adcs %[h], %[h], %[b] \n\t"                     \
        "adc %[o], %[o], %[c] \n\t"                      \
        "adds %[l], %[l], %[a] \n\t"                     \
        "adcs %[h], %[h], %[b] \n\t"                     \
        "adc %[o], %[o], %[c] \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
/* Count leading zeros - instruction only available on ARMv7 and newer.
 * Stores the count of leading zero bits of va into vn (0..32).
 */
#define SP_ASM_LZCNT(va, vn)                             \
    __asm__ __volatile__ (                               \
        "clz %[n], %[a] \n\t"                            \
        : [n] "=r" (vn)                                  \
        : [a] "r" (va)                                   \
        :                                                \
    )
#endif
  1195. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  1196. #ifndef WOLFSSL_SP_ARM32_UDIV
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    sp_int_digit r = 0;
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
    /* De Bruijn multiply-and-lookup table used to emulate CLZ on cores
     * without the instruction (pre-ARMv7). */
    static const char debruijn32[32] = {
         0, 31,  9, 30,  3,  8, 13, 29,  2,  5,  7, 21, 12, 24, 28, 19,
         1, 10,  4, 14,  6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
    };
    static const sp_uint32 debruijn32_mul = 0x076be629;
#endif

    __asm__ __volatile__ (
        /* Shift d so that top bit is set. */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        /* CLZ emulation: smear bits down, +1 isolates 2^(32-clz), then the
         * De Bruijn multiply/lookup yields the shift amount in r5. */
        "ldr	r4, %[m]\n\t"
        "mov	r5, %[d]\n\t"
        "orr	r5, r5, r5, lsr #1\n\t"
        "orr	r5, r5, r5, lsr #2\n\t"
        "orr	r5, r5, r5, lsr #4\n\t"
        "orr	r5, r5, r5, lsr #8\n\t"
        "orr	r5, r5, r5, lsr #16\n\t"
        "add	r5, r5, #1\n\t"
        "mul	r6, r5, r4\n\t"
        "lsr	r5, r6, #27\n\t"
        "ldrb	r5, [%[t], r5]\n\t"
#else
        "clz	r5, %[d]\n\t"
#endif
        /* Normalise: shift hi|lo left by r5 as well; the rsb #31 + extra
         * "lsr #1" keeps the shift in range when r5 == 0. */
        "rsb	r6, r5, #31\n\t"
        "lsl	%[d], %[d], r5\n\t"
        "lsl	%[hi], %[hi], r5\n\t"
        "lsr	r9, %[lo], r6\n\t"
        "lsl	%[lo], %[lo], r5\n\t"
        "orr	%[hi], %[hi], r9, lsr #1\n\t"
        /* r5 = (d >> 1) + 1: half divisor used for the constant-time
         * compare-and-subtract loop. */
        "lsr	r5, %[d], #1\n\t"
        "add	r5, r5, #1\n\t"
        "mov	r6, %[lo]\n\t"
        "mov	r9, %[hi]\n\t"
        /* Do top 32 */
        /* Branch-free trial subtraction: r8 = all-ones if r9 >= r5. */
        "subs	r8, r5, r9\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r9, r9, r8\n\t"
        /* Next 30 bits */
        "mov	r4, #29\n\t"
        "\n1:\n\t"
        /* Shift next dividend bit into the remainder (r9). */
        "movs	r6, r6, lsl #1\n\t"
        "adc	r9, r9, r9\n\t"
        "subs	r8, r5, r9\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r9, r9, r8\n\t"
        "subs	r4, r4, #1\n\t"
        "bpl	1b\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "add	%[r], %[r], #1\n\t"
        /* Handle difference has hi word > 0. */
        /* Two correction rounds: add the (signed) high word of the
         * difference lo|hi - r*d back into the quotient estimate. */
        "umull	r4, r5, %[r], %[d]\n\t"
        "subs	r4, %[lo], r4\n\t"
        "sbc	r5, %[hi], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        "umull	r4, r5, %[r], %[d]\n\t"
        "subs	r4, %[lo], r4\n\t"
        "sbc	r5, %[hi], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Add 1 to result if bottom half of difference is >= d. */
        "mul	r4, %[r], %[d]\n\t"
        "subs	r4, %[lo], r4\n\t"
        "subs	r9, %[d], r4\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        "subs	r9, r9, #1\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else
        :
#endif
        : "r4", "r5", "r6", "r8", "r9", "cc"
    );
    return r;
}
  1295. #else
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on arm32
 * Constant time.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    __asm__ __volatile__ (
        /* Normalise so the top byte of d is non-zero: shift by 8 only when
         * d < 2^24 (r3 = 8 or 0). */
        "lsrs	r3, %[d], #24\n\t"
        "it	eq\n\t"
        "moveq	r3, #8\n\t"
        "it	ne\n\t"
        "movne	r3, #0\n\t"
        /* Shift hi|lo by the same amount; rsb #31 + "lsr #1" avoids an
         * out-of-range shift when r3 == 0. */
        "rsb	r4, r3, #31\n\t"
        "lsl	%[d], %[d], r3\n\t"
        "lsl	%[hi], %[hi], r3\n\t"
        "lsr	r5, %[lo], r4\n\t"
        "lsl	%[lo], %[lo], r3\n\t"
        "orr	%[hi], %[hi], r5, lsr #1\n\t"
        /* r5 = (d >> 16) + 1: 16-bit divisor estimate for partial UDIVs. */
        "lsr	r5, %[d], 16\n\t"
        "add	r5, r5, 1\n\t"
        /* First 16-bit quotient estimate; accumulate into r6. */
        "udiv	r3, %[hi], r5\n\t"
        "lsl	r6, r3, 16\n\t"
        "umull	r4, r3, %[d], r6\n\t"
        "subs	%[lo], %[lo], r4\n\t"
        "sbc	%[hi], %[hi], r3\n\t"
        /* Second estimate on the reduced remainder. */
        "udiv	r3, %[hi], r5\n\t"
        "lsl	r3, r3, 16\n\t"
        "add	r6, r6, r3\n\t"
        "umull	r4, r3, %[d], r3\n\t"
        "subs	%[lo], %[lo], r4\n\t"
        "sbc	%[hi], %[hi], r3\n\t"
        /* Third estimate using the middle 32 bits of the remainder. */
        "lsr	r3, %[lo], 16\n\t"
        "orr	r3, r3, %[hi], lsl 16\n\t"
        "udiv	r3, r3, r5\n\t"
        "add	r6, r6, r3\n\t"
        "umull	r4, r3, %[d], r3\n\t"
        "subs	%[lo], %[lo], r4\n\t"
        "sbc	%[hi], %[hi], r3\n\t"
        /* Fourth estimate. */
        "lsr	r3, %[lo], 16\n\t"
        "orr	r3, r3, %[hi], lsl 16\n\t"
        "udiv	r3, r3, r5\n\t"
        "add	r6, r6, r3\n\t"
        "mul	r4, %[d], r3\n\t"
        "sub	%[lo], %[lo], r4\n\t"
        /* Final exact division of the small remainder by d. */
        "udiv	r3, %[lo], %[d]\n\t"
        "add	%[hi], r6, r3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "r3", "r4", "r5", "r6", "cc"
    );
    /* Quotient was accumulated into the hi operand register. */
    return hi;
}
  1355. #endif
  1356. #define SP_ASM_DIV_WORD
  1357. #endif
  1358. #define SP_INT_ASM_AVAILABLE
  1359. #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
  1360. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  1361. /*
  1362. * CPU: ARM Thumb (like Cortex-M0)
  1363. */
  1364. /* Compile with -fomit-frame-pointer, or similar, if compiler complains about
  1365. * usage of register 'r7'.
  1366. */
  1367. #if defined(__clang__)
/* Multiply va by vb and store double size result in: vh | vl
 * Thumb-1 has no 32x32->64 multiply, so the product is built from four
 * 16x16 partial products. Clobbers r4, r5, r6 and the flags.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth %[l], %[b] \n\t"                           \
        "muls %[l], r6 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r4, %[b], #16 \n\t"                        \
        "muls r6, r4 \n\t"                               \
        "lsrs %[h], r6, #16 \n\t"                        \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "muls r4, r6 \n\t"                               \
        "adds %[h], %[h], r4 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r4, %[b] \n\t"                             \
        "muls r6, r4 \n\t"                               \
        "lsrs r4, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r4 \n\t"                             \
        : [h] "+l" (vh), [l] "+l" (vl)                   \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * Same 16x16 decomposition as SP_ASM_MUL; vo is zeroed (the 64-bit product
 * cannot reach the third word).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth %[l], %[b] \n\t"                           \
        "muls %[l], r6 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5 \n\t"                               \
        "lsrs %[h], r6, #16 \n\t"                        \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "movs %[o], #0 \n\t"                             \
        "adcs %[h], %[o] \n\t"                           \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "muls r5, r6 \n\t"                               \
        "adds %[h], %[h], r5 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "cc"                               \
    )
  1428. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Fast variant: keeps zero in r5 throughout and uses r7 as an extra
 * scratch register (requires -fomit-frame-pointer or similar).
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r7, %[b] \n\t"                             \
        "muls r7, r6 \n\t"                               \
        "adds %[l], %[l], r7 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r7, r6 \n\t"                               \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bl */                                    \
        "uxth r7, %[b] \n\t"                             \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
  1466. #else
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Small/debug variant: avoids r7 (frame pointer) so r5 doubles as the
 * partial-product scratch and must be re-zeroed before each carry add.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r5, %[b] \n\t"                             \
        "muls r5, r6 \n\t"                               \
        "adds %[l], %[l], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r5, r6 \n\t"                               \
        "adds %[h], %[h], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "cc"                               \
    )
  1507. #endif
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word accumulate only: carries out of vh are discarded (caller must
 * guarantee no overflow).
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r4, %[b] \n\t"                             \
        "muls r4, r6 \n\t"                               \
        "adds %[l], %[l], r4 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r4, %[b], #16 \n\t"                        \
        "muls r6, r4 \n\t"                               \
        "lsrs r4, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r4 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r4, %[b], #16 \n\t"                        \
        "muls r4, r6 \n\t"                               \
        "adds %[h], %[h], r4 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r4, %[b] \n\t"                             \
        "muls r6, r4 \n\t"                               \
        "lsrs r4, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r4 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
  1541. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Fast variant: each 16x16 partial product is accumulated twice; r5 holds
 * a constant zero for the carry adds. Uses r7 as scratch.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r7, %[b] \n\t"                             \
        "muls r7, r6 \n\t"                               \
        "adds %[l], %[l], r7 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        "adds %[l], %[l], r7 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r7, r6 \n\t"                               \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], r5 \n\t"                             \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bl */                                    \
        "uxth r7, %[b] \n\t"                             \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
  1590. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Small/debug variant: r7 is unavailable, so va's register is saved in r8
 * and reused as the zero register for carry adds; it is restored at the
 * end (the "+l"-like behaviour of the [a] operand is preserved manually).
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "movs r8, %[a] \n\t"                             \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r5, %[b] \n\t"                             \
        "muls r5, r6 \n\t"                               \
        "adds %[l], %[l], r5 \n\t"                       \
        "movs %[a], #0 \n\t"                             \
        "adcs %[h], %[a] \n\t"                           \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], %[a] \n\t"                           \
        "adcs %[o], %[a] \n\t"                           \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        /* ah * bh */                                    \
        "movs %[a], r8 \n\t"                             \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r5, r6 \n\t"                               \
        "adds %[h], %[h], r5 \n\t"                       \
        "movs %[a], #0 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[a] \n\t"                           \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "movs %[a], r8 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
  1643. #endif
  1644. #ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * Fast variant (uses r7): the al*bl accumulations skip the carry into vo
 * per the no-overflow assumption.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r7, %[b] \n\t"                             \
        "muls r7, r6 \n\t"                               \
        "adds %[l], %[l], r7 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        "adds %[l], %[l], r7 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        /* al * bh */                                    \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r7, r6 \n\t"                               \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], r5 \n\t"                             \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], r5 \n\t"                             \
        /* ah * bl */                                    \
        "uxth r7, %[b] \n\t"                             \
        "muls r6, r7 \n\t"                               \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r7 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
  1692. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * Small/debug variant: saves va's register in r8 and reuses it as the
 * zero register for carry adds; restored at the end.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "movs r8, %[a] \n\t"                             \
        /* al * bl */                                    \
        "uxth r5, %[a] \n\t"                             \
        "uxth r6, %[b] \n\t"                             \
        "muls r6, r5 \n\t"                               \
        "adds %[l], %[l], r6 \n\t"                       \
        "movs %[a], #0 \n\t"                             \
        "adcs %[h], %[a] \n\t"                           \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[a] \n\t"                           \
        /* al * bh */                                    \
        "lsrs r6, %[b], #16 \n\t"                        \
        "muls r5, r6 \n\t"                               \
        "lsrs r6, r5, #16 \n\t"                          \
        "lsls r5, r5, #16 \n\t"                          \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], r6 \n\t"                             \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], r6 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        /* ah * bh */                                    \
        "movs %[a], r8 \n\t"                             \
        "lsrs r5, %[a], #16 \n\t"                        \
        "lsrs r6, %[b], #16 \n\t"                        \
        "muls r6, r5 \n\t"                               \
        "movs %[a], #0 \n\t"                             \
        "adds %[h], %[h], r6 \n\t"                       \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[h], %[h], r6 \n\t"                       \
        "adcs %[o], %[a] \n\t"                           \
        /* ah * bl */                                    \
        "uxth r6, %[b] \n\t"                             \
        "muls r5, r6 \n\t"                               \
        "lsrs r6, r5, #16 \n\t"                          \
        "lsls r5, r5, #16 \n\t"                          \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], r6 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], r6 \n\t"                             \
        "adcs %[o], %[a] \n\t"                           \
        "movs %[a], r8 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
  1744. #endif
/* Square va and store double size result in: vh | vl
 * (ah|al)^2 = al^2 + ah^2 << 32 + 2*al*ah << 16; the doubling of the cross
 * product is folded into the shifts (#17 / #15).
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "lsrs r5, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                             \
        "mov %[l], r6 \n\t"                              \
        "mov %[h], r5 \n\t"                              \
        /* al * al */                                    \
        "muls %[l], %[l] \n\t"                           \
        /* ah * ah */                                    \
        "muls %[h], %[h] \n\t"                           \
        /* 2 * al * ah */                                \
        "muls r6, r5 \n\t"                               \
        "lsrs r5, r6, #15 \n\t"                          \
        "lsls r6, r6, #17 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r5 \n\t"                             \
        : [h] "+l" (vh), [l] "+l" (vl)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
/* Square va and add double size result into: vo | vh | vl
 * Accumulates al^2 + (ah^2 << 32) first, then the doubled cross product
 * (shifts #17 / #15), carrying into vo after each addition.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "lsrs r4, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                             \
        /* al * al */                                    \
        "muls r6, r6 \n\t"                               \
        /* ah * ah */                                    \
        "muls r4, r4 \n\t"                               \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r4 \n\t"                             \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], r5 \n\t"                             \
        "lsrs r4, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                             \
        /* 2 * al * ah */                                \
        "muls r6, r4 \n\t"                               \
        "lsrs r4, r6, #15 \n\t"                          \
        "lsls r6, r6, #17 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], r4 \n\t"                             \
        "adcs %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va)                                   \
        : "r4", "r5", "r6", "cc"                         \
    )
  1792. /* Square va and add double size result into: vh | vl */
  1793. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1794. __asm__ __volatile__ ( \
  1795. "lsrs r6, %[a], #16 \n\t" \
  1796. "uxth r6, %[a] \n\t" \
  1797. /* al * al */ \
  1798. "muls r6, r6 \n\t" \
  1799. /* ah * ah */ \
  1800. "muls r6, r6 \n\t" \
  1801. "adds %[l], %[l], r6 \n\t" \
  1802. "adcs %[h], r6 \n\t" \
  1803. "lsrs r6, %[a], #16 \n\t" \
  1804. "uxth r6, %[a] \n\t" \
  1805. /* 2 * al * ah */ \
  1806. "muls r6, r6 \n\t" \
  1807. "lsrs r6, r6, #15 \n\t" \
  1808. "lsls r6, r6, #17 \n\t" \
  1809. "adds %[l], %[l], r6 \n\t" \
  1810. "adcs %[h], r6 \n\t" \
  1811. : [l] "+l" (vl), [h] "+l" (vh) \
  1812. : [a] "l" (va) \
  1813. : "r5", "r6", "cc" \
  1814. )
/* Add va into: vh | vl
 * Thumb-1 ADC needs a register zero, so r5 is zeroed for the carry add.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a] \n\t"                     \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
/* Sub va from: vh | vl
 * Borrow is propagated into vh via SBCS against a register zero.
 */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subs %[l], %[l], %[a] \n\t"                     \
        "movs r5, #0 \n\t"                               \
        "sbcs %[h], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
/* Add two times vc | vb | va into vo | vh | vl
 * The triple-word value is accumulated twice with full carry chains.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a] \n\t"                     \
        "adcs %[h], %[b] \n\t"                           \
        "adcs %[o], %[c] \n\t"                           \
        "adds %[l], %[l], %[a] \n\t"                     \
        "adcs %[h], %[b] \n\t"                           \
        "adcs %[o], %[c] \n\t"                           \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
        : "cc"                                           \
    )
  1848. #elif defined(WOLFSSL_KEIL)
  1849. /* Multiply va by vb and store double size result in: vh | vl */
  1850. #define SP_ASM_MUL(vl, vh, va, vb) \
  1851. __asm__ __volatile__ ( \
  1852. /* al * bl */ \
  1853. "uxth r6, %[a] \n\t" \
  1854. "uxth %[l], %[b] \n\t" \
  1855. "muls %[l], r6, %[l] \n\t" \
  1856. /* al * bh */ \
  1857. "lsrs r4, %[b], #16 \n\t" \
  1858. "muls r6, r4, r6 \n\t" \
  1859. "lsrs %[h], r6, #16 \n\t" \
  1860. "lsls r6, r6, #16 \n\t" \
  1861. "adds %[l], %[l], r6 \n\t" \
  1862. "movs r5, #0 \n\t" \
  1863. "adcs %[h], %[h], r5 \n\t" \
  1864. /* ah * bh */ \
  1865. "lsrs r6, %[a], #16 \n\t" \
  1866. "muls r4, r6, r4 \n\t" \
  1867. "adds %[h], %[h], r4 \n\t" \
  1868. /* ah * bl */ \
  1869. "uxth r4, %[b] \n\t" \
  1870. "muls r6, r4, r6 \n\t" \
  1871. "lsrs r4, r6, #16 \n\t" \
  1872. "lsls r6, r6, #16 \n\t" \
  1873. "adds %[l], %[l], r6 \n\t" \
  1874. "adcs %[h], %[h], r4 \n\t" \
  1875. : [h] "+l" (vh), [l] "+l" (vl) \
  1876. : [a] "l" (va), [b] "l" (vb) \
  1877. : "r4", "r5", "r6", "cc" \
  1878. )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * vo is zeroed; the 64-bit product cannot reach the third word.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth %[l], %[b] \n\t"                           \
        "muls %[l], r6, %[l] \n\t"                       \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs %[h], r6, #16 \n\t"                        \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "movs %[o], #0 \n\t"                             \
        "adcs %[h], %[h], %[o] \n\t"                     \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "muls r5, r6, r5 \n\t"                           \
        "adds %[h], %[h], r5 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "cc"                               \
    )
  1909. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Fast variant: r5 stays zero for the carry adds; r7 is extra scratch
 * (requires -fomit-frame-pointer or similar).
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r7, %[b] \n\t"                             \
        "muls r7, r6, r7 \n\t"                           \
        "adds %[l], %[l], r7 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* al * bh */                                    \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r6, r7, r6 \n\t"                           \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r7, r6, r7 \n\t"                           \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r7, %[b] \n\t"                             \
        "muls r6, r7, r6 \n\t"                           \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
  1947. #else
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Small/debug variant: avoids r7, so r5 doubles as the partial-product
 * scratch and must be re-zeroed before each carry add.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r5, %[b] \n\t"                             \
        "muls r5, r6, r5 \n\t"                           \
        "adds %[l], %[l], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r5, r6, r5 \n\t"                           \
        "adds %[h], %[h], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[o], %[o], r5 \n\t"                       \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "cc"                               \
    )
  1987. #endif
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word accumulate only: carries out of vh are discarded (caller must
 * guarantee no overflow).
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r4, %[b] \n\t"                             \
        "muls r4, r6, r4 \n\t"                           \
        "adds %[l], %[l], r4 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], %[h], r5 \n\t"                       \
        /* al * bh */                                    \
        "lsrs r4, %[b], #16 \n\t"                        \
        "muls r6, r4, r6 \n\t"                           \
        "lsrs r4, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r4 \n\t"                       \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r4, %[b], #16 \n\t"                        \
        "muls r4, r6, r4 \n\t"                           \
        "adds %[h], %[h], r4 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r4, %[b] \n\t"                             \
        "muls r6, r4, r6 \n\t"                           \
        "lsrs r4, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r4 \n\t"                       \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
  2021. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Fast variant: each partial product is accumulated twice; r5 holds a
 * constant zero for the carry adds. Uses r7 as scratch.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r7, %[b] \n\t"                             \
        "muls r7, r6, r7 \n\t"                           \
        "adds %[l], %[l], r7 \n\t"                       \
        "movs r5, #0 \n\t"                               \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        "adds %[l], %[l], r7 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* al * bh */                                    \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r6, r7, r6 \n\t"                           \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bh */                                    \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r7, %[b], #16 \n\t"                        \
        "muls r7, r6, r7 \n\t"                           \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        "adds %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        /* ah * bl */                                    \
        "uxth r7, %[b] \n\t"                             \
        "muls r6, r7, r6 \n\t"                           \
        "lsrs r7, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r7 \n\t"                       \
        "adcs %[o], %[o], r5 \n\t"                       \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
  2070. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Small/debug variant: r7 is unavailable, so va's register is saved in r8
 * and reused as the zero register for the carry adds; restored at the end.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "movs r8, %[a] \n\t"                             \
        /* al * bl */                                    \
        "uxth r6, %[a] \n\t"                             \
        "uxth r5, %[b] \n\t"                             \
        "muls r5, r6, r5 \n\t"                           \
        "adds %[l], %[l], r5 \n\t"                       \
        "movs %[a], #0 \n\t"                             \
        "adcs %[h], %[h], %[a] \n\t"                     \
        "adcs %[o], %[o], %[a] \n\t"                     \
        "adds %[l], %[l], r5 \n\t"                       \
        "adcs %[h], %[h], %[a] \n\t"                     \
        "adcs %[o], %[o], %[a] \n\t"                     \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], %[a] \n\t"                     \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], %[a] \n\t"                     \
        /* ah * bh */                                    \
        "movs %[a], r8 \n\t"                             \
        "lsrs r6, %[a], #16 \n\t"                        \
        "lsrs r5, %[b], #16 \n\t"                        \
        "muls r5, r6, r5 \n\t"                           \
        "adds %[h], %[h], r5 \n\t"                       \
        "movs %[a], #0 \n\t"                             \
        "adcs %[o], %[o], %[a] \n\t"                     \
        "adds %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], %[a] \n\t"                     \
        /* ah * bl */                                    \
        "uxth r5, %[b] \n\t"                             \
        "muls r6, r5, r6 \n\t"                           \
        "lsrs r5, r6, #16 \n\t"                          \
        "lsls r6, r6, #16 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], %[a] \n\t"                     \
        "adds %[l], %[l], r6 \n\t"                       \
        "adcs %[h], %[h], r5 \n\t"                       \
        "adcs %[o], %[o], %[a] \n\t"                     \
        "movs %[a], r8 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
  2123. #endif
#ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Same partial-product scheme as SP_ASM_MUL_ADD2 but the al*bl
 * accumulation omits the carry into vo (the caller guarantees the first
 * double add cannot overflow past vh). r5 holds zero for carries.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * DEBUG variant: va is parked in r8 so %[a] can serve as the zero
 * constant for carry propagation; restored before exit.
 * NOTE(review): %[a] is an input operand that is written and restored
 * inside the asm — confirm this is the intended contract.
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movs r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r5, %[a] \n\t" \
        "uxth r6, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        /* al * bh */ \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bh */ \
        "movs %[a], r8 \n\t" \
        "lsrs r5, %[a], #16 \n\t" \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "movs %[a], #0 \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r6, %[b] \n\t" \
        "muls r5, r6, r5 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], r6 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* Restore va from r8. */ \
        "movs %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
/* Square va and store double size result in: vh | vl
 *
 * va = ah|al (16-bit halves); result = al*al + ah*ah<<32 + 2*al*ah<<16.
 * The doubling of the cross product is folded into the shifts: #17 left /
 * #15 right instead of #16. Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        "mov %[l], r6 \n\t" \
        "mov %[h], r5 \n\t" \
        /* al * al */ \
        "muls %[l], %[l], %[l] \n\t" \
        /* ah * ah */ \
        "muls %[h], %[h], %[h] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * Adds al*al + (ah*ah << 32) then 2*al*ah << 16 (doubling folded into
 * the #15/#17 shift pair), carrying into vo. r5 holds zero for carries.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r4, r4, r4 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* Re-split va; r4/r6 were consumed above. */ \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r4, r6 \n\t" \
        "lsrs r4, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r4 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va) \
        : "r4", "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant of SP_ASM_SQR_ADD: no overflow word, so carries out
 * of vh are discarded (caller guarantees they cannot occur).
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r5, r5, r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        /* Re-split va; r5/r6 were consumed above. */ \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Add va into: vh | vl
 *
 * Single-word add with carry propagated into vh via a zero in r5.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Sub va from: vh | vl
 *
 * Single-word subtract with borrow propagated into vh via a zero in r5.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "sbcs %[h], %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Triple-word add performed twice with carry chained through vh/vo.
 * No scratch registers needed; clobbers only the condition flags.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adcs %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adcs %[o], %[o], %[c] \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
        : "cc" \
    )
  2328. #elif defined(__GNUC__)
/* Multiply va by vb and store double size result in: vh | vl
 *
 * GCC divided-syntax Thumb-1 variant (two-operand MUL; ADD/ADC/MOV on low
 * registers set flags). Computes the four 16x16 partial products of
 * va = ah|al and vb = bh|bl and assembles the 64-bit product in vh|vl.
 * MOV #imm updates only N/Z, so loading zero mid carry-chain is safe.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "mul %[l], r6 \n\t" \
        /* al * bh */ \
        "lsr r4, %[b], #16 \n\t" \
        "mul r6, r4 \n\t" \
        "lsr %[h], r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "mul r4, r6 \n\t" \
        "add %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "mul r6, r4 \n\t" \
        "lsr r4, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r4 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * As SP_ASM_MUL but also zeroes the third word vo (a 32x32 product fits
 * in 64 bits, so vo ends up 0 and doubles as the zero constant for the
 * carry into vh). Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "mul %[l], r6 \n\t" \
        /* al * bh */ \
        "lsr r5, %[b], #16 \n\t" \
        "mul r6, r5 \n\t" \
        "lsr %[h], r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        /* vo is set to zero here and stays zero. */ \
        "mov %[o], #0 \n\t" \
        "adc %[h], %[o] \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "mul r5, r6 \n\t" \
        "add %[h], %[h], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Fast variant: r5 is loaded with zero once and kept for all carry
 * propagation; r6/r7 hold the 16-bit halves and partial products.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "mul r7, r6 \n\t" \
        "add %[l], %[l], r7 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], r5 \n\t" \
        /* al * bh */ \
        "lsr r7, %[b], #16 \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r7, %[b], #16 \n\t" \
        "mul r7, r6 \n\t" \
        "add %[h], %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Small/DEBUG variant using only r5/r6: r5 is reused both as a partial
 * product and as the zero constant, so it is reloaded with #0 before
 * each carry into vo (MOV #imm leaves the carry flag untouched).
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "mul r5, r6 \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], r5 \n\t" \
        /* al * bh */ \
        "lsr r5, %[b], #16 \n\t" \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r5, %[b], #16 \n\t" \
        "mul r5, r6 \n\t" \
        "add %[h], %[h], r5 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#endif
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulate: no overflow word, so carries out of vh are
 * discarded (caller guarantees the total cannot exceed 64 bits).
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r4, %[b] \n\t" \
        "mul r4, r6 \n\t" \
        "add %[l], %[l], r4 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        /* al * bh */ \
        "lsr r4, %[b], #16 \n\t" \
        "mul r6, r4 \n\t" \
        "lsr r4, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r4 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r4, %[b], #16 \n\t" \
        "mul r4, r6 \n\t" \
        "add %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "mul r6, r4 \n\t" \
        "lsr r4, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r4 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Fast GCC variant: each 16x16 partial product is added into the
 * triple-word accumulator twice; r5 holds a constant zero for carries.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "mul r7, r6 \n\t" \
        "add %[l], %[l], r7 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[l], %[l], r7 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], r5 \n\t" \
        /* al * bh */ \
        "lsr r7, %[b], #16 \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r7, %[b], #16 \n\t" \
        "mul r7, r6 \n\t" \
        "add %[h], %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[h], %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Small/DEBUG GCC variant: va is parked in high register r8 so %[a] can
 * serve as the zero constant for carries; restored before exit.
 * NOTE(review): %[a] is an input operand that is written and restored
 * inside the asm — confirm this is the intended contract.
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mov r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "mul r5, r6 \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "mov %[a], #0 \n\t" \
        "adc %[h], %[a] \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "adc %[h], %[a] \n\t" \
        "adc %[o], %[a] \n\t" \
        /* al * bh */ \
        "lsr r5, %[b], #16 \n\t" \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* ah * bh */ \
        "mov %[a], r8 \n\t" \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r5, %[b], #16 \n\t" \
        "mul r5, r6 \n\t" \
        "add %[h], %[h], r5 \n\t" \
        "mov %[a], #0 \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[h], %[h], r5 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* Restore va from r8. */ \
        "mov %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
#ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * As SP_ASM_MUL_ADD2 but the al*bl accumulation omits the carry into vo
 * (caller guarantees the first double add cannot overflow past vh).
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "mul r7, r6 \n\t" \
        "add %[l], %[l], r7 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        "add %[l], %[l], r7 \n\t" \
        "adc %[h], r5 \n\t" \
        /* al * bh */ \
        "lsr r7, %[b], #16 \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsr r6, %[a], #16 \n\t" \
        "lsr r7, %[b], #16 \n\t" \
        "mul r7, r6 \n\t" \
        "add %[h], %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[h], %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "mul r6, r7 \n\t" \
        "lsr r7, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r7 \n\t" \
        "adc %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * DEBUG variant: va parked in r8, %[a] reused as the zero constant and
 * restored before exit.
 * NOTE(review): %[a] is an input operand that is written and restored
 * inside the asm — confirm this is the intended contract.
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mov r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r5, %[a] \n\t" \
        "uxth r6, %[b] \n\t" \
        "mul r6, r5 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "mov %[a], #0 \n\t" \
        "adc %[h], %[a] \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], %[a] \n\t" \
        /* al * bh */ \
        "lsr r6, %[b], #16 \n\t" \
        "mul r5, r6 \n\t" \
        "lsr r6, r5, #16 \n\t" \
        "lsl r5, r5, #16 \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "adc %[h], r6 \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "adc %[h], r6 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* ah * bh */ \
        "mov %[a], r8 \n\t" \
        "lsr r5, %[a], #16 \n\t" \
        "lsr r6, %[b], #16 \n\t" \
        "mul r6, r5 \n\t" \
        "mov %[a], #0 \n\t" \
        "add %[h], %[h], r6 \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[h], %[h], r6 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r6, %[b] \n\t" \
        "mul r5, r6 \n\t" \
        "lsr r6, r5, #16 \n\t" \
        "lsl r5, r5, #16 \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "adc %[h], r6 \n\t" \
        "adc %[o], %[a] \n\t" \
        "add %[l], %[l], r5 \n\t" \
        "adc %[h], r6 \n\t" \
        "adc %[o], %[a] \n\t" \
        /* Restore va from r8. */ \
        "mov %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
/* Square va and store double size result in: vh | vl
 *
 * va = ah|al; result = al*al + ah*ah<<32 + 2*al*ah<<16. The doubling of
 * the cross product is folded into the #17/#15 shift pair.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsr r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        "mov %[l], r6 \n\t" \
        "mov %[h], r5 \n\t" \
        /* al * al */ \
        "mul %[l], %[l] \n\t" \
        /* ah * ah */ \
        "mul %[h], %[h] \n\t" \
        /* 2 * al * ah */ \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #15 \n\t" \
        "lsl r6, r6, #17 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * Adds al*al + (ah*ah << 32) then 2*al*ah << 16 (doubling folded into
 * the #15/#17 shifts), carrying into vo via a zero in r5.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lsr r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "mul r6, r6 \n\t" \
        /* ah * ah */ \
        "mul r4, r4 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r4 \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[o], r5 \n\t" \
        /* Re-split va; r4/r6 were consumed above. */ \
        "lsr r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "mul r6, r4 \n\t" \
        "lsr r4, r6, #15 \n\t" \
        "lsl r6, r6, #17 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r4 \n\t" \
        "adc %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va) \
        : "r4", "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant: no overflow word, carries out of vh are discarded
 * (caller guarantees they cannot occur).
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsr r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "mul r6, r6 \n\t" \
        /* ah * ah */ \
        "mul r5, r5 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        /* Re-split va; r5/r6 were consumed above. */ \
        "lsr r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "mul r6, r5 \n\t" \
        "lsr r5, r6, #15 \n\t" \
        "lsl r6, r6, #17 \n\t" \
        "add %[l], %[l], r6 \n\t" \
        "adc %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Add va into: vh | vl
 *
 * Single-word add; carry propagated into vh via a zero in r5.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "mov r5, #0 \n\t" \
        "adc %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Sub va from: vh | vl
 *
 * Single-word subtract; borrow propagated into vh via a zero in r5.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "sub %[l], %[l], %[a] \n\t" \
        "mov r5, #0 \n\t" \
        "sbc %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Triple-word add performed twice with the carry chained through vh/vo.
 * No scratch registers needed; clobbers only the condition flags.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "adc %[h], %[b] \n\t" \
        "adc %[o], %[c] \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "adc %[h], %[b] \n\t" \
        "adc %[o], %[c] \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
        : "cc" \
    )
  2809. #endif
  2810. #ifdef WOLFSSL_SP_DIV_WORD_HALF
  2811. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  2812. *
  2813. * No division instruction used - does operation bit by bit.
  2814. * Constant time.
  2815. *
  2816. * @param [in] hi SP integer digit. High digit of the dividend.
  2817. * @param [in] lo SP integer digit. Lower digit of the dividend.
  2818. * @param [in] d SP integer digit. Number to divide by.
  2819. * @return The division result.
  2820. */
  2821. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  2822. sp_int_digit d)
  2823. {
  2824. __asm__ __volatile__ (
  2825. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2826. "lsrs r3, %[d], #24\n\t"
  2827. #else
  2828. "lsr r3, %[d], #24\n\t"
  2829. #endif
  2830. "beq 2%=f\n\t"
  2831. "\n1%=:\n\t"
  2832. "movs r3, #0\n\t"
  2833. "b 3%=f\n\t"
  2834. "\n2%=:\n\t"
  2835. "mov r3, #8\n\t"
  2836. "\n3%=:\n\t"
  2837. "movs r4, #31\n\t"
  2838. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2839. "subs r4, r4, r3\n\t"
  2840. #else
  2841. "sub r4, r4, r3\n\t"
  2842. #endif
  2843. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2844. "lsls %[d], %[d], r3\n\t"
  2845. #else
  2846. "lsl %[d], %[d], r3\n\t"
  2847. #endif
  2848. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2849. "lsls %[hi], %[hi], r3\n\t"
  2850. #else
  2851. "lsl %[hi], %[hi], r3\n\t"
  2852. #endif
  2853. "mov r5, %[lo]\n\t"
  2854. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2855. "lsrs r5, r5, r4\n\t"
  2856. #else
  2857. "lsr r5, r5, r4\n\t"
  2858. #endif
  2859. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2860. "lsls %[lo], %[lo], r3\n\t"
  2861. #else
  2862. "lsl %[lo], %[lo], r3\n\t"
  2863. #endif
  2864. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2865. "lsrs r5, r5, #1\n\t"
  2866. #else
  2867. "lsr r5, r5, #1\n\t"
  2868. #endif
  2869. #if defined(WOLFSSL_KEIL)
  2870. "orrs %[hi], %[hi], r5\n\t"
  2871. #elif defined(__clang__)
  2872. "orrs %[hi], r5\n\t"
  2873. #else
  2874. "orr %[hi], r5\n\t"
  2875. #endif
  2876. "movs r3, #0\n\t"
  2877. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2878. "lsrs r5, %[d], #1\n\t"
  2879. #else
  2880. "lsr r5, %[d], #1\n\t"
  2881. #endif
  2882. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2883. "adds r5, r5, #1\n\t"
  2884. #else
  2885. "add r5, r5, #1\n\t"
  2886. #endif
  2887. "mov r8, %[lo]\n\t"
  2888. "mov r9, %[hi]\n\t"
  2889. /* Do top 32 */
  2890. "movs r6, r5\n\t"
  2891. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2892. "subs r6, r6, %[hi]\n\t"
  2893. #else
  2894. "sub r6, r6, %[hi]\n\t"
  2895. #endif
  2896. #ifdef WOLFSSL_KEIL
  2897. "sbcs r6, r6, r6\n\t"
  2898. #elif defined(__clang__)
  2899. "sbcs r6, r6\n\t"
  2900. #else
  2901. "sbc r6, r6\n\t"
  2902. #endif
  2903. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2904. "adds r3, r3, r3\n\t"
  2905. #else
  2906. "add r3, r3, r3\n\t"
  2907. #endif
  2908. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2909. "subs r3, r3, r6\n\t"
  2910. #else
  2911. "sub r3, r3, r6\n\t"
  2912. #endif
  2913. #ifdef WOLFSSL_KEIL
  2914. "ands r6, r6, r5\n\t"
  2915. #elif defined(__clang__)
  2916. "ands r6, r5\n\t"
  2917. #else
  2918. "and r6, r5\n\t"
  2919. #endif
  2920. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2921. "subs %[hi], %[hi], r6\n\t"
  2922. #else
  2923. "sub %[hi], %[hi], r6\n\t"
  2924. #endif
  2925. "movs r4, #29\n\t"
  2926. "\n"
  2927. "L_sp_div_word_loop%=:\n\t"
  2928. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2929. "lsls %[lo], %[lo], #1\n\t"
  2930. #else
  2931. "lsl %[lo], %[lo], #1\n\t"
  2932. #endif
  2933. #ifdef WOLFSSL_KEIL
  2934. "adcs %[hi], %[hi], %[hi]\n\t"
  2935. #elif defined(__clang__)
  2936. "adcs %[hi], %[hi]\n\t"
  2937. #else
  2938. "adc %[hi], %[hi]\n\t"
  2939. #endif
  2940. "movs r6, r5\n\t"
  2941. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2942. "subs r6, r6, %[hi]\n\t"
  2943. #else
  2944. "sub r6, r6, %[hi]\n\t"
  2945. #endif
  2946. #ifdef WOLFSSL_KEIL
  2947. "sbcs r6, r6, r6\n\t"
  2948. #elif defined(__clang__)
  2949. "sbcs r6, r6\n\t"
  2950. #else
  2951. "sbc r6, r6\n\t"
  2952. #endif
  2953. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2954. "adds r3, r3, r3\n\t"
  2955. #else
  2956. "add r3, r3, r3\n\t"
  2957. #endif
  2958. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2959. "subs r3, r3, r6\n\t"
  2960. #else
  2961. "sub r3, r3, r6\n\t"
  2962. #endif
  2963. #ifdef WOLFSSL_KEIL
  2964. "ands r6, r6, r5\n\t"
  2965. #elif defined(__clang__)
  2966. "ands r6, r5\n\t"
  2967. #else
  2968. "and r6, r5\n\t"
  2969. #endif
  2970. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2971. "subs %[hi], %[hi], r6\n\t"
  2972. #else
  2973. "sub %[hi], %[hi], r6\n\t"
  2974. #endif
  2975. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2976. "subs r4, r4, #1\n\t"
  2977. #else
  2978. "sub r4, r4, #1\n\t"
  2979. #endif
  2980. "bpl L_sp_div_word_loop%=\n\t"
  2981. "movs r7, #0\n\t"
  2982. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2983. "adds r3, r3, r3\n\t"
  2984. #else
  2985. "add r3, r3, r3\n\t"
  2986. #endif
  2987. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2988. "adds r3, r3, #1\n\t"
  2989. #else
  2990. "add r3, r3, #1\n\t"
  2991. #endif
  2992. /* r * d - Start */
  2993. "uxth %[hi], r3\n\t"
  2994. "uxth r4, %[d]\n\t"
  2995. #ifdef WOLFSSL_KEIL
  2996. "muls r4, %[hi], r4\n\t"
  2997. #elif defined(__clang__)
  2998. "muls r4, %[hi]\n\t"
  2999. #else
  3000. "mul r4, %[hi]\n\t"
  3001. #endif
  3002. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3003. "lsrs r6, %[d], #16\n\t"
  3004. #else
  3005. "lsr r6, %[d], #16\n\t"
  3006. #endif
  3007. #ifdef WOLFSSL_KEIL
  3008. "muls %[hi], r6, %[hi]\n\t"
  3009. #elif defined(__clang__)
  3010. "muls %[hi], r6\n\t"
  3011. #else
  3012. "mul %[hi], r6\n\t"
  3013. #endif
  3014. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3015. "lsrs r5, %[hi], #16\n\t"
  3016. #else
  3017. "lsr r5, %[hi], #16\n\t"
  3018. #endif
  3019. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3020. "lsls %[hi], %[hi], #16\n\t"
  3021. #else
  3022. "lsl %[hi], %[hi], #16\n\t"
  3023. #endif
  3024. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3025. "adds r4, r4, %[hi]\n\t"
  3026. #else
  3027. "add r4, r4, %[hi]\n\t"
  3028. #endif
  3029. #ifdef WOLFSSL_KEIL
  3030. "adcs r5, r5, r7\n\t"
  3031. #elif defined(__clang__)
  3032. "adcs r5, r7\n\t"
  3033. #else
  3034. "adc r5, r7\n\t"
  3035. #endif
  3036. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3037. "lsrs %[hi], r3, #16\n\t"
  3038. #else
  3039. "lsr %[hi], r3, #16\n\t"
  3040. #endif
  3041. #ifdef WOLFSSL_KEIL
  3042. "muls r6, %[hi], r6\n\t"
  3043. #elif defined(__clang__)
  3044. "muls r6, %[hi]\n\t"
  3045. #else
  3046. "mul r6, %[hi]\n\t"
  3047. #endif
  3048. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3049. "adds r5, r5, r6\n\t"
  3050. #else
  3051. "add r5, r5, r6\n\t"
  3052. #endif
  3053. "uxth r6, %[d]\n\t"
  3054. #ifdef WOLFSSL_KEIL
  3055. "muls %[hi], r6, %[hi]\n\t"
  3056. #elif defined(__clang__)
  3057. "muls %[hi], r6\n\t"
  3058. #else
  3059. "mul %[hi], r6\n\t"
  3060. #endif
  3061. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3062. "lsrs r6, %[hi], #16\n\t"
  3063. #else
  3064. "lsr r6, %[hi], #16\n\t"
  3065. #endif
  3066. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3067. "lsls %[hi], %[hi], #16\n\t"
  3068. #else
  3069. "lsl %[hi], %[hi], #16\n\t"
  3070. #endif
  3071. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3072. "adds r4, r4, %[hi]\n\t"
  3073. #else
  3074. "add r4, r4, %[hi]\n\t"
  3075. #endif
  3076. #ifdef WOLFSSL_KEIL
  3077. "adcs r5, r5, r6\n\t"
  3078. #elif defined(__clang__)
  3079. "adcs r5, r6\n\t"
  3080. #else
  3081. "adc r5, r6\n\t"
  3082. #endif
  3083. /* r * d - Done */
  3084. "mov %[hi], r8\n\t"
  3085. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3086. "subs %[hi], %[hi], r4\n\t"
  3087. #else
  3088. "sub %[hi], %[hi], r4\n\t"
  3089. #endif
  3090. "movs r4, %[hi]\n\t"
  3091. "mov %[hi], r9\n\t"
  3092. #ifdef WOLFSSL_KEIL
  3093. "sbcs %[hi], %[hi], r5\n\t"
  3094. #elif defined(__clang__)
  3095. "sbcs %[hi], r5\n\t"
  3096. #else
  3097. "sbc %[hi], r5\n\t"
  3098. #endif
  3099. "movs r5, %[hi]\n\t"
  3100. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3101. "adds r3, r3, r5\n\t"
  3102. #else
  3103. "add r3, r3, r5\n\t"
  3104. #endif
  3105. /* r * d - Start */
  3106. "uxth %[hi], r3\n\t"
  3107. "uxth r4, %[d]\n\t"
  3108. #ifdef WOLFSSL_KEIL
  3109. "muls r4, %[hi], r4\n\t"
  3110. #elif defined(__clang__)
  3111. "muls r4, %[hi]\n\t"
  3112. #else
  3113. "mul r4, %[hi]\n\t"
  3114. #endif
  3115. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3116. "lsrs r6, %[d], #16\n\t"
  3117. #else
  3118. "lsr r6, %[d], #16\n\t"
  3119. #endif
  3120. #ifdef WOLFSSL_KEIL
  3121. "muls %[hi], r6, %[hi]\n\t"
  3122. #elif defined(__clang__)
  3123. "muls %[hi], r6\n\t"
  3124. #else
  3125. "mul %[hi], r6\n\t"
  3126. #endif
  3127. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3128. "lsrs r5, %[hi], #16\n\t"
  3129. #else
  3130. "lsr r5, %[hi], #16\n\t"
  3131. #endif
  3132. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3133. "lsls %[hi], %[hi], #16\n\t"
  3134. #else
  3135. "lsl %[hi], %[hi], #16\n\t"
  3136. #endif
  3137. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3138. "adds r4, r4, %[hi]\n\t"
  3139. #else
  3140. "add r4, r4, %[hi]\n\t"
  3141. #endif
  3142. #ifdef WOLFSSL_KEIL
  3143. "adcs r5, r5, r7\n\t"
  3144. #elif defined(__clang__)
  3145. "adcs r5, r7\n\t"
  3146. #else
  3147. "adc r5, r7\n\t"
  3148. #endif
  3149. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3150. "lsrs %[hi], r3, #16\n\t"
  3151. #else
  3152. "lsr %[hi], r3, #16\n\t"
  3153. #endif
  3154. #ifdef WOLFSSL_KEIL
  3155. "muls r6, %[hi], r6\n\t"
  3156. #elif defined(__clang__)
  3157. "muls r6, %[hi]\n\t"
  3158. #else
  3159. "mul r6, %[hi]\n\t"
  3160. #endif
  3161. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3162. "adds r5, r5, r6\n\t"
  3163. #else
  3164. "add r5, r5, r6\n\t"
  3165. #endif
  3166. "uxth r6, %[d]\n\t"
  3167. #ifdef WOLFSSL_KEIL
  3168. "muls %[hi], r6, %[hi]\n\t"
  3169. #elif defined(__clang__)
  3170. "muls %[hi], r6\n\t"
  3171. #else
  3172. "mul %[hi], r6\n\t"
  3173. #endif
  3174. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3175. "lsrs r6, %[hi], #16\n\t"
  3176. #else
  3177. "lsr r6, %[hi], #16\n\t"
  3178. #endif
  3179. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3180. "lsls %[hi], %[hi], #16\n\t"
  3181. #else
  3182. "lsl %[hi], %[hi], #16\n\t"
  3183. #endif
  3184. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3185. "adds r4, r4, %[hi]\n\t"
  3186. #else
  3187. "add r4, r4, %[hi]\n\t"
  3188. #endif
  3189. #ifdef WOLFSSL_KEIL
  3190. "adcs r5, r5, r6\n\t"
  3191. #elif defined(__clang__)
  3192. "adcs r5, r6\n\t"
  3193. #else
  3194. "adc r5, r6\n\t"
  3195. #endif
  3196. /* r * d - Done */
  3197. "mov %[hi], r8\n\t"
  3198. "mov r6, r9\n\t"
  3199. #ifdef WOLFSSL_KEIL
  3200. "subs r4, %[hi], r4\n\t"
  3201. #else
  3202. #ifdef __clang__
  3203. "subs r4, %[hi], r4\n\t"
  3204. #else
  3205. "sub r4, %[hi], r4\n\t"
  3206. #endif
  3207. #endif
  3208. #ifdef WOLFSSL_KEIL
  3209. "sbcs r6, r6, r5\n\t"
  3210. #elif defined(__clang__)
  3211. "sbcs r6, r5\n\t"
  3212. #else
  3213. "sbc r6, r5\n\t"
  3214. #endif
  3215. "movs r5, r6\n\t"
  3216. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3217. "adds r3, r3, r5\n\t"
  3218. #else
  3219. "add r3, r3, r5\n\t"
  3220. #endif
  3221. /* r * d - Start */
  3222. "uxth %[hi], r3\n\t"
  3223. "uxth r4, %[d]\n\t"
  3224. #ifdef WOLFSSL_KEIL
  3225. "muls r4, %[hi], r4\n\t"
  3226. #elif defined(__clang__)
  3227. "muls r4, %[hi]\n\t"
  3228. #else
  3229. "mul r4, %[hi]\n\t"
  3230. #endif
  3231. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3232. "lsrs r6, %[d], #16\n\t"
  3233. #else
  3234. "lsr r6, %[d], #16\n\t"
  3235. #endif
  3236. #ifdef WOLFSSL_KEIL
  3237. "muls %[hi], r6, %[hi]\n\t"
  3238. #elif defined(__clang__)
  3239. "muls %[hi], r6\n\t"
  3240. #else
  3241. "mul %[hi], r6\n\t"
  3242. #endif
  3243. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3244. "lsrs r5, %[hi], #16\n\t"
  3245. #else
  3246. "lsr r5, %[hi], #16\n\t"
  3247. #endif
  3248. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3249. "lsls %[hi], %[hi], #16\n\t"
  3250. #else
  3251. "lsl %[hi], %[hi], #16\n\t"
  3252. #endif
  3253. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3254. "adds r4, r4, %[hi]\n\t"
  3255. #else
  3256. "add r4, r4, %[hi]\n\t"
  3257. #endif
  3258. #ifdef WOLFSSL_KEIL
  3259. "adcs r5, r5, r7\n\t"
  3260. #elif defined(__clang__)
  3261. "adcs r5, r7\n\t"
  3262. #else
  3263. "adc r5, r7\n\t"
  3264. #endif
  3265. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3266. "lsrs %[hi], r3, #16\n\t"
  3267. #else
  3268. "lsr %[hi], r3, #16\n\t"
  3269. #endif
  3270. #ifdef WOLFSSL_KEIL
  3271. "muls r6, %[hi], r6\n\t"
  3272. #elif defined(__clang__)
  3273. "muls r6, %[hi]\n\t"
  3274. #else
  3275. "mul r6, %[hi]\n\t"
  3276. #endif
  3277. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3278. "adds r5, r5, r6\n\t"
  3279. #else
  3280. "add r5, r5, r6\n\t"
  3281. #endif
  3282. "uxth r6, %[d]\n\t"
  3283. #ifdef WOLFSSL_KEIL
  3284. "muls %[hi], r6, %[hi]\n\t"
  3285. #elif defined(__clang__)
  3286. "muls %[hi], r6\n\t"
  3287. #else
  3288. "mul %[hi], r6\n\t"
  3289. #endif
  3290. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3291. "lsrs r6, %[hi], #16\n\t"
  3292. #else
  3293. "lsr r6, %[hi], #16\n\t"
  3294. #endif
  3295. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3296. "lsls %[hi], %[hi], #16\n\t"
  3297. #else
  3298. "lsl %[hi], %[hi], #16\n\t"
  3299. #endif
  3300. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3301. "adds r4, r4, %[hi]\n\t"
  3302. #else
  3303. "add r4, r4, %[hi]\n\t"
  3304. #endif
  3305. #ifdef WOLFSSL_KEIL
  3306. "adcs r5, r5, r6\n\t"
  3307. #elif defined(__clang__)
  3308. "adcs r5, r6\n\t"
  3309. #else
  3310. "adc r5, r6\n\t"
  3311. #endif
  3312. /* r * d - Done */
  3313. "mov %[hi], r8\n\t"
  3314. "mov r6, r9\n\t"
  3315. #ifdef WOLFSSL_KEIL
  3316. "subs r4, %[hi], r4\n\t"
  3317. #else
  3318. #ifdef __clang__
  3319. "subs r4, %[hi], r4\n\t"
  3320. #else
  3321. "sub r4, %[hi], r4\n\t"
  3322. #endif
  3323. #endif
  3324. #ifdef WOLFSSL_KEIL
  3325. "sbcs r6, r6, r5\n\t"
  3326. #elif defined(__clang__)
  3327. "sbcs r6, r5\n\t"
  3328. #else
  3329. "sbc r6, r5\n\t"
  3330. #endif
  3331. "movs r5, r6\n\t"
  3332. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3333. "adds r3, r3, r5\n\t"
  3334. #else
  3335. "add r3, r3, r5\n\t"
  3336. #endif
  3337. "movs r6, %[d]\n\t"
  3338. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3339. "subs r6, r6, r4\n\t"
  3340. #else
  3341. "sub r6, r6, r4\n\t"
  3342. #endif
  3343. #ifdef WOLFSSL_KEIL
  3344. "sbcs r6, r6, r6\n\t"
  3345. #elif defined(__clang__)
  3346. "sbcs r6, r6\n\t"
  3347. #else
  3348. "sbc r6, r6\n\t"
  3349. #endif
  3350. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3351. "subs r3, r3, r6\n\t"
  3352. #else
  3353. "sub r3, r3, r6\n\t"
  3354. #endif
  3355. "movs %[hi], r3\n\t"
  3356. : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
  3357. :
  3358. : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
  3359. );
  3360. return (uint32_t)(size_t)hi;
  3361. }
  3362. #define SP_ASM_DIV_WORD
  3363. #endif /* !WOLFSSL_SP_DIV_WORD_HALF */
  3364. #define SP_INT_ASM_AVAILABLE
  3365. #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
/*
 * CPU: PPC64
 *
 * 64-bit word primitives: mulld yields the low 64 bits of a product and
 * mulhdu the high 64 bits of the unsigned product.  Multi-word carry chains
 * are built from addc (add, record carry), adde (add with carry) and
 * addze (add carry into a register).  Macros that need scratch space use
 * r16/r17 and list them ("16", "17") as clobbers.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mulld %[l], %[a], %[b] \n\t" \
"mulhdu %[h], %[a], %[b] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va), [b] "r" (vb) \
: "memory" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is cleared to zero with li. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mulhdu %[h], %[a], %[b] \n\t" \
"mulld %[l], %[a], %[b] \n\t" \
"li %[o], 0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: \
)
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * r16/r17 hold the 128-bit product; addc/adde/addze ripple the carry
 * through all three result words. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[b] \n\t" \
"mulhdu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees the sum cannot carry out of vh (no third word). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[b] \n\t" \
"mulhdu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once; the three-word add chain runs twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[b] \n\t" \
"mulhdu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so the carry into vo is
 * only collected (addze) after the second add.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[b] \n\t" \
"mulhdu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"mulld %[l], %[a], %[a] \n\t" \
"mulhdu %[h], %[a], %[a] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[a] \n\t" \
"mulhdu 17, %[a], %[a] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "16", "17", "cc" \
)
/* Square va and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"mulld 16, %[a], %[a] \n\t" \
"mulhdu 17, %[a], %[a] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "16", "17", "cc" \
)
/* Add va into: vh | vl (addze propagates the carry into vh). */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"addc %[l], %[l], %[a] \n\t" \
"addze %[h], %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Sub va from: vh | vl.
 * subfc records the borrow in CA; r16 is zeroed so subfe only subtracts
 * the borrow from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subfc %[l], %[a], %[l] \n\t" \
"li 16, 0 \n\t" \
"subfe %[h], 16, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "16", "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl (three-word add, twice). */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"addc %[l], %[l], %[a] \n\t" \
"adde %[h], %[h], %[b] \n\t" \
"adde %[o], %[o], %[c] \n\t" \
"addc %[l], %[l], %[a] \n\t" \
"adde %[h], %[h], %[b] \n\t" \
"adde %[o], %[o], %[c] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
/* Count leading zeros (cntlzd on the 64-bit word). */
#define SP_ASM_LZCNT(va, vn) \
__asm__ __volatile__ ( \
"cntlzd %[n], %[a] \n\t" \
: [n] "=r" (vn) \
: [a] "r" (va) \
: \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
/*
 * CPU: PPC 32-bit
 *
 * 32-bit word primitives: mullw yields the low 32 bits of a product and
 * mulhwu the high 32 bits of the unsigned product.  Carry chains use
 * addc/adde/addze.  Macros needing scratch space use r16/r17 and list
 * them ("16", "17") as clobbers.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mullw %[l], %[a], %[b] \n\t" \
"mulhwu %[h], %[a], %[b] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va), [b] "r" (vb) \
: "memory" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is cleared to zero with li. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mulhwu %[h], %[a], %[b] \n\t" \
"mullw %[l], %[a], %[b] \n\t" \
"li %[o], 0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
)
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * r16/r17 hold the 64-bit product; addc/adde/addze ripple the carry
 * through all three result words. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[b] \n\t" \
"mulhwu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees the sum cannot carry out of vh (no third word). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[b] \n\t" \
"mulhwu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once; the three-word add chain runs twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[b] \n\t" \
"mulhwu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so the carry into vo is
 * only collected (addze) after the second add.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[b] \n\t" \
"mulhwu 17, %[a], %[b] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "16", "17", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"mullw %[l], %[a], %[a] \n\t" \
"mulhwu %[h], %[a], %[a] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[a] \n\t" \
"mulhwu 17, %[a], %[a] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
"addze %[o], %[o] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "16", "17", "cc" \
)
/* Square va and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"mullw 16, %[a], %[a] \n\t" \
"mulhwu 17, %[a], %[a] \n\t" \
"addc %[l], %[l], 16 \n\t" \
"adde %[h], %[h], 17 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "16", "17", "cc" \
)
/* Add va into: vh | vl (addze propagates the carry into vh). */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"addc %[l], %[l], %[a] \n\t" \
"addze %[h], %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Sub va from: vh | vl.
 * subfc records the borrow in CA; r16 is zeroed so subfe only subtracts
 * the borrow from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subfc %[l], %[a], %[l] \n\t" \
"li 16, 0 \n\t" \
"subfe %[h], 16, %[h] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "16", "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl (three-word add, twice). */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"addc %[l], %[l], %[a] \n\t" \
"adde %[h], %[h], %[b] \n\t" \
"adde %[o], %[o], %[c] \n\t" \
"addc %[l], %[l], %[a] \n\t" \
"adde %[h], %[h], %[b] \n\t" \
"adde %[o], %[o], %[c] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
/* Count leading zeros (cntlzw on the 32-bit word). */
#define SP_ASM_LZCNT(va, vn) \
__asm__ __volatile__ ( \
"cntlzw %[n], %[a] \n\t" \
: [n] "=r" (vn) \
: [a] "r" (va) \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
/*
 * CPU: MIPS 64-bit
 *
 * dmultu leaves the 128-bit unsigned product in the hi/lo registers
 * (read back with mfhi/mflo).  MIPS has no carry flag: after each daddu
 * the carry is recovered with sltu (sum < addend => the add wrapped)
 * into scratch register $12.  $10-$12 are used as scratch and clobbered.
 * NOTE(review): hi/lo clobbers are spelled "$lo"/"$hi" here but
 * "%lo"/"%hi" in the 32-bit MIPS section -- confirm which spelling the
 * target toolchain accepts.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va), [b] "r" (vb) \
: "memory", "$lo", "$hi" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is cleared by copying $0 (the zero register). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
"move %[o], $0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$lo", "$hi" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * Product in $10 (lo) / $11 (hi); each sltu recovers a carry into $12
 * which is then rippled into the next word. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh (no third word). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once; the add-with-carry sequence runs twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so carries into vo are
 * only tracked on the second add.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"dmultu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"dmultu %[a], %[a] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory", "$lo", "$hi" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"dmultu %[a], %[a] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Square va and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"dmultu %[a], %[a] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"daddu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"daddu %[h], %[h], $11 \n\t" \
"daddu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$10", "$11", "$12", "$lo", "$hi" \
)
/* Add va into: vh | vl (carry recovered with sltu). */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"daddu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"daddu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$12" \
)
/* Sub va from: vh | vl.
 * $12 keeps the original vl; sltu detects the borrow (old < new means
 * the subtraction wrapped) which is then subtracted from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"move $12, %[l] \n\t" \
"dsubu %[l], $12, %[a] \n\t" \
"sltu $12, $12, %[l] \n\t" \
"dsubu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$12" \
)
/* Add two times vc | vb | va into vo | vh | vl (three-word add, twice;
 * sltu recovers each carry into $12). */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"daddu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], %[b] \n\t" \
"sltu $12, %[h], %[b] \n\t" \
"daddu %[o], %[o], %[c] \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"daddu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"daddu %[o], %[o], $12 \n\t" \
"daddu %[h], %[h], %[b] \n\t" \
"sltu $12, %[h], %[b] \n\t" \
"daddu %[o], %[o], %[c] \n\t" \
"daddu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "$12" \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
/*
 * CPU: MIPS 32-bit
 *
 * multu leaves the 64-bit unsigned product in the hi/lo registers
 * (read back with mfhi/mflo).  MIPS has no carry flag: after each addu
 * the carry is recovered with sltu (sum < addend => the add wrapped)
 * into scratch register $12.  $10-$12 are used as scratch and clobbered.
 * NOTE(review): hi/lo clobbers are spelled "%lo"/"%hi" here but
 * "$lo"/"$hi" in the 64-bit MIPS section -- confirm which spelling the
 * target toolchain accepts.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va), [b] "r" (vb) \
: "memory", "%lo", "%hi" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is cleared by copying $0 (the zero register). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
"move %[o], $0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "%lo", "%hi" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * Product in $10 (lo) / $11 (hi); each sltu recovers a carry into $12
 * which is then rippled into the next word. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"addu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh (no third word). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"addu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once; the add-with-carry sequence runs twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"addu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so carries into vo are
 * only tracked on the second add.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"multu %[a], %[b] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"addu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"multu %[a], %[a] \n\t" \
"mflo %[l] \n\t" \
"mfhi %[h] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory", "%lo", "%hi" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"multu %[a], %[a] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"sltu $12, %[h], $11 \n\t" \
"addu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Square va and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"multu %[a], %[a] \n\t" \
"mflo $10 \n\t" \
"mfhi $11 \n\t" \
"addu %[l], %[l], $10 \n\t" \
"sltu $12, %[l], $10 \n\t" \
"addu %[h], %[h], $11 \n\t" \
"addu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$10", "$11", "$12", "%lo", "%hi" \
)
/* Add va into: vh | vl (carry recovered with sltu). */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"addu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"addu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$12" \
)
/* Sub va from: vh | vl.
 * $12 keeps the original vl; sltu detects the borrow (old < new means
 * the subtraction wrapped) which is then subtracted from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"move $12, %[l] \n\t" \
"subu %[l], $12, %[a] \n\t" \
"sltu $12, $12, %[l] \n\t" \
"subu %[h], %[h], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "$12" \
)
/* Add two times vc | vb | va into vo | vh | vl (three-word add, twice;
 * sltu recovers each carry into $12). */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"addu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], %[b] \n\t" \
"sltu $12, %[h], %[b] \n\t" \
"addu %[o], %[o], %[c] \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[l], %[l], %[a] \n\t" \
"sltu $12, %[l], %[a] \n\t" \
"addu %[h], %[h], $12 \n\t" \
"sltu $12, %[h], $12 \n\t" \
"addu %[o], %[o], $12 \n\t" \
"addu %[h], %[h], %[b] \n\t" \
"sltu $12, %[h], %[b] \n\t" \
"addu %[o], %[o], %[c] \n\t" \
"addu %[o], %[o], $12 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "$12" \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
/*
 * CPU: RISCV 64-bit
 *
 * NOTE(review): temporaries a5-a7 are used directly and declared as clobbers.
 * RISC-V has no flags register, so carry out of "add d, d, s" is recovered
 * with "sltu t, d, s" (result less than an addend => the add wrapped).
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "mulhu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[h], %[a], %[b] \n\t" \
        "mul %[l], %[a], %[b] \n\t" \
        "add %[o], zero, zero \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "mulhu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Sub va from: vh | vl
 * Borrow is detected as (old_l < new_l): the subtraction wrapped exactly
 * when the result is greater than the minuend. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[l], zero \n\t" \
        "sub %[l], a7, %[a] \n\t" \
        "sltu a7, a7, %[l] \n\t" \
        "sub %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "a7" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
/*
 * CPU: RISCV 32-bit
 *
 * NOTE(review): same instruction sequences as the 64-bit RISC-V section,
 * operating on 32-bit registers. Temporaries a5-a7 are used directly and
 * declared as clobbers; carries are recovered with sltu.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "mulhu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[h], %[a], %[b] \n\t" \
        "mul %[l], %[a], %[b] \n\t" \
        "add %[o], zero, zero \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[b] \n\t" \
        "mulhu a6, %[a], %[b] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "mulhu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "sltu a7, %[h], a6 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Sub va from: vh | vl
 * Borrow is detected as (old_l < new_l): the subtraction wrapped exactly
 * when the result is greater than the minuend. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[l], zero \n\t" \
        "sub %[l], a7, %[a] \n\t" \
        "sltu a7, a7, %[l] \n\t" \
        "sub %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "a7" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
/*
 * CPU: IBM s390x (z/Architecture; the original comment said "Intel" in error)
 *
 * NOTE(review): mlgr writes the 128-bit product into the even/odd register
 * pair r0:r1 (r1 must hold the multiplicand), hence the explicit r0/r1
 * clobbers. alcgr/slbgr add/subtract with the carry/borrow from the
 * condition code, hence the "cc" clobber.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "r0", "r1" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lghi %[o], 0 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "r0", "r1" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r0", "r1", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "slgr %[l], %[a] \n\t" \
        "slbgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
#ifdef SP_INT_ASM_AVAILABLE
    /* Only use the assembly implementations when not explicitly disabled. */
    #ifndef SP_INT_NO_ASM
        #define SQR_MUL_ASM
    #endif
    /* Default the register variants to the generic macros when a CPU
     * section above did not provide specialized ones. */
    #ifndef SP_ASM_ADDC_REG
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
    #endif /* SP_ASM_ADDC_REG */
    #ifndef SP_ASM_SUBB_REG
        #define SP_ASM_SUBB_REG  SP_ASM_SUBB
    #endif /* SP_ASM_SUBB_REG */
#endif /* SP_INT_ASM_AVAILABLE */
#endif /* !WOLFSSL_NO_ASM */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
    !defined(NO_DSA) || !defined(NO_DH) || \
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
#ifndef WC_NO_CACHE_RESISTANT
/* Mask of address for constant time operations.
 * Index 0 holds the all-zero mask and index 1 the all-ones mask, so
 * sp_off_on_addr[bit] can be AND-ed with an address to select it (or not)
 * without a data-dependent branch. */
const size_t sp_off_on_addr[2] =
{
    (size_t) 0,
    (size_t)-1
};
#endif
#endif
  4615. #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
  4616. #ifdef __cplusplus
  4617. extern "C" {
  4618. #endif
  4619. /* Modular exponentiation implementations using Single Precision. */
  4620. WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
  4621. sp_int* res);
  4622. WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
  4623. sp_int* res);
  4624. WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
  4625. sp_int* res);
  4626. WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
  4627. sp_int* res);
  4628. WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
  4629. sp_int* res);
  4630. #ifdef __cplusplus
  4631. } /* extern "C" */
  4632. #endif
  4633. #endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
  4634. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  4635. defined(OPENSSL_ALL)
  4636. static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct);
  4637. #endif
  4638. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  4639. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  4640. defined(OPENSSL_ALL)
  4641. static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
  4642. #endif
  4643. /* Determine when mp_add_d is required. */
  4644. #if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
  4645. !defined(NO_DSA) || defined(HAVE_ECC) || \
  4646. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4647. defined(OPENSSL_EXTRA)
  4648. #define WOLFSSL_SP_ADD_D
  4649. #endif
  4650. /* Determine when mp_sub_d is required. */
  4651. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4652. !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
  4653. #define WOLFSSL_SP_SUB_D
  4654. #endif
/* Determine when mp_read_radix with a radix of 16 is required. */
  4656. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  4657. !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || \
  4658. !defined(NO_DSA) || defined(OPENSSL_EXTRA)
  4659. #define WOLFSSL_SP_READ_RADIX_16
  4660. #endif
  4661. /* Determine when mp_read_radix with a radix of 10 is required. */
  4662. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  4663. !defined(WOLFSSL_RSA_VERIFY_ONLY)
  4664. #define WOLFSSL_SP_READ_RADIX_10
  4665. #endif
  4666. /* Determine when mp_invmod is required. */
  4667. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  4668. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4669. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4670. #define WOLFSSL_SP_INVMOD
  4671. #endif
  4672. /* Determine when mp_invmod_mont_ct is required. */
  4673. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  4674. #define WOLFSSL_SP_INVMOD_MONT_CT
  4675. #endif
  4676. /* Determine when mp_prime_gen is required. */
  4677. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4678. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  4679. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  4680. #define WOLFSSL_SP_PRIME_GEN
  4681. #endif
  4682. /* Set the multi-precision number to zero.
  4683. *
  4684. * Assumes a is not NULL.
  4685. *
  4686. * @param [out] a SP integer to set to zero.
  4687. */
  4688. static void _sp_zero(sp_int* a)
  4689. {
  4690. sp_int_minimal* am = (sp_int_minimal *)a;
  4691. am->used = 0;
  4692. am->dp[0] = 0;
  4693. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4694. am->sign = MP_ZPOS;
  4695. #endif
  4696. }
  4697. /* Initialize the multi-precision number to be zero with a given max size.
  4698. *
  4699. * @param [out] a SP integer.
  4700. * @param [in] size Number of words to say are available.
  4701. */
  4702. static void _sp_init_size(sp_int* a, unsigned int size)
  4703. {
  4704. volatile sp_int_minimal* am = (sp_int_minimal *)a;
  4705. #ifdef HAVE_WOLF_BIGINT
  4706. wc_bigint_init((struct WC_BIGINT*)&am->raw);
  4707. #endif
  4708. _sp_zero((sp_int*)am);
  4709. am->size = size;
  4710. }
  4711. /* Initialize the multi-precision number to be zero with a given max size.
  4712. *
  4713. * @param [out] a SP integer.
  4714. * @param [in] size Number of words to say are available.
  4715. *
  4716. * @return MP_OKAY on success.
  4717. * @return MP_VAL when a is NULL.
  4718. */
  4719. int sp_init_size(sp_int* a, unsigned int size)
  4720. {
  4721. int err = MP_OKAY;
  4722. /* Validate parameters. Don't use size more than max compiled. */
  4723. if ((a == NULL) || ((size <= 0) || (size > SP_INT_DIGITS))) {
  4724. err = MP_VAL;
  4725. }
  4726. if (err == MP_OKAY) {
  4727. _sp_init_size(a, size);
  4728. }
  4729. return err;
  4730. }
  4731. /* Initialize the multi-precision number to be zero.
  4732. *
  4733. * @param [out] a SP integer.
  4734. *
  4735. * @return MP_OKAY on success.
  4736. * @return MP_VAL when a is NULL.
  4737. */
  4738. int sp_init(sp_int* a)
  4739. {
  4740. int err = MP_OKAY;
  4741. /* Validate parameter. */
  4742. if (a == NULL) {
  4743. err = MP_VAL;
  4744. }
  4745. else {
  4746. /* Assume complete sp_int with SP_INT_DIGITS digits. */
  4747. _sp_init_size(a, SP_INT_DIGITS);
  4748. }
  4749. return err;
  4750. }
  4751. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  4752. /* Initialize up to six multi-precision numbers to be zero.
  4753. *
  4754. * @param [out] n1 SP integer.
  4755. * @param [out] n2 SP integer.
  4756. * @param [out] n3 SP integer.
  4757. * @param [out] n4 SP integer.
  4758. * @param [out] n5 SP integer.
  4759. * @param [out] n6 SP integer.
  4760. *
  4761. * @return MP_OKAY on success.
  4762. */
  4763. int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
  4764. sp_int* n6)
  4765. {
  4766. /* Initialize only those pointers that are valid. */
  4767. if (n1 != NULL) {
  4768. _sp_init_size(n1, SP_INT_DIGITS);
  4769. }
  4770. if (n2 != NULL) {
  4771. _sp_init_size(n2, SP_INT_DIGITS);
  4772. }
  4773. if (n3 != NULL) {
  4774. _sp_init_size(n3, SP_INT_DIGITS);
  4775. }
  4776. if (n4 != NULL) {
  4777. _sp_init_size(n4, SP_INT_DIGITS);
  4778. }
  4779. if (n5 != NULL) {
  4780. _sp_init_size(n5, SP_INT_DIGITS);
  4781. }
  4782. if (n6 != NULL) {
  4783. _sp_init_size(n6, SP_INT_DIGITS);
  4784. }
  4785. return MP_OKAY;
  4786. }
  4787. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
  4788. /* Free the memory allocated in the multi-precision number.
  4789. *
  4790. * @param [in] a SP integer.
  4791. */
  4792. void sp_free(sp_int* a)
  4793. {
  4794. if (a != NULL) {
  4795. #ifdef HAVE_WOLF_BIGINT
  4796. wc_bigint_free(&a->raw);
  4797. #endif
  4798. }
  4799. }
  4800. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4801. !defined(NO_DH) || defined(HAVE_ECC)
  4802. /* Grow multi-precision number to be able to hold l digits.
  4803. * This function does nothing as the number of digits is fixed.
  4804. *
  4805. * @param [in,out] a SP integer.
  4806. * @param [in] l Number of digits to grow to.
  4807. *
  4808. * @return MP_OKAY on success
  4809. * @return MP_MEM if the number of digits requested is more than available.
  4810. */
  4811. int sp_grow(sp_int* a, int l)
  4812. {
  4813. int err = MP_OKAY;
  4814. /* Validate parameter. */
  4815. if ((a == NULL) || (l < 0)) {
  4816. err = MP_VAL;
  4817. }
  4818. /* Ensure enough words allocated for grow. */
  4819. if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
  4820. err = MP_MEM;
  4821. }
  4822. if (err == MP_OKAY) {
  4823. unsigned int i;
  4824. /* Put in zeros up to the new length. */
  4825. for (i = a->used; i < (unsigned int)l; i++) {
  4826. a->dp[i] = 0;
  4827. }
  4828. }
  4829. return err;
  4830. }
  4831. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
  4832. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4833. defined(HAVE_ECC)
  4834. /* Set the multi-precision number to zero.
  4835. *
  4836. * @param [out] a SP integer to set to zero.
  4837. */
  4838. void sp_zero(sp_int* a)
  4839. {
  4840. /* Make an sp_int with valid pointer zero. */
  4841. if (a != NULL) {
  4842. _sp_zero(a);
  4843. }
  4844. }
  4845. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
  4846. /* Clear the data from the multi-precision number, set to zero and free.
  4847. *
  4848. * @param [out] a SP integer.
  4849. */
  4850. void sp_clear(sp_int* a)
  4851. {
  4852. /* Clear when valid pointer passed in. */
  4853. if (a != NULL) {
  4854. unsigned int i;
  4855. /* Only clear the digits being used. */
  4856. for (i = 0; i < a->used; i++) {
  4857. a->dp[i] = 0;
  4858. }
  4859. /* Set back to zero and free. */
  4860. _sp_zero(a);
  4861. sp_free(a);
  4862. }
  4863. }
  4864. #if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4865. !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
  4866. /* Ensure the data in the multi-precision number is zeroed.
  4867. *
  4868. * Use when security sensitive data needs to be wiped.
  4869. *
  4870. * @param [in] a SP integer.
  4871. */
  4872. void sp_forcezero(sp_int* a)
  4873. {
  4874. /* Zeroize when a vald pointer passed in. */
  4875. if (a != NULL) {
  4876. /* Ensure all data zeroized - data not zeroed when used decreases. */
  4877. ForceZero(a->dp, a->size * SP_WORD_SIZEOF);
  4878. /* Set back to zero. */
  4879. #ifdef HAVE_WOLF_BIGINT
  4880. /* Zeroize the raw data as well. */
  4881. wc_bigint_zero(&a->raw);
  4882. #endif
  4883. /* Make value zero and free. */
  4884. _sp_zero(a);
  4885. sp_free(a);
  4886. }
  4887. }
  4888. #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
  4889. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4890. !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
  4891. /* Copy value of multi-precision number a into r.
  4892. *
  4893. * @param [in] a SP integer - source.
  4894. * @param [out] r SP integer - destination.
  4895. */
  4896. static void _sp_copy(const sp_int* a, sp_int* r)
  4897. {
  4898. /* Copy words across. */
  4899. if (a->used == 0) {
  4900. r->dp[0] = 0;
  4901. }
  4902. else {
  4903. XMEMCPY(r->dp, a->dp, a->used * SP_WORD_SIZEOF);
  4904. }
  4905. /* Set number of used words in result. */
  4906. r->used = a->used;
  4907. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4908. /* Set sign of result. */
  4909. r->sign = a->sign;
  4910. #endif
  4911. }
  4912. /* Copy value of multi-precision number a into r.
  4913. *
  4914. * @param [in] a SP integer - source.
  4915. * @param [out] r SP integer - destination.
  4916. *
  4917. * @return MP_OKAY on success.
  4918. */
  4919. int sp_copy(const sp_int* a, sp_int* r)
  4920. {
  4921. int err = MP_OKAY;
  4922. /* Validate parameters. */
  4923. if ((a == NULL) || (r == NULL)) {
  4924. err = MP_VAL;
  4925. }
  4926. /* Only copy if different pointers. */
  4927. if (a != r) {
  4928. /* Validated space in result. */
  4929. if ((err == MP_OKAY) && (a->used > r->size)) {
  4930. err = MP_VAL;
  4931. }
  4932. if (err == MP_OKAY) {
  4933. _sp_copy(a, r);
  4934. }
  4935. }
  4936. return err;
  4937. }
  4938. #endif
  4939. #if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4940. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  4941. defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
 *
 * Constant time implementation.
 * When y is 0, r1 = a2 and r2 = a1.
 * When y is 1, r1 = a1 and r2 = a2.
 *
 * NOTE(review): relies on wc_off_on_addr[0]/[1] being a zero/all-ones mask
 * pair - confirm against its definition.
 *
 * @param [in]  a1    First number to copy.
 * @param [in]  a2    Second number to copy.
 * @param [out] r1    First result number to copy into.
 * @param [out] r2    Second result number to copy into.
 * @param [in]  y     Indicates which number goes into which result number.
 * @param [in]  used  Number of digits to copy.
 */
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
    sp_int* r2, int y, unsigned int used)
{
    unsigned int i;

    /* Copy data - constant time. Exactly one of the two masks is non-zero,
     * so each result digit is taken from one operand without branching. */
    for (i = 0; i < used; i++) {
        r1->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ])) +
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1]));
        r2->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1])) +
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ]));
    }
    /* Copy used count with the same mask-select scheme. */
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
               (a2->used & ((int)wc_off_on_addr[y^1]));
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
               (a2->used & ((int)wc_off_on_addr[y  ]));
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Copy sign with the same mask-select scheme. */
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
               (a2->sign & ((int)wc_off_on_addr[y^1]));
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
               (a2->sign & ((int)wc_off_on_addr[y  ]));
#endif
}
  4979. #endif
  4980. #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
  4981. /* Initializes r and copies in value from a.
  4982. *
  4983. * @param [out] r SP integer - destination.
  4984. * @param [in] a SP integer - source.
  4985. *
  4986. * @return MP_OKAY on success.
  4987. * @return MP_VAL when a or r is NULL.
  4988. */
  4989. int sp_init_copy(sp_int* r, const sp_int* a)
  4990. {
  4991. int err;
  4992. /* Initialize r and copy value in a into it. */
  4993. err = sp_init(r);
  4994. if (err == MP_OKAY) {
  4995. err = sp_copy(a, r);
  4996. }
  4997. return err;
  4998. }
  4999. #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
  5000. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5001. !defined(NO_DH) || !defined(NO_DSA)
/* Exchange the values in a and b.
 *
 * Avoid using this API as three copy operations are performed.
 *
 * @param [in,out] a  SP integer to swap.
 * @param [in,out] b  SP integer to swap.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or b is NULL, or either is too small to hold the
 *         other's value.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_exch(sp_int* a, sp_int* b)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL)) {
        err = MP_VAL;
    }
    /* Check space for a in b and b in a. */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Declare temporary for swapping. */
        DECL_SP_INT(t, a->used);

        /* Create temporary for swapping. */
        ALLOC_SP_INT(t, a->used, err, NULL);
        if (err == MP_OKAY) {
            /* Cache allocated size of a and b - the raw struct copies below
             * overwrite 'size', which describes the allocation, not the
             * value, so it must be restored afterwards. */
            unsigned int asize = a->size;
            unsigned int bsize = b->size;

            /* Copy all of SP int: t <- a, a <- b, b <- t. */
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
            /* Put back size of a and b. */
            a->size = asize;
            b->size = bsize;
        }

        FREE_SP_INT(t, NULL);
    }

    return err;
}
  5045. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  5046. * !NO_DSA */
  5047. #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
  5048. !defined(WC_NO_CACHE_RESISTANT)
  5049. /* Conditional swap of SP int values in constant time.
  5050. *
  5051. * @param [in] a First SP int to conditionally swap.
  5052. * @param [in] b Second SP int to conditionally swap.
  5053. * @param [in] cnt Count of words to copy.
  5054. * @param [in] swap When value is 1 then swap.
  5055. * @param [in] t Temporary SP int to use in swap.
  5056. * @return MP_OKAY on success.
  5057. * @return MP_MEM when dynamic memory allocation fails.
  5058. */
  5059. int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
  5060. {
  5061. unsigned int i;
  5062. sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
  5063. /* XOR other fields in sp_int into temp - mask set when swapping. */
  5064. t->used = (a->used ^ b->used) & (unsigned int)mask;
  5065. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5066. t->sign = (a->sign ^ b->sign) & (unsigned int)mask;
  5067. #endif
  5068. /* XOR requested words into temp - mask set when swapping. */
  5069. for (i = 0; i < (unsigned int)cnt; i++) {
  5070. t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
  5071. }
  5072. /* XOR temporary - when mask set then result will be b. */
  5073. a->used ^= t->used;
  5074. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5075. a->sign ^= t->sign;
  5076. #endif
  5077. for (i = 0; i < (unsigned int)cnt; i++) {
  5078. a->dp[i] ^= t->dp[i];
  5079. }
  5080. /* XOR temporary - when mask set then result will be a. */
  5081. b->used ^= t->used;
  5082. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5083. b->sign ^= b->sign;
  5084. #endif
  5085. for (i = 0; i < (unsigned int)cnt; i++) {
  5086. b->dp[i] ^= t->dp[i];
  5087. }
  5088. return MP_OKAY;
  5089. }
/* Conditional swap of SP int values in constant time.
 *
 * @param [in] a     First SP int to conditionally swap.
 * @param [in] b     Second SP int to conditionally swap.
 * @param [in] cnt   Count of words to copy.
 * @param [in] swap  When value is 1 then swap.
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
{
    int err = MP_OKAY;
    /* Temporary holds the masked XOR of a and b during the swap. */
    DECL_SP_INT(t, (size_t)cnt);

    /* Allocate temporary to hold masked xor of a and b. */
    ALLOC_SP_INT(t, cnt, err, NULL);
    if (err == MP_OKAY) {
        /* Delegate to the variant that takes a caller-supplied temporary. */
        err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
        FREE_SP_INT(t, NULL);
    }

    return err;
}
  5111. #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
  5112. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5113. /* Calculate the absolute value of the multi-precision number.
  5114. *
  5115. * @param [in] a SP integer to calculate absolute value of.
  5116. * @param [out] r SP integer to hold result.
  5117. *
  5118. * @return MP_OKAY on success.
  5119. * @return MP_VAL when a or r is NULL.
  5120. */
  5121. int sp_abs(const sp_int* a, sp_int* r)
  5122. {
  5123. int err;
  5124. /* Copy a into r - copy fails when r is NULL. */
  5125. err = sp_copy(a, r);
  5126. if (err == MP_OKAY) {
  5127. r->sign = MP_ZPOS;
  5128. }
  5129. return err;
  5130. }
  5131. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  5132. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  5133. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  5134. /* Compare absolute value of two multi-precision numbers.
  5135. *
  5136. * @param [in] a SP integer.
  5137. * @param [in] b SP integer.
  5138. *
  5139. * @return MP_GT when a is greater than b.
  5140. * @return MP_LT when a is less than b.
  5141. * @return MP_EQ when a is equals b.
  5142. */
  5143. static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
  5144. {
  5145. int ret = MP_EQ;
  5146. /* Check number of words first. */
  5147. if (a->used > b->used) {
  5148. ret = MP_GT;
  5149. }
  5150. else if (a->used < b->used) {
  5151. ret = MP_LT;
  5152. }
  5153. else {
  5154. int i;
  5155. /* Starting from most significant word, compare words.
  5156. * Stop when different and set comparison return.
  5157. */
  5158. for (i = (int)(a->used - 1); i >= 0; i--) {
  5159. if (a->dp[i] > b->dp[i]) {
  5160. ret = MP_GT;
  5161. break;
  5162. }
  5163. else if (a->dp[i] < b->dp[i]) {
  5164. ret = MP_LT;
  5165. break;
  5166. }
  5167. }
  5168. /* If we made to the end then ret is MP_EQ from initialization. */
  5169. }
  5170. return ret;
  5171. }
  5172. #endif
  5173. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  5174. /* Compare absolute value of two multi-precision numbers.
  5175. *
  5176. * Pointers are compared such that NULL is less than not NULL.
  5177. *
  5178. * @param [in] a SP integer.
  5179. * @param [in] b SP integer.
  5180. *
  5181. * @return MP_GT when a is greater than b.
  5182. * @return MP_LT when a is less than b.
  5183. * @return MP_EQ when a equals b.
  5184. */
  5185. int sp_cmp_mag(const sp_int* a, const sp_int* b)
  5186. {
  5187. int ret;
  5188. /* Do pointer checks first. Both NULL returns equal. */
  5189. if (a == b) {
  5190. ret = MP_EQ;
  5191. }
  5192. /* Nothing is smaller than something. */
  5193. else if (a == NULL) {
  5194. ret = MP_LT;
  5195. }
  5196. /* Something is larger than nothing. */
  5197. else if (b == NULL) {
  5198. ret = MP_GT;
  5199. }
  5200. else
  5201. {
  5202. /* Compare values - a and b are not NULL. */
  5203. ret = _sp_cmp_abs(a, b);
  5204. }
  5205. return ret;
  5206. }
  5207. #endif
  5208. #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
  5209. defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
  5210. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Compare two multi-precision numbers.
 *
 * Assumes a and b are not NULL.
 *
 * @param [in] a  SP integer.
 * @param [in] b  SP integer.
 *
 * @return MP_GT when a is greater than b.
 * @return MP_LT when a is less than b.
 * @return MP_EQ when a equals b.
 */
static int _sp_cmp(const sp_int* a, const sp_int* b)
{
    int ret;

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first - differing signs decide the comparison without
     * looking at magnitudes. */
    if (a->sign > b->sign) {
        ret = MP_LT;
    }
    else if (a->sign < b->sign) {
        ret = MP_GT;
    }
    else /* (a->sign == b->sign) */ {
#endif
        /* Signs equal (or negatives disabled) - compare magnitudes. */
        ret = _sp_cmp_abs(a, b);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (a->sign == MP_NEG) {
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
             * Both negative: the larger magnitude is the smaller value, so
             * swap MP_GT and MP_LT by negating.
             */
            ret = -ret;
        }
    }
#endif

    return ret;
}
  5248. #endif
  5249. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5250. !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
  5251. defined(WOLFSSL_SP_MATH_ALL)
  5252. /* Compare two multi-precision numbers.
  5253. *
  5254. * Pointers are compared such that NULL is less than not NULL.
  5255. *
  5256. * @param [in] a SP integer.
  5257. * @param [in] b SP integer.
  5258. *
  5259. * @return MP_GT when a is greater than b.
  5260. * @return MP_LT when a is less than b.
  5261. * @return MP_EQ when a is equals b.
  5262. */
  5263. int sp_cmp(const sp_int* a, const sp_int* b)
  5264. {
  5265. int ret;
  5266. /* Check pointers first. Both NULL returns equal. */
  5267. if (a == b) {
  5268. ret = MP_EQ;
  5269. }
  5270. /* Nothing is smaller than something. */
  5271. else if (a == NULL) {
  5272. ret = MP_LT;
  5273. }
  5274. /* Something is larger than nothing. */
  5275. else if (b == NULL) {
  5276. ret = MP_GT;
  5277. }
  5278. else
  5279. {
  5280. /* Compare values - a and b are not NULL. */
  5281. ret = _sp_cmp(a, b);
  5282. }
  5283. return ret;
  5284. }
  5285. #endif
  5286. #if defined(HAVE_ECC) && !defined(WC_NO_RNG) && \
  5287. defined(WOLFSSL_ECC_GEN_REJECT_SAMPLING)
/* Compare two multi-precision numbers in constant time.
 *
 * Assumes a and b are not NULL.
 * Assumes a and b are positive.
 *
 * @param [in] a  SP integer.
 * @param [in] b  SP integer.
 * @param [in] n  Number of digits to compare.
 *
 * @return MP_GT when a is greater than b.
 * @return MP_LT when a is less than b.
 * @return MP_EQ when a equals b.
 */
static int _sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
{
    int ret = MP_EQ;
    int i;
    /* All-ones while the comparison is still undecided; zero afterwards so
     * later (less significant) digits cannot change the result. */
    int mask = -1;

    for (i = n - 1; i >= 0; i--) {
        /* Mask digits at or beyond 'used' to zero without branching on the
         * lengths. (0 - cond) is all-ones when cond is 1, zero otherwise. */
        sp_int_digit ad = a->dp[i] & ((sp_int_digit)0 - (i < (int)a->used));
        sp_int_digit bd = b->dp[i] & ((sp_int_digit)0 - (i < (int)b->used));

        /* Record less-than only while still undecided. */
        ret |= mask & ((0 - (ad < bd)) & MP_LT);
        mask &= 0 - (ret == MP_EQ);
        /* Record greater-than only while still undecided. */
        ret |= mask & ((0 - (ad > bd)) & MP_GT);
        mask &= 0 - (ret == MP_EQ);
    }

    return ret;
}
  5316. /* Compare two multi-precision numbers in constant time.
  5317. *
  5318. * Pointers are compared such that NULL is less than not NULL.
  5319. * Assumes a and b are positive.
  5320. * Assumes a and b have n digits set at sometime.
  5321. *
  5322. * @param [in] a SP integer.
  5323. * @param [in] b SP integer.
  5324. * @param [in] n Number of digits to compare.
  5325. *
  5326. * @return MP_GT when a is greater than b.
  5327. * @return MP_LT when a is less than b.
  5328. * @return MP_EQ when a is equals b.
  5329. */
  5330. int sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
  5331. {
  5332. int ret;
  5333. /* Check pointers first. Both NULL returns equal. */
  5334. if (a == b) {
  5335. ret = MP_EQ;
  5336. }
  5337. /* Nothing is smaller than something. */
  5338. else if (a == NULL) {
  5339. ret = MP_LT;
  5340. }
  5341. /* Something is larger than nothing. */
  5342. else if (b == NULL) {
  5343. ret = MP_GT;
  5344. }
  5345. else
  5346. {
  5347. /* Compare values - a and b are not NULL. */
  5348. ret = _sp_cmp_ct(a, b, n);
  5349. }
  5350. return ret;
  5351. }
  5352. #endif /* HAVE_ECC && !WC_NO_RNG && WOLFSSL_ECC_GEN_REJECT_SAMPLING */
  5353. /*************************
  5354. * Bit check/set functions
  5355. *************************/
  5356. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5357. ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \
  5358. defined(HAVE_ECC)) || defined(OPENSSL_EXTRA)
  5359. /* Check if a bit is set
  5360. *
  5361. * When a is NULL, result is 0.
  5362. *
  5363. * @param [in] a SP integer.
  5364. * @param [in] b Bit position to check.
  5365. *
  5366. * @return 0 when bit is not set.
  5367. * @return 1 when bit is set.
  5368. */
  5369. int sp_is_bit_set(const sp_int* a, unsigned int b)
  5370. {
  5371. int ret = 0;
  5372. /* Index of word. */
  5373. unsigned int i = b >> SP_WORD_SHIFT;
  5374. /* Check parameters. */
  5375. if ((a != NULL) && (i < a->used)) {
  5376. /* Shift amount to get bit down to index 0. */
  5377. unsigned int s = b & SP_WORD_MASK;
  5378. /* Get and mask bit. */
  5379. ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
  5380. }
  5381. return ret;
  5382. }
  5383. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5384. * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
/* Count the number of bits in the multi-precision number.
 *
 * When a is NULL, result is 0.
 *
 * @param [in] a  SP integer.
 *
 * @return Number of bits in the SP integer value.
 */
int sp_count_bits(const sp_int* a)
{
    int n = -1;

    /* Check parameter. */
    if ((a != NULL) && (a->used > 0)) {
        /* Get index of last word. */
        n = (int)(a->used - 1);
        /* Don't count leading zeros. */
        while ((n >= 0) && (a->dp[n] == 0)) {
            n--;
        }
    }

    /* -1 indicates SP integer value was zero (or a was NULL). */
    if (n < 0) {
        n = 0;
    }
    else {
        /* Get the most significant word. */
        sp_int_digit d = a->dp[n];
        /* Count of bits up to last word. */
        n *= SP_WORD_SIZE;

#ifdef SP_ASM_HI_BIT_SET_IDX
        {
            sp_int_digit hi;
            /* Get index of highest set bit. */
            SP_ASM_HI_BIT_SET_IDX(d, hi);
            /* Add bits up to and including index. */
            n += (int)hi + 1;
        }
#elif defined(SP_ASM_LZCNT)
        {
            sp_int_digit lz;
            /* Count number of leading zeros in highest non-zero digit. */
            SP_ASM_LZCNT(d, lz);
            /* Add non-leading zero bits count. */
            n += SP_WORD_SIZE - (int)lz;
        }
#else
        /* Portable fallback. Check if top word has more than half the bits
         * set so the loop below runs at most SP_WORD_SIZE/2 iterations. */
        if (d > SP_HALF_MAX) {
            /* Set count to a full last word. */
            n += SP_WORD_SIZE;
            /* Don't count leading zero bits. */
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
                n--;
                d <<= 1;
            }
        }
        else {
            /* Add to count until highest set bit is shifted out. */
            while (d != 0) {
                n++;
                d >>= 1;
            }
        }
#endif
    }

    return n;
}
  5452. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5453. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  5454. (defined(HAVE_ECC) && defined(FP_ECC)) || \
  5455. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Number of entries in array of number of least significant zero bits. */
#define SP_LNZ_CNT      16
/* Number of bits the array checks. */
#define SP_LNZ_BITS     4
/* Mask to apply to check with array. */
#define SP_LNZ_MASK     0xf
/* Lookup table: sp_lnz[v] is the count of least significant zero bits of the
 * 4-bit value v. v == 0 maps to 4, meaning all checked bits are zero. */
static const int sp_lnz[SP_LNZ_CNT] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
/* Count the number of least significant zero bits.
 *
 * When a is NULL or zero, result is 0.
 *
 * @param [in] a  SP integer to use.
 *
 * @return Number of least significant zero bits.
 */
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
static
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
int sp_cnt_lsb(const sp_int* a)
{
    unsigned int bc = 0;

    /* Check for number with a value. */
    if ((a != NULL) && (!sp_iszero(a))) {
        unsigned int i;
        unsigned int j;

        /* Count least significant words that are zero. */
        for (i = 0; i < a->used && a->dp[i] == 0; i++, bc += SP_WORD_SIZE) {
        }

        /* Use 4-bit table to get count in first non-zero word. */
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
            /* Get number of least significant 0 bits in nibble. */
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
            /* Done if not all 4 bits are zero. */
            if (cnt != 4) {
                /* Add checked bits and count in last 4 bits checked. */
                bc += j + (unsigned int)cnt;
                break;
            }
        }
    }

    return (int)bc;
}
  5501. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
  5502. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
  5503. (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
  5504. /* Determine if the most significant byte of the encoded multi-precision number
  5505. * has the top bit set.
  5506. *
  5507. * When a is NULL, result is 0.
  5508. *
  5509. * @param [in] a SP integer.
  5510. *
  5511. * @return 1 when the top bit of top byte is set.
  5512. * @return 0 when the top bit of top byte is not set.
  5513. */
  5514. int sp_leading_bit(const sp_int* a)
  5515. {
  5516. int bit = 0;
  5517. /* Check if we have a number and value to use. */
  5518. if ((a != NULL) && (a->used > 0)) {
  5519. /* Get top word. */
  5520. sp_int_digit d = a->dp[a->used - 1];
  5521. #if SP_WORD_SIZE > 8
  5522. /* Remove bottom 8 bits until highest 8 bits left. */
  5523. while (d > (sp_int_digit)0xff) {
  5524. d >>= 8;
  5525. }
  5526. #endif
  5527. /* Get the highest bit of the 8-bit value. */
  5528. bit = (int)(d >> 7);
  5529. }
  5530. return bit;
  5531. }
  5532. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  5533. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  5534. defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
  5535. !defined(NO_RSA)
  5536. /* Set one bit of a: a |= 1 << i
  5537. * The field 'used' is updated in a.
  5538. *
  5539. * @param [in,out] a SP integer to set bit into.
  5540. * @param [in] i Index of bit to set.
  5541. *
  5542. * @return MP_OKAY on success.
  5543. * @return MP_VAL when a is NULL, index is negative or index is too large.
  5544. */
  5545. int sp_set_bit(sp_int* a, int i)
  5546. {
  5547. int err = MP_OKAY;
  5548. /* Get index of word to set. */
  5549. unsigned int w = (unsigned int)(i >> SP_WORD_SHIFT);
  5550. /* Check for valid number and and space for bit. */
  5551. if ((a == NULL) || (i < 0) || (w >= a->size)) {
  5552. err = MP_VAL;
  5553. }
  5554. if (err == MP_OKAY) {
  5555. /* Amount to shift up to set bit in word. */
  5556. unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
  5557. unsigned int j;
  5558. /* Set to zero all unused words up to and including word to have bit
  5559. * set.
  5560. */
  5561. for (j = a->used; j <= w; j++) {
  5562. a->dp[j] = 0;
  5563. }
  5564. /* Set bit in word. */
  5565. a->dp[w] |= (sp_int_digit)1 << s;
  5566. /* Update used if necessary */
  5567. if (a->used <= w) {
  5568. a->used = w + 1;
  5569. }
  5570. }
  5571. return err;
  5572. }
  5573. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  5574. * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
  5575. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5576. defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
  5577. /* Exponentiate 2 to the power of e: a = 2^e
  5578. * This is done by setting the 'e'th bit.
  5579. *
  5580. * @param [out] a SP integer to hold result.
  5581. * @param [in] e Exponent.
  5582. *
  5583. * @return MP_OKAY on success.
  5584. * @return MP_VAL when a is NULL, e is negative or 2^exponent is too large.
  5585. */
  5586. int sp_2expt(sp_int* a, int e)
  5587. {
  5588. int err = MP_OKAY;
  5589. /* Validate parameters. */
  5590. if ((a == NULL) || (e < 0)) {
  5591. err = MP_VAL;
  5592. }
  5593. if (err == MP_OKAY) {
  5594. /* Set number to zero and then set bit. */
  5595. _sp_zero(a);
  5596. err = sp_set_bit(a, e);
  5597. }
  5598. return err;
  5599. }
  5600. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5601. * WOLFSSL_KEY_GEN || !NO_DH */
  5602. /**********************
  5603. * Digit/Long functions
  5604. **********************/
  5605. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
  5606. defined(HAVE_ECC)
  5607. /* Set the multi-precision number to be the value of the digit.
  5608. *
  5609. * @param [out] a SP integer to become number.
  5610. * @param [in] d Digit to be set.
  5611. */
  5612. static void _sp_set(sp_int* a, sp_int_digit d)
  5613. {
  5614. /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
  5615. sp_int_minimal* am = (sp_int_minimal*)a;
  5616. am->dp[0] = d;
  5617. /* d == 0 => used = 0, d > 0 => used = 1 */
  5618. am->used = (d > 0);
  5619. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5620. am->sign = MP_ZPOS;
  5621. #endif
  5622. }
  5623. /* Set the multi-precision number to be the value of the digit.
  5624. *
  5625. * @param [out] a SP integer to become number.
  5626. * @param [in] d Digit to be set.
  5627. *
  5628. * @return MP_OKAY on success.
  5629. * @return MP_VAL when a is NULL.
  5630. */
  5631. int sp_set(sp_int* a, sp_int_digit d)
  5632. {
  5633. int err = MP_OKAY;
  5634. /* Validate parameters. */
  5635. if (a == NULL) {
  5636. err = MP_VAL;
  5637. }
  5638. if (err == MP_OKAY) {
  5639. _sp_set(a, d);
  5640. }
  5641. return err;
  5642. }
  5643. #endif
  5644. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
/* Set a number into the multi-precision number.
 *
 * Number may be larger than the size of a digit.
 *
 * @param [out] a  SP integer to set.
 * @param [in]  n  Long value to set.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a is NULL or the value does not fit in a.
 */
int sp_set_int(sp_int* a, unsigned long n)
{
    int err = MP_OKAY;

    if (a == NULL) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#if SP_WORD_SIZE < SP_ULONG_BITS
        /* Assign directly when the value fits in one word. */
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
#endif
            a->dp[0] = (sp_int_digit)n;
            /* Zero uses no digits. */
            a->used = (n != 0);
#if SP_WORD_SIZE < SP_ULONG_BITS
        }
        else {
            unsigned int i;

            /* Assign value word by word, least significant first. */
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
                a->dp[i] = (sp_int_digit)n;
            }
            /* Update number of words used. */
            a->used = i;
            /* Check for overflow - bits of n remain but a is full. */
            if ((i == a->size) && (n != 0)) {
                err = MP_VAL;
            }
        }
#endif
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Unsigned input - result is always positive. */
        a->sign = MP_ZPOS;
#endif
    }

    return err;
}
  5690. #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
  5691. #if defined(WOLFSSL_SP_MATH_ALL) || \
  5692. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5693. !defined(NO_DH) || defined(HAVE_ECC)
/* Compare a one digit number with a multi-precision number.
 *
 * When a is NULL, MP_LT is returned.
 *
 * @param [in] a  SP integer to compare.
 * @param [in] d  Digit to compare with.
 *
 * @return MP_GT when a is greater than d.
 * @return MP_LT when a is less than d.
 * @return MP_EQ when a equals d.
 */
int sp_cmp_d(const sp_int* a, sp_int_digit d)
{
    int ret = MP_EQ;

    /* No SP integer is always less - even when d is zero. */
    if (a == NULL) {
        ret = MP_LT;
    }
    else
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first - any negative value is less than a digit. */
    if (a->sign == MP_NEG) {
        ret = MP_LT;
    }
    else
#endif
    {
        /* Check if SP integer has more than one word. */
        if (a->used > 1) {
            ret = MP_GT;
        }
        /* Special case for zero. */
        else if (a->used == 0) {
            if (d != 0) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
        else {
            /* The single word in the SP integer can now be compared with d. */
            if (a->dp[0] > d) {
                ret = MP_GT;
            }
            else if (a->dp[0] < d) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
    }

    return ret;
}
  5745. #endif
  5746. #if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
  5747. defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
/* Add a one digit number to the multi-precision number.
 *
 * Assumes a and r are positive; caller has checked r has space for a carry
 * word.
 *
 * @param [in]  a  SP integer to be added to.
 * @param [in]  d  Digit to add.
 * @param [out] r  SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for fixed size dp array.
 */
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Special case of zero means we want result to have a digit when not
     * adding zero. */
    if (a->used == 0) {
        r->dp[0] = d;
        r->used = (d > 0);
    }
    else {
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        /* Set used of result - updated if overflow seen. */
        r->used = a->used;

        r->dp[0] = a0 + d;
        /* Check for carry - unsigned wrap makes the sum less than operand. */
        if (r->dp[0] < a0) {
            /* Do carry through all words - stop at first word that does not
             * overflow to zero. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] + 1;
                if (r->dp[i] != 0) {
                    break;
                }
            }
            /* Add another word if carry propagated off the end. */
            if (i == a->used) {
                /* Check result has enough space for another word. */
                if (i < r->size) {
                    r->used++;
                    r->dp[i] = 1;
                }
                else {
                    err = MP_VAL;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits. */
        if ((err == MP_OKAY) && (r != a)) {
            /* Copy any words that didn't update with carry. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
    }

    return err;
}
  5803. #endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
  5804. * defined(WOLFSSL_SP_READ_RADIX_10) */
  5805. #if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
  5806. defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
  5807. defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
  5808. !defined(WC_NO_RNG))
/* Subtract a one digit number from the multi-precision number.
 *
 * Assumes result does not go negative (no negative number support in the
 * zero-input path below).
 *
 * @param [in]  a  SP integer to be subtracted from.
 * @param [in]  d  Digit to subtract.
 * @param [out] r  SP integer to store result in.
 */
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    /* Set result used to be same as input. Updated with clamp. */
    r->used = a->used;
    /* Only possible when not handling negatives. */
    if (a->used == 0) {
        /* Set result to zero as no negative support. */
        r->dp[0] = 0;
    }
    else {
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        r->dp[0] = a0 - d;
        /* Check for borrow - unsigned wrap makes difference exceed operand. */
        if (r->dp[0] > a0) {
            /* Do borrow through all words - stop at first word that does not
             * underflow past zero. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] - 1;
                if (r->dp[i] != SP_DIGIT_MAX) {
                    break;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits. */
        if (r != a) {
            /* Copy any words that didn't update with borrow. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
        /* Remove leading zero words. */
        sp_clamp(r);
    }
}
  5849. #endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
  5850. * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
  5851. * WOLFSSL_SP_PRIME_GEN */
  5852. #ifdef WOLFSSL_SP_ADD_D
/* Add a one digit number to the multi-precision number.
 *
 * @param  [in]   a  SP integer be added to.
 * @param  [in]   d  Digit to add.
 * @param  [out]  r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or result is too large for the fixed
 *          size dp array.
 */
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        err = _sp_add_d(a, d, r);
    }
#else
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        if (a->sign == MP_ZPOS) {
            /* Positive, so use internal function. */
            r->sign = MP_ZPOS;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] > d)) {
            /* Negative value bigger than digit so subtract digit from the
             * magnitude; sign stays negative. */
            r->sign = MP_NEG;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Negative value smaller or equal to digit. */
            r->sign = MP_ZPOS;
            /* Subtract negative value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a digit equal to or greater than zero:
             * used is 0 when the result is zero, 1 otherwise. */
            r->used = (r->dp[0] > 0);
        }
    }
#endif

    return err;
}
  5910. #endif /* WOLFSSL_SP_ADD_D */
  5911. #ifdef WOLFSSL_SP_SUB_D
/* Sub a one digit number from the multi-precision number.
 *
 * @param  [in]   a  SP integer be subtracted from.
 * @param  [in]   d  Digit to subtract.
 * @param  [out]  r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or result doesn't fit in r.
 */
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result. */
    if ((err == MP_OKAY) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        _sp_sub_d(a, d, r);
    }
#else
    /* Check for space in result especially when borrow adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        if (a->sign == MP_NEG) {
            /* Subtracting a digit from a negative grows the magnitude,
             * so use internal add; sign stays negative. */
            r->sign = MP_NEG;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] >= d)) {
            /* Positive number greater than or equal to digit - subtract digit.
             */
            r->sign = MP_ZPOS;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Positive value smaller than digit. */
            r->sign = MP_NEG;
            /* Subtract positive value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a single non-zero digit (a->dp[0] < d here). */
            r->used = 1;
        }
    }
#endif

    return err;
}
  5970. #endif /* WOLFSSL_SP_SUB_D */
  5971. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5972. defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
  5973. !defined(NO_DH) || defined(HAVE_ECC) || \
  5974. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5975. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  5976. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
/* Multiply a by digit n and put result into r shifting up o digits.
 * r = (a * n) << (o * SP_WORD_SIZE)
 *
 * NOTE(review): when WOLFSSL_SP_SMALL is not defined the offset o is ignored
 * for zeroing - callers are expected to pass o == 0 in that configuration.
 *
 * @param  [in]   a  SP integer to be multiplied.
 * @param  [in]   d  SP digit to multiply by.
 * @param  [out]  r  SP integer result.
 * @param  [in]   o  Number of digits to move result up by.
 * @return  MP_OKAY on success.
 * @return  MP_VAL when result is too large for sp_int.
 */
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
{
    int err = MP_OKAY;
    unsigned int i;
#ifndef SQR_MUL_ASM
    /* Running word: low half is digit to store, high half is carry. */
    sp_int_word t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

#ifdef WOLFSSL_SP_SMALL
    /* Zero out offset words. */
    for (i = 0; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    /* Don't use the offset. Only when doing small code size div. */
    (void)o;
#endif

    /* Multiply each word of a by n. */
    for (i = 0; i < a->used; i++, o++) {
#ifndef SQR_MUL_ASM
        /* Add product to top word of previous result. */
        t += (sp_int_word)a->dp[i] * d;
        /* Store low word. */
        r->dp[o] = (sp_int_digit)t;
        /* Move top word down. */
        t >>= SP_WORD_SIZE;
#else
        /* Multiply and add into low and high from previous result.
         * No overflow of possible with add. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
        /* Store low word. */
        r->dp[o] = l;
        /* Move high word into low word and set high word to 0. */
        l = h;
        h = 0;
#endif
    }

    /* Check whether new word to be appended to result. */
#ifndef SQR_MUL_ASM
    if (t > 0)
#else
    if (l > 0)
#endif
    {
        /* Validate space available in result. */
        if (o == r->size) {
            err = MP_VAL;
        }
        else {
            /* Store new top word. */
#ifndef SQR_MUL_ASM
            r->dp[o++] = (sp_int_digit)t;
#else
            r->dp[o++] = l;
#endif
        }
    }
    /* Update number of words in result. */
    r->used = o;
    /* In case n is zero. */
    sp_clamp(r);

    return err;
}
  6052. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  6053. * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
  6054. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6055. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
  6056. /* Multiply a by digit n and put result into r. r = a * n
  6057. *
  6058. * @param [in] a SP integer to multiply.
  6059. * @param [in] n Digit to multiply by.
  6060. * @param [out] r SP integer to hold result.
  6061. *
  6062. * @return MP_OKAY on success.
  6063. * @return MP_VAL when a or b is NULL, or a has maximum number of digits used.
  6064. */
  6065. int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
  6066. {
  6067. int err = MP_OKAY;
  6068. /* Validate parameters. */
  6069. if ((a == NULL) || (r == NULL)) {
  6070. err = MP_VAL;
  6071. }
  6072. /* Check space for product result - _sp_mul_d checks when new word added. */
  6073. if ((err == MP_OKAY) && (a->used > r->size)) {
  6074. err = MP_VAL;
  6075. }
  6076. if (err == MP_OKAY) {
  6077. err = _sp_mul_d(a, d, r, 0);
  6078. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6079. /* Update sign. */
  6080. if (d == 0) {
  6081. r->sign = MP_ZPOS;
  6082. }
  6083. else {
  6084. r->sign = a->sign;
  6085. }
  6086. #endif
  6087. }
  6088. return err;
  6089. }
  6090. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  6091. * (WOLFSSL_KEY_GEN && !NO_RSA) */
  6092. /* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
  6093. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6094. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  6095. defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
  6096. #define WOLFSSL_SP_DIV_D
  6097. #endif
  6098. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6099. !defined(NO_DH) || \
  6100. (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
  6101. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  6102. #define WOLFSSL_SP_MOD_D
  6103. #endif
  6104. #if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6105. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6106. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  6107. defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
  6108. #ifndef SP_ASM_DIV_WORD
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * NOTE(review): assumes hi < d so the quotient fits in a single digit -
 * TODO confirm against callers; the built-in divide path truncates the
 * result to a digit either way.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
#ifdef WOLFSSL_SP_DIV_WORD_HALF
    sp_int_digit r;

    /* Trial division using half of the bits in d. */
    /* Check for shortcut when no high word set. */
    if (hi == 0) {
        r = lo / d;
    }
    else {
        /* Half the bits of d. */
        sp_int_digit divh = d >> SP_HALF_SIZE;
        /* Number to divide in one value. */
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
        sp_int_word trial;
        sp_int_digit r2;

        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
        /* Divide high word by top half of divisor - an over-estimate of the
         * top half of the quotient. */
        r = hi / divh;
        /* When result too big then assume only max value. */
        if (r > SP_HALF_MAX) {
            r = SP_HALF_MAX;
        }
        /* Shift up result for trial division calculation. */
        r <<= SP_HALF_SIZE;
        /* Calculate trial value. */
        trial = r * (sp_int_word)d;
        /* Decrease r while trial is too big - correct the over-estimate. */
        while (trial > w) {
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
            trial -= (sp_int_word)d << SP_HALF_SIZE;
        }
        /* Subtract trial. */
        w -= trial;

        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
        /* Divide top SP_WORD_SIZE of remainder by top half of divisor. */
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
        /* Calculate trial value. */
        trial = r2 * (sp_int_word)d;
        /* Decrease r while trial is too big. */
        while (trial > w) {
            r2--;
            trial -= d;
        }
        /* Subtract trial. */
        w -= trial;
        /* Update result. */
        r += r2;

        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
        r2 = ((sp_int_digit)w) / d;
        /* Update result. */
        r += r2;
    }

    return r;
#else
    sp_int_word w;
    sp_int_digit r;

    /* Use built-in divide. */
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
    w /= d;
    r = (sp_int_digit)w;

    return r;
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
}
  6181. #endif /* !SP_ASM_DIV_WORD */
  6182. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6183. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6184. #if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
  6185. !defined(WOLFSSL_SP_SMALL)
  6186. #if SP_WORD_SIZE == 64
  6187. /* 2^64 / 3 */
  6188. #define SP_DIV_3_CONST 0x5555555555555555L
  6189. /* 2^64 / 10 */
  6190. #define SP_DIV_10_CONST 0x1999999999999999L
  6191. #elif SP_WORD_SIZE == 32
  6192. /* 2^32 / 3 */
  6193. #define SP_DIV_3_CONST 0x55555555
  6194. /* 2^32 / 10 */
  6195. #define SP_DIV_10_CONST 0x19999999
  6196. #elif SP_WORD_SIZE == 16
  6197. /* 2^16 / 3 */
  6198. #define SP_DIV_3_CONST 0x5555
  6199. /* 2^16 / 10 */
  6200. #define SP_DIV_10_CONST 0x1999
  6201. #elif SP_WORD_SIZE == 8
  6202. /* 2^8 / 3 */
  6203. #define SP_DIV_3_CONST 0x55
  6204. /* 2^8 / 10 */
  6205. #define SP_DIV_10_CONST 0x19
  6206. #endif
  6207. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
/* Divide by 3: r = a / 3 and rem = a % 3
 *
 * Used in checking prime: (a % 3) == 0?.
 *
 * NOTE(review): when r is NULL, rem must not be NULL - it is dereferenced
 * unconditionally on that path.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_3_CONST;
    sp_int_digit lm = 0;
    sp_int_digit hm = 0;
#endif
    sp_int_digit tr = 0;
    /* Quotient fixup. */
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
    /* Remainder fixup. */
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };

    /* Check whether only mod value needed. */
    if (r == NULL) {
        unsigned int i;
        /* 2^2 mod 3 = 4 mod 3 = 1.
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
         *
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
         */
#ifndef SQR_MUL_ASM
        t = 0;
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            t += a->dp[i];
        }
        /* Sum digits of sum. */
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
        /* Subtract trial division. */
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
        }
        /* Sum digits of sum - can get carry. */
        SP_ASM_ADDC_REG(l, tt, tr);
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
        SP_ASM_MUL(lm, hm, l, t);
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
        hm += tt * SP_DIV_3_CONST;
        /* Subtract trial division from digit. */
        tr = l - (hm * 3);
#endif
        /* tr is 0..5 but need 0..2 */
        /* Fix up remainder. */
        tr = sp_rem6[tr];
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        int i;

        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
            tt += tr * SP_DIV_3_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 3);
#endif
            /* tr is 0..5 but need 0..2 */
            /* Fix up result. */
            tt += sp_r6[tr];
            /* Fix up remainder. */
            tr = sp_rem6[tr];
            /* Store result of digit divided by 3. */
            r->dp[i] = tt;
        }

        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6311. #endif /* !(WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
/* Divide by 10: r = a / 10 and rem = a % 10
 *
 * Used when writing with a radix of 10 - decimal number.
 *
 * NOTE(review): when r is NULL, rem must not be NULL - it is dereferenced
 * unconditionally on that path.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_10_CONST;
#endif
    sp_int_digit tr = 0;

    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up remainder. */
            tr = tr % 10;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up result. */
            tt += tr / 10;
            /* Fix up remainder. */
            tr %= 10;
            /* Store result of digit divided by 10. */
            r->dp[i] = tt;
        }

        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6396. #endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
  6397. #if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
/* Divide by small number: r = a / d and rem = a % d
 *
 * NOTE(review): callers ensure d <= SP_HALF_MAX (see sp_div_d/sp_mod_d) so
 * the reciprocal trial division below converges. When WOLFSSL_SP_SMALL is
 * not defined and r is NULL, rem must not be NULL - it is dereferenced
 * unconditionally on that path.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
    sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
#endif
    sp_int_digit tr = 0;
    /* Reciprocal approximation: floor(SP_DIGIT_MAX / d). */
    sp_int_digit m = SP_DIGIT_MAX / d;

#ifndef WOLFSSL_SP_SMALL
    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up remainder. */
            tr = tr % d;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else
#endif /* !WOLFSSL_SP_SMALL */
    {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up result. */
            tt += tr / d;
            /* Fix up remainder. */
            tr %= d;
            /* Store result of dividing the digit. */
#ifdef WOLFSSL_SP_SMALL
            if (r != NULL)
#endif
            {
                r->dp[i] = tt;
            }
        }
#ifdef WOLFSSL_SP_SMALL
        if (r != NULL)
#endif
        {
            /* Set the used amount to maximal amount. */
            r->used = a->used;
            /* Remove leading zeros. */
            sp_clamp(r);
        }
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6493. #endif
  6494. #ifdef WOLFSSL_SP_DIV_D
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 *   r = a / d; rem = a % d
 *
 * Use trial division algorithm - one sp_div_word() per word of a.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
 */
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
    sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    /* Running remainder - always < d after each iteration. */
    sp_int_word w = 0;
#else
    sp_int_digit l;
    sp_int_digit h = 0;
#endif
    sp_int_digit t;

    /* Divide starting at most significant word down to least. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Get current word. */
        l = a->dp[i];
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word(h, l, d);
        /* Subtract to get modulo result - only low word needed. */
        h = l - t * d;
#endif
        /* Store result of dividing the digit. */
        if (r != NULL) {
            r->dp[i] = t;
        }
    }

    if (r != NULL) {
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
    }
    /* Return remainder if required. */
    if (rem != NULL) {
#ifndef SQR_MUL_ASM
        *rem = (sp_int_digit)w;
#else
        *rem = h;
#endif
    }
}
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 *   r = a / d; rem = a % d
 *
 * Dispatches to a fast path for d == 3, d == 10 and small d where available.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, d is 0 or quotient doesn't fit in r.
 */
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (d == 0)) {
        err = MP_VAL;
    }
    /* Check space for maximal sized result. */
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Note: the #if blocks below splice into one if/else chain. */
#if !defined(WOLFSSL_SP_SMALL)
#if SP_WORD_SIZE < 64
        if (d == 3) {
            /* Fast implementation for divisor of 3. */
            _sp_div_3(a, r, rem);
        }
        else
#endif
        if (d == 10) {
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
            _sp_div_10(a, r, rem);
        }
        else
#endif
        if (d <= SP_HALF_MAX) {
            /* For small divisors. */
            _sp_div_small(a, d, r, rem);
        }
        else
        {
            _sp_div_d(a, d, r, rem);
        }

#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Quotient keeps the sign of the dividend. */
        if (r != NULL) {
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  6608. #endif /* WOLFSSL_SP_DIV_D */
  6609. #ifdef WOLFSSL_SP_MOD_D
/* Calculate a modulo the digit d into r: r = a mod d
 *
 * Trial division only - the quotient is discarded.
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   d  Digit that is the modulus.
 * @param  [out]  r  Digit that is the result.
 */
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
{
    int i;
#ifndef SQR_MUL_ASM
    /* Running remainder - always < d after each iteration. */
    sp_int_word w = 0;
#else
    sp_int_digit h = 0;
#endif

    /* Divide starting at most significant word down to least. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
        /* Subtract to get modulo result - only low word needed. */
        h = a->dp[i] - t * d;
#endif
    }

    /* Return remainder. */
#ifndef SQR_MUL_ASM
    *r = (sp_int_digit)w;
#else
    *r = h;
#endif
}
  6647. /* Calculate a modulo the digit d into r: r = a mod d
  6648. *
  6649. * @param [in] a SP integer to reduce.
  6650. * @param [in] d Digit to that is the modulus.
  6651. * @param [out] r Digit that is the result.
  6652. *
  6653. * @return MP_OKAY on success.
  6654. * @return MP_VAL when a is NULL or d is 0.
  6655. */
  6656. #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
  6657. !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
  6658. static
  6659. #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
  6660. int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
  6661. {
  6662. int err = MP_OKAY;
  6663. /* Validate parameters. */
  6664. if ((a == NULL) || (r == NULL) || (d == 0)) {
  6665. err = MP_VAL;
  6666. }
  6667. #if 0
  6668. sp_print(a, "a");
  6669. sp_print_digit(d, "m");
  6670. #endif
  6671. if (err == MP_OKAY) {
  6672. /* Check whether d is a power of 2. */
  6673. if ((d & (d - 1)) == 0) {
  6674. if (a->used == 0) {
  6675. *r = 0;
  6676. }
  6677. else {
  6678. *r = a->dp[0] & (d - 1);
  6679. }
  6680. }
  6681. #if !defined(WOLFSSL_SP_SMALL)
  6682. #if SP_WORD_SIZE < 64
  6683. else if (d == 3) {
  6684. /* Fast implementation for divisor of 3. */
  6685. _sp_div_3(a, NULL, r);
  6686. }
  6687. #endif
  6688. else if (d == 10) {
  6689. /* Fast implementation for divisor of 10. */
  6690. _sp_div_10(a, NULL, r);
  6691. }
  6692. #endif
  6693. else if (d <= SP_HALF_MAX) {
  6694. /* For small divisors. */
  6695. _sp_div_small(a, d, NULL, r);
  6696. }
  6697. else {
  6698. _sp_mod_d(a, d, r);
  6699. }
  6700. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6701. if (a->sign == MP_NEG) {
  6702. *r = d - *r;
  6703. }
  6704. #endif
  6705. }
  6706. #if 0
  6707. sp_print_digit(*r, "rmod");
  6708. #endif
  6709. return err;
  6710. }
  6711. #endif /* WOLFSSL_SP_MOD_D */
  6712. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  6713. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6714. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  6715. /* Divides a by 2 and stores in r: r = a >> 1
  6716. *
  6717. * @param [in] a SP integer to divide.
  6718. * @param [out] r SP integer to hold result.
  6719. */
  6720. static void _sp_div_2(const sp_int* a, sp_int* r)
  6721. {
  6722. int i;
  6723. /* Shift down each word by 1 and include bottom bit of next at top. */
  6724. for (i = 0; i < (int)a->used - 1; i++) {
  6725. r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
  6726. }
  6727. /* Last word only needs to be shifted down. */
  6728. r->dp[i] = a->dp[i] >> 1;
  6729. /* Set used to be all words seen. */
  6730. r->used = (unsigned int)i + 1;
  6731. /* Remove leading zeros. */
  6732. sp_clamp(r);
  6733. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6734. /* Same sign in result. */
  6735. r->sign = a->sign;
  6736. #endif
  6737. }
  6738. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  6739. /* Divides a by 2 and stores in r: r = a >> 1
  6740. *
  6741. * @param [in] a SP integer to divide.
  6742. * @param [out] r SP integer to hold result.
  6743. *
  6744. * @return MP_OKAY on success.
  6745. * @return MP_VAL when a or r is NULL.
  6746. */
  6747. int sp_div_2(const sp_int* a, sp_int* r)
  6748. {
  6749. int err = MP_OKAY;
  6750. /* Only when a public API. */
  6751. if ((a == NULL) || (r == NULL)) {
  6752. err = MP_VAL;
  6753. }
  6754. /* Ensure maximal size is supported by result. */
  6755. if ((err == MP_OKAY) && (a->used > r->size)) {
  6756. err = MP_VAL;
  6757. }
  6758. if (err == MP_OKAY) {
  6759. _sp_div_2(a, r);
  6760. }
  6761. return err;
  6762. }
  6763. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6764. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
  6765. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6766. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  6767. /* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
  6768. *
  6769. * r = a / 2 (mod m) - constant time (a < m and positive)
  6770. *
  6771. * @param [in] a SP integer to divide.
  6772. * @param [in] m SP integer that is modulus.
  6773. * @param [out] r SP integer to hold result.
  6774. *
  6775. * @return MP_OKAY on success.
  6776. * @return MP_VAL when a, m or r is NULL.
  6777. */
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Check result has enough space for a + m (the sum may carry into one
     * extra word before the halving). */
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#ifndef SQR_MUL_ASM
        /* Double-width accumulator: low half is the digit, high half the
         * carry. */
        sp_int_word w = 0;
#else
        /* Low/high accumulator digits and temporary for add-with-carry. */
        sp_int_digit l = 0;
        sp_int_digit h;
        sp_int_digit t;
#endif
        /* All ones when a is odd, all zeros when even - selects whether the
         * modulus is added before halving (keeps the flow constant time). */
        sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
        unsigned int i;

#if 0
        sp_print(a, "a");
        sp_print(m, "m");
#endif

        /* Add a to m, if a is odd, into r in constant time: the resulting
         * value is even so the halving below is exact. */
        for (i = 0; i < m->used; i++) {
            /* Mask to apply to a - all ones only while i indexes a digit
             * that a actually uses (values past 'used' are uninitialized). */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
#ifndef SQR_MUL_ASM
            /* Conditionally add modulus digit. */
            w += m->dp[i] & mask;
            /* Conditionally add digit of a. */
            w += a->dp[i] & mask_a;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Keep only the carry for the next digit. */
            w >>= DIGIT_BIT;
#else
            /* No high digit yet. */
            h = 0;
            /* Conditionally use modulus digit. */
            t = m->dp[i] & mask;
            /* Add with carry modulus. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Conditionally use digit of a. */
            t = a->dp[i] & mask_a;
            /* Add with carry a. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Carry becomes the next digit's starting value. */
            l = h;
#endif
        }
        /* Store final carry as the top word. */
#ifndef SQR_MUL_ASM
        r->dp[i] = (sp_int_digit)w;
#else
        r->dp[i] = l;
#endif
        /* Used always includes the carry word - set or not - so the length
         * does not leak whether a carry occurred. */
        r->used = i + 1;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif
        /* Conditional sum is even by construction - halve it to finish. */
        _sp_div_2(r, r);

#if 0
        sp_print(r, "rd2");
#endif
    }

    return err;
}
  6853. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6854. /************************
  6855. * Add/Subtract Functions
  6856. ************************/
  6857. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
  6858. /* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
  6859. *
  6860. * @param [in] a SP integer to add to.
  6861. * @param [in] b SP integer to add.
  6862. * @param [out] r SP integer to store result in.
  6863. * @param [in] o Number of digits to offset b.
  6864. */
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
{
    unsigned int i = 0;
#ifndef SQR_MUL_ASM
    /* Double-width accumulator: low half is the digit, high half the
     * carry. */
    sp_int_word t = 0;
#else
    /* Low/high accumulator digits and temporary for add-with-carry. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit t = 0;
#endif

#ifdef SP_MATH_NEED_ADD_OFF
    unsigned int j;

    /* Copy a into result up to offset. */
    /* NOTE(review): i is unsigned and o is int - comparison assumes
     * callers pass o >= 0. */
    for (; (i < o) && (i < a->used); i++) {
        r->dp[i] = a->dp[i];
    }
    /* Set result to 0 for digits beyond those in a. */
    for (; i < o; i++) {
        r->dp[i] = 0;
    }
    /* Add each digit from a and b where both have values. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        /* Keep only the carry for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        /* Carry becomes the next digit's starting value. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits.
     */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; j < b->used; i++, j++) {
#ifndef SQR_MUL_ASM
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#else
    /* Offset unused in this configuration - addition always from digit 0. */
    (void)o;

    /* Add each digit from a and b where both have values. */
    for (; (i < a->used) && (i < b->used); i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t += b->dp[i];
        r->dp[i] = (sp_int_digit)t;
        /* Keep only the carry for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        /* Carry becomes the next digit's starting value. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits.
     */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; i < b->used; i++) {
#ifndef SQR_MUL_ASM
        t += b->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#endif

    /* Set used based on last digit put in. */
    r->used = i;
    /* Put in carry - used grows by one only when the carry is non-zero. */
#ifndef SQR_MUL_ASM
    r->dp[i] = (sp_int_digit)t;
    r->used += (t != 0);
#else
    r->dp[i] = l;
    r->used += (l != 0);
#endif
    /* Remove leading zeros. */
    sp_clamp(r);
}
  6991. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  6992. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
  6993. !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  6994. !defined(WOLFSSL_RSA_VERIFY_ONLY))
  6995. /* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
  6996. * a must be greater than b.
  6997. *
  6998. * When using offset, r == a is faster.
  6999. *
  7000. * @param [in] a SP integer to subtract from.
  7001. * @param [in] b SP integer to subtract.
  7002. * @param [out] r SP integer to store result in.
  7003. * @param [in] o Number of digits to offset b.
  7004. */
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
    unsigned int o)
{
    unsigned int i = 0;
    unsigned int j;
#ifndef SQR_MUL_ASM
    /* Signed double-width accumulator: high half carries the borrow between
     * digits (arithmetic shift preserves its sign). */
    sp_int_sword t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

    /* Need to copy digits up to offset into result. */
    if (r != a) {
        for (; (i < o) && (i < a->used); i++) {
            r->dp[i] = a->dp[i];
        }
    }
    else {
        /* In place: low digits below the offset are already correct. */
        i = o;
    }
    /* Index to subtract at is the offset now. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        /* Add a into and subtract b from current value. */
        t += a->dp[i];
        t -= b->dp[j];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down - keeps the borrow for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into and subtract b from current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        SP_ASM_SUBB(l, h, b->dp[j]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* b is out of digits: propagate the borrow through the rest of a. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        /* Add a into current value. */
        t += a->dp[i];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* Set used based on last digit put in (requires a >= b so no final
     * borrow remains). */
    r->used = i;
    /* Remove leading zeros. */
    sp_clamp(r);
}
  7071. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
  7072. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7073. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
  7074. /* Add b to a into r: r = a + b
  7075. *
  7076. * @param [in] a SP integer to add to.
  7077. * @param [in] b SP integer to add.
  7078. * @param [out] r SP integer to store result in.
  7079. *
  7080. * @return MP_OKAY on success.
  7081. * @return MP_VAL when a, b, or r is NULL.
  7082. */
  7083. int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
  7084. {
  7085. int err = MP_OKAY;
  7086. /* Validate parameters. */
  7087. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7088. err = MP_VAL;
  7089. }
  7090. /* Check that r as big as a and b plus one word. */
  7091. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  7092. err = MP_VAL;
  7093. }
  7094. if (err == MP_OKAY) {
  7095. #ifndef WOLFSSL_SP_INT_NEGATIVE
  7096. /* Add two positive numbers. */
  7097. _sp_add_off(a, b, r, 0);
  7098. #else
  7099. /* Same sign then add absolute values and use sign. */
  7100. if (a->sign == b->sign) {
  7101. _sp_add_off(a, b, r, 0);
  7102. r->sign = a->sign;
  7103. }
  7104. /* Different sign and abs(a) >= abs(b). */
  7105. else if (_sp_cmp_abs(a, b) != MP_LT) {
  7106. /* Subtract absolute values and use sign of a unless result 0. */
  7107. _sp_sub_off(a, b, r, 0);
  7108. if (sp_iszero(r)) {
  7109. r->sign = MP_ZPOS;
  7110. }
  7111. else {
  7112. r->sign = a->sign;
  7113. }
  7114. }
  7115. /* Different sign and abs(a) < abs(b). */
  7116. else {
  7117. /* Reverse subtract absolute values and use sign of b. */
  7118. _sp_sub_off(b, a, r, 0);
  7119. r->sign = b->sign;
  7120. }
  7121. #endif
  7122. }
  7123. return err;
  7124. }
  7125. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  7126. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7127. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7128. /* Subtract b from a into r: r = a - b
  7129. *
  7130. * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
  7131. *
  7132. * @param [in] a SP integer to subtract from.
  7133. * @param [in] b SP integer to subtract.
  7134. * @param [out] r SP integer to store result in.
  7135. *
  7136. * @return MP_OKAY on success.
  7137. * @return MP_VAL when a, b, or r is NULL.
  7138. */
int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Check that r is as big as a and b plus one word. */
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#ifndef WOLFSSL_SP_INT_NEGATIVE
        /* Subtract positive number b from a. Requires a >= b. */
        _sp_sub_off(a, b, r, 0);
#else
        /* Different sign: a - (-b) == a + b, keep sign of a. */
        if (a->sign != b->sign) {
            /* Add absolute values and use sign of a. */
            _sp_add_off(a, b, r, 0);
            r->sign = a->sign;
        }
        /* Same sign and abs(a) >= abs(b). */
        else if (_sp_cmp_abs(a, b) != MP_LT) {
            /* Subtract absolute values and use sign of a unless result 0. */
            _sp_sub_off(a, b, r, 0);
            if (sp_iszero(r)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = a->sign;
            }
        }
        /* Same sign and abs(a) < abs(b). */
        else {
            /* Reverse subtract absolute values and use opposite sign of a.
             * Signs are 0/1 (MP_ZPOS/MP_NEG) so 1 - sign toggles them. */
            _sp_sub_off(b, a, r, 0);
            r->sign = 1 - a->sign;
        }
#endif
    }
    return err;
}
  7182. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7183. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
  7184. /****************************
  7185. * Add/Subtract mod functions
  7186. ****************************/
  7187. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7188. (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
  7189. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
  7190. /* Add two value and reduce: r = (a + b) % m
  7191. *
  7192. * @param [in] a SP integer to add.
  7193. * @param [in] b SP integer to add with.
  7194. * @param [in] m SP integer that is the modulus.
  7195. * @param [out] r SP integer to hold result.
  7196. *
  7197. * @return MP_OKAY on success.
  7198. * @return MP_MEM when dynamic memory allocation fails.
  7199. */
static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    /* Temporary needs one more digit than the larger operand for a possible
     * carry out of the addition. */
    unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    /* Allocate a temporary SP int to hold the unreduced sum. */
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
    if (err == MP_OKAY) {
        /* Do sum. */
        err = sp_add(a, b, t);
    }
    if (err == MP_OKAY) {
        /* Reduce the sum modulo m into the result. */
        err = sp_mod(t, m, r);
    }

    FREE_SP_INT(t, NULL);
    return err;
}
  7220. /* Add two value and reduce: r = (a + b) % m
  7221. *
  7222. * @param [in] a SP integer to add.
  7223. * @param [in] b SP integer to add with.
  7224. * @param [in] m SP integer that is the modulus.
  7225. * @param [out] r SP integer to hold result.
  7226. *
  7227. * @return MP_OKAY on success.
  7228. * @return MP_VAL when a, b, m or r is NULL.
  7229. * @return MP_MEM when dynamic memory allocation fails.
  7230. */
  7231. int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7232. {
  7233. int err = MP_OKAY;
  7234. /* Validate parameters. */
  7235. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7236. err = MP_VAL;
  7237. }
  7238. /* Ensure a and b aren't too big a number to operate on. */
  7239. else if (a->used >= SP_INT_DIGITS) {
  7240. err = MP_VAL;
  7241. }
  7242. else if (b->used >= SP_INT_DIGITS) {
  7243. err = MP_VAL;
  7244. }
  7245. #if 0
  7246. if (err == MP_OKAY) {
  7247. sp_print(a, "a");
  7248. sp_print(b, "b");
  7249. sp_print(m, "m");
  7250. }
  7251. #endif
  7252. if (err == MP_OKAY) {
  7253. /* Do add and modular reduction. */
  7254. err = _sp_addmod(a, b, m, r);
  7255. }
  7256. #if 0
  7257. if (err == MP_OKAY) {
  7258. sp_print(r, "rma");
  7259. }
  7260. #endif
  7261. return err;
  7262. }
  7263. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
  7264. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  7265. #if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  7266. defined(HAVE_ECC))
  7267. /* Sub b from a and reduce: r = (a - b) % m
  7268. * Result is always positive.
  7269. *
  7270. * @param [in] a SP integer to subtract from
  7271. * @param [in] b SP integer to subtract.
  7272. * @param [in] m SP integer that is the modulus.
  7273. * @param [out] r SP integer to hold result.
  7274. *
  7275. * @return MP_OKAY on success.
  7276. * @return MP_MEM when dynamic memory allocation fails.
  7277. */
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Temporary size: one digit more than the largest of a, b and m
     * (nested ternary picks max(a, b, m)). */
    unsigned int used = ((a->used >= m->used) ?
        ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
        ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
    DECL_SP_INT_ARRAY(t, used, 2);

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        /* Reduce a to less than m. */
        if (_sp_cmp(a, m) != MP_LT) {
            err = sp_mod(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Reduce b to less than m. */
        if (_sp_cmp(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            b = t[1];
        }
    }
    if (err == MP_OKAY) {
        /* Add m to a if a smaller than b - keeps the subtraction below from
         * going negative since negatives aren't representable here. */
        if (_sp_cmp(a, b) == MP_LT) {
            err = sp_add(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Subtract b from a. */
        err = sp_sub(a, b, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
#else /* WOLFSSL_SP_INT_NEGATIVE */
    /* Negative values supported: subtract first, reduce after. */
    unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    ALLOC_SP_INT_SIZE(t, used, err, NULL);
    /* Subtract b from a into temporary. */
    if (err == MP_OKAY) {
        err = sp_sub(a, b, t);
    }
    if (err == MP_OKAY) {
        /* Reduce result mod m into result. */
        err = sp_mod(t, m, r);
    }
    FREE_SP_INT(t, NULL);
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    return err;
}
  7330. /* Sub b from a and reduce: r = (a - b) % m
  7331. * Result is always positive.
  7332. *
  7333. * @param [in] a SP integer to subtract from
  7334. * @param [in] b SP integer to subtract.
  7335. * @param [in] m SP integer that is the modulus.
  7336. * @param [out] r SP integer to hold result.
  7337. *
  7338. * @return MP_OKAY on success.
  7339. * @return MP_VAL when a, b, m or r is NULL.
  7340. * @return MP_MEM when dynamic memory allocation fails.
  7341. */
  7342. int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7343. {
  7344. int err = MP_OKAY;
  7345. /* Validate parameters. */
  7346. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7347. err = MP_VAL;
  7348. }
  7349. /* Ensure a, b and m aren't too big a number to operate on. */
  7350. else if (a->used >= SP_INT_DIGITS) {
  7351. err = MP_VAL;
  7352. }
  7353. else if (b->used >= SP_INT_DIGITS) {
  7354. err = MP_VAL;
  7355. }
  7356. else if (m->used >= SP_INT_DIGITS) {
  7357. err = MP_VAL;
  7358. }
  7359. #if 0
  7360. if (err == MP_OKAY) {
  7361. sp_print(a, "a");
  7362. sp_print(b, "b");
  7363. sp_print(m, "m");
  7364. }
  7365. #endif
  7366. if (err == MP_OKAY) {
  7367. /* Do submod. */
  7368. err = _sp_submod(a, b, m, r);
  7369. }
  7370. #if 0
  7371. if (err == MP_OKAY) {
  7372. sp_print(r, "rms");
  7373. }
  7374. #endif
  7375. return err;
  7376. }
  7377. #endif /* WOLFSSL_SP_MATH_ALL */
/* Constant time clamping.
 *
 * Drops leading zero words from 'used' without a data-dependent branch:
 * the mask stays all-ones only while every word scanned so far (from the
 * top) has been zero.
 *
 * @param [in, out] a  SP integer to clamp.
 */
static void sp_clamp_ct(sp_int* a)
{
    int i;
    unsigned int used = a->used;
    unsigned int mask = (unsigned int)-1;

    /* Scan from the most significant word down. */
    for (i = (int)a->used - 1; i >= 0; i--) {
        /* Decrement count while still inside the run of leading zeros. */
        used -= ((unsigned int)(a->dp[i] == 0)) & mask;
        /* Mask is cleared for good at the first non-zero word. */
        mask &= (unsigned int)0 - (a->dp[i] == 0);
    }
    a->used = used;
}
  7393. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  7394. /* Add two value and reduce: r = (a + b) % m
  7395. *
  7396. * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
  7397. *
  7398. * Assumes a, b, m and r are not NULL.
  7399. * m and r must not be the same pointer.
  7400. *
  7401. * @param [in] a SP integer to add.
  7402. * @param [in] b SP integer to add with.
  7403. * @param [in] m SP integer that is the modulus.
  7404. * @param [out] r SP integer to hold result.
  7405. *
  7406. * @return MP_OKAY on success.
  7407. */
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    /* Signed double-width accumulators: w for the sum, s for the trial
     * subtraction of the modulus. */
    sp_int_sword w;
    sp_int_sword s;
#else
    /* Low/high digit pairs for the sum (wl/wh) and trial subtraction
     * (sl/sh), plus a temporary. */
    sp_int_digit wl;
    sp_int_digit wh;
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit t;
#endif
    /* All-ones when the modulus must be subtracted from the sum. */
    sp_int_digit mask;
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* Check result is as big as modulus. */
    if (m->used > r->size) {
        err = MP_VAL;
    }
    /* Validate parameters - modulus and result must not alias. */
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* Add a to b into r. Do the subtract of modulus but don't store
         * result. When subtract result is negative, the overflow will be
         * negative. Only need to subtract mod when result is positive -
         * overflow is positive.
         */
#ifndef SQR_MUL_ASM
        w = 0;
        s = 0;
#else
        wl = 0;
        sl = 0;
        sh = 0;
#endif
        /* Constant time - add modulus digits worth from a and b. */
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized - masks drop to zero
             * once i passes the respective operand's used count. */
            mask_a += (i == a->used);
            mask_b += (i == b->used);
#ifndef SQR_MUL_ASM
            /* Add next digits from a and b to current value. */
            w += a->dp[i] & mask_a;
            w += b->dp[i] & mask_b;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Add result digit into the trial subtraction. */
            s += (sp_int_digit)w;
            /* Subtract next digit of modulus. */
            s -= m->dp[i];
            /* Move high (sign-extended borrow) digit down. */
            s >>= DIGIT_BIT;
            /* Move carry of the sum down. */
            w >>= DIGIT_BIT;
#else
            wh = 0;
            /* Add next digits from a and b to current value. */
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC_REG(wl, wh, t);
            t = b->dp[i] & mask_b;
            SP_ASM_ADDC_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Add result digit into the trial subtraction. */
            SP_ASM_ADDC_REG(sl, sh, wl);
            /* Subtract next digit of modulus. */
            SP_ASM_SUBB(sl, sh, m->dp[i]);
            /* Move high digit of trial result down. */
            sl = sh;
            /* High digit is 0 when positive or -1 on negative. */
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
            /* Move carry of the sum down. */
            wl = wh;
#endif
        }
#ifndef SQR_MUL_ASM
        /* Add carry into trial subtraction result. */
        s += (sp_int_digit)w;
        /* s is non-negative exactly when sum >= modulus, i.e. when the
         * subtraction must actually happen. */
        mask = (sp_int_digit)0 - (s >= 0);
#else
        /* Add carry into trial subtraction result. */
        SP_ASM_ADDC_REG(sl, sh, wl);
        /* All-ones unless the trial subtraction went negative. */
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
#endif

        /* Constant time, conditionally, subtract modulus from sum. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        wl = 0;
        wh = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            /* Add result to current value and conditionally subtract
             * modulus. */
            w += r->dp[i];
            w -= m->dp[i] & mask;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high (borrow) digit down. */
            w >>= DIGIT_BIT;
#else
            /* Add result to current value and conditionally subtract
             * modulus. */
            SP_ASM_ADDC(wl, wh, r->dp[i]);
            t = m->dp[i] & mask;
            SP_ASM_SUBB_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Move high digit down. */
            wl = wh;
            /* High digit is 0 when positive or -1 on negative. */
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Remove leading zeros - constant time variant. */
        sp_clamp_ct(r);
#if 0
        sp_print(r, "rma");
#endif
    }

    return err;
}
  7548. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  7549. #if (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
  7550. (defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  7551. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  7552. defined(OPENSSL_ALL))
  7553. /* Sub b from a modulo m: r = (a - b) % m
  7554. *
  7555. * Result is always positive.
  7556. *
  7557. * Assumes a, b, m and r are not NULL.
  7558. * m and r must not be the same pointer.
  7559. *
  7560. * @param [in] a SP integer to subtract from
  7561. * @param [in] b SP integer to subtract.
  7562. * @param [in] m SP integer that is the modulus.
  7563. * @param [out] r SP integer to hold result.
  7564. *
  7565. * @return MP_OKAY on success.
  7566. */
static void _sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m,
    unsigned int max, sp_int* r)
{
#ifndef SQR_MUL_ASM
    /* Signed double-width accumulator: high half carries the borrow. */
    sp_int_sword w;
#else
    /* Low/high accumulator digits and temporary for add/sub with carry. */
    sp_int_digit l;
    sp_int_digit h;
    sp_int_digit t;
#endif
    /* All-ones when the subtraction went negative and the modulus must be
     * added back. */
    sp_int_digit mask;
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* In constant time, subtract b from a putting result in r. */
#ifndef SQR_MUL_ASM
    w = 0;
#else
    l = 0;
    h = 0;
#endif
    for (i = 0; i < max; i++) {
        /* Values past 'used' are not initialized - masks drop to zero once
         * i reaches the respective operand's used count. */
        mask_a += (i == a->used);
        mask_b += (i == b->used);
#ifndef SQR_MUL_ASM
        /* Add a to and subtract b from current value. */
        w += a->dp[i] & mask_a;
        w -= b->dp[i] & mask_b;
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)w;
        /* Move high (sign-extended borrow) digit down. */
        w >>= DIGIT_BIT;
#else
        /* Add a and subtract b from current value. */
        t = a->dp[i] & mask_a;
        SP_ASM_ADDC_REG(l, h, t);
        t = b->dp[i] & mask_b;
        SP_ASM_SUBB_REG(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
    }
    /* When w is negative then we need to add modulus to make result
     * positive. */
#ifndef SQR_MUL_ASM
    mask = (sp_int_digit)0 - (w < 0);
#else
    mask = h;
#endif

    /* Constant time, conditionally, add modulus to difference. */
#ifndef SQR_MUL_ASM
    w = 0;
#else
    l = 0;
#endif
    for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
        /* Add result and conditionally modulus to current value. */
        w += r->dp[i];
        w += m->dp[i] & mask;
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)w;
        /* Move high (carry) digit down. */
        w >>= DIGIT_BIT;
#else
        h = 0;
        /* Add result and conditionally modulus to current value. */
        SP_ASM_ADDC(l, h, r->dp[i]);
        t = m->dp[i] & mask;
        SP_ASM_ADDC_REG(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
#endif
    }
    /* Result will always have digits equal to or less than those in
     * modulus. */
    r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    /* Remove leading zeros - constant time variant. */
    sp_clamp_ct(r);
}
  7657. #endif
  7658. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  7659. /* Sub b from a modulo m: r = (a - b) % m
  7660. * Result is always positive.
  7661. *
  7662. * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
  7663. *
  7664. * Assumes a, b, m and r are not NULL.
  7665. * m and r must not be the same pointer.
  7666. *
  7667. * @param [in] a SP integer to subtract from
  7668. * @param [in] b SP integer to subtract.
  7669. * @param [in] m SP integer that is the modulus.
  7670. * @param [out] r SP integer to hold result.
  7671. *
  7672. * @return MP_OKAY on success.
  7673. */
  7674. int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7675. {
  7676. int err = MP_OKAY;
  7677. /* Check result is as big as modulus plus one digit. */
  7678. if (m->used > r->size) {
  7679. err = MP_VAL;
  7680. }
  7681. /* Validate parameters. */
  7682. if ((err == MP_OKAY) && (r == m)) {
  7683. err = MP_VAL;
  7684. }
  7685. if (err == MP_OKAY) {
  7686. #if 0
  7687. sp_print(a, "a");
  7688. sp_print(b, "b");
  7689. sp_print(m, "m");
  7690. #endif
  7691. _sp_submod_ct(a, b, m, m->used, r);
  7692. #if 0
  7693. sp_print(r, "rms");
  7694. #endif
  7695. }
  7696. return err;
  7697. }
  7698. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
/********************
 * Shifting functions
 ********************/
  7702. #if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  7703. defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7704. /* Left shift the multi-precision number by a number of digits.
  7705. *
  7706. * @param [in,out] a SP integer to shift.
  7707. * @param [in] s Number of digits to shift.
  7708. *
  7709. * @return MP_OKAY on success.
  7710. * @return MP_VAL when a is NULL, s is negative or the result is too big.
  7711. */
  7712. int sp_lshd(sp_int* a, int s)
  7713. {
  7714. int err = MP_OKAY;
  7715. /* Validate parameters. */
  7716. if ((a == NULL) || (s < 0)) {
  7717. err = MP_VAL;
  7718. }
  7719. /* Ensure number has enough digits for operation. */
  7720. if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
  7721. err = MP_VAL;
  7722. }
  7723. if (err == MP_OKAY) {
  7724. /* Move up digits. */
  7725. XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
  7726. /* Back fill with zeros. */
  7727. XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
  7728. /* Update used. */
  7729. a->used += (unsigned int)s;
  7730. /* Remove leading zeros. */
  7731. sp_clamp(a);
  7732. }
  7733. return err;
  7734. }
  7735. #endif
  7736. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7737. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7738. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Left shift the multi-precision number by n bits.
 * Bits may be larger than the word size.
 *
 * Used by sp_mul_2d() and other internal functions.
 *
 * @param [in,out] a  SP integer to shift.
 * @param [in]     n  Number of bits to shift left. Assumed non-negative.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when the result is too big.
 */
static int sp_lshb(sp_int* a, int n)
{
    int err = MP_OKAY;

    /* Shifting zero (no used digits) is a no-op. */
    if (a->used != 0) {
        /* Calculate number of whole digits to shift. */
        unsigned int s = (unsigned int)n >> SP_WORD_SHIFT;

        /* Ensure number has enough digits for result. '>=' as one extra
         * digit may be needed for bits carried out of the top digit. */
        if (a->used + s >= a->size) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            /* Get count of bits to move within a digit. */
            n &= SP_WORD_MASK;
            /* Check whether this is a complicated case (partial-digit shift
             * requires combining bits from adjacent digits). */
            if (n != 0) {
                unsigned int i;

                /* Shift up starting at most significant digit. */
                /* Get new most significant digit: bits shifted out the top. */
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);

                /* Shift up each digit, pulling low bits from the digit
                 * below. Iterates high-to-low so digits aren't clobbered
                 * before they are read. */
                for (i = a->used - 1; i >= 1; i--) {
                    a->dp[i + s] = (a->dp[i] << n) |
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
                }
                /* Shift up least significant digit - nothing below it. */
                a->dp[s] = a->dp[0] << n;
                /* Add new high digit unless zero. */
                if (v != 0) {
                    a->dp[a->used + s] = v;
                    a->used++;
                }
            }
            /* Only whole digits to move and number is not zero. */
            else if (s > 0) {
                /* Move up digits. */
                XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
            }

            /* Update used digit count. */
            a->used += s;
            /* Back fill the vacated low digits with zeros. */
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
        }
    }

    return err;
}
  7795. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7796. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7797. #ifdef WOLFSSL_SP_MATH_ALL
  7798. /* Shift a right by c digits: a = a >> (n * SP_WORD_SIZE)
  7799. *
  7800. * @param [in, out] a SP integer to shift.
  7801. * @param [in] c Number of digits to shift.
  7802. */
  7803. void sp_rshd(sp_int* a, int c)
  7804. {
  7805. /* Do shift if we have an SP int. */
  7806. if ((a != NULL) && (c > 0)) {
  7807. /* Make zero if shift removes all digits. */
  7808. if ((unsigned int)c >= a->used) {
  7809. _sp_zero(a);
  7810. }
  7811. else {
  7812. unsigned int i;
  7813. /* Update used digits count. */
  7814. a->used -= (unsigned int)c;
  7815. /* Move digits down. */
  7816. for (i = 0; i < a->used; i++, c++) {
  7817. a->dp[i] = a->dp[c];
  7818. }
  7819. }
  7820. }
  7821. }
  7822. #endif /* WOLFSSL_SP_MATH_ALL */
  7823. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7824. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7825. defined(WOLFSSL_HAVE_SP_DH)
  7826. /* Shift a right by n bits into r: r = a >> n
  7827. *
  7828. * @param [in] a SP integer to shift.
  7829. * @param [in] n Number of bits to shift.
  7830. * @param [out] r SP integer to store result in.
  7831. */
  7832. int sp_rshb(const sp_int* a, int n, sp_int* r)
  7833. {
  7834. int err = MP_OKAY;
  7835. /* Number of digits to shift down. */
  7836. unsigned int i = (unsigned int)(n >> SP_WORD_SHIFT);
  7837. if ((a == NULL) || (n < 0)) {
  7838. err = MP_VAL;
  7839. }
  7840. /* Handle case where shifting out all digits. */
  7841. if ((err == MP_OKAY) && (i >= a->used)) {
  7842. _sp_zero(r);
  7843. }
  7844. /* Change callers when more error cases returned. */
  7845. else if ((err == MP_OKAY) && (a->used - i > r->size)) {
  7846. err = MP_VAL;
  7847. }
  7848. else if (err == MP_OKAY) {
  7849. unsigned int j;
  7850. /* Number of bits to shift in digits. */
  7851. n &= SP_WORD_SIZE - 1;
  7852. /* Handle simple case. */
  7853. if (n == 0) {
  7854. /* Set the count of used digits. */
  7855. r->used = a->used - i;
  7856. /* Move digits down. */
  7857. if (r == a) {
  7858. XMEMMOVE(r->dp, r->dp + i, SP_WORD_SIZEOF * r->used);
  7859. }
  7860. else {
  7861. XMEMCPY(r->dp, a->dp + i, SP_WORD_SIZEOF * r->used);
  7862. }
  7863. }
  7864. else {
  7865. /* Move the bits down starting at least significant digit. */
  7866. for (j = 0; i < a->used-1; i++, j++)
  7867. r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
  7868. /* Most significant digit has no higher digit to pull from. */
  7869. r->dp[j] = a->dp[i] >> n;
  7870. /* Set the count of used digits. */
  7871. r->used = j + (r->dp[j] > 0);
  7872. }
  7873. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7874. if (sp_iszero(r)) {
  7875. /* Set zero sign. */
  7876. r->sign = MP_ZPOS;
  7877. }
  7878. else {
  7879. /* Retain sign. */
  7880. r->sign = a->sign;
  7881. }
  7882. #endif
  7883. }
  7884. return err;
  7885. }
  7886. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7887. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
  7888. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7889. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7890. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Handle the case where the top d->used digits of the dividend are greater
 * than or equal to the divisor: subtract one multiple of d from the top of
 * a and add 1 to the corresponding quotient digit.
 *
 * NOTE(review): callers appear to pass a with a->used >= d->used and d with
 * its top digit non-zero - confirm against _sp_div_impl()/_sp_div().
 *
 * @param [in, out] a  SP integer dividend; top reduced in place.
 * @param [in]      d  SP integer divisor.
 * @param [in, out] r  SP integer quotient being accumulated.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
{
    unsigned int i;

    /* Compare top digits of dividend with those of divisor up to last. */
    for (i = d->used - 1; i > 0; i--) {
        /* Break if top divisor is not equal to dividend. */
        if (a->dp[a->used - d->used + i] != d->dp[i]) {
            break;
        }
    }
    /* Check if top dividend is greater than or equal to divisor. */
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
        /* Update quotient result. */
        r->dp[a->used - d->used] += 1;
        /* Get 'used' to restore - ensure zeros put into quotient. */
        i = a->used;
        /* Subtract d from top of a (offset by a->used - d->used digits). */
        _sp_sub_off(a, d, a, a->used - d->used);
        /* Restore 'used' on remainder. */
        a->used = i;
    }
}
/* Divide a by d and return the quotient in r and the remainder in a.
 * r = a / d; a = a % d
 *
 * Schoolbook long division on digits: for each position a trial quotient
 * digit is estimated from the top words, corrected downward if too large,
 * and the corresponding multiple of d subtracted from a.
 *
 * Note: a constantly has multiples of d subtracted from it.
 *
 * @param [in, out] a      SP integer to be divided and remainder on out.
 * @param [in]      d      SP integer to divide by.
 * @param [out]     r      SP integer that is the quotient.
 * @param [out]     trial  SP integer that is product in trial division.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when operation fails - only when compiling small code.
 */
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
{
    int err = MP_OKAY;
    unsigned int i;
#ifdef WOLFSSL_SP_SMALL
    int c;
#else
    unsigned int j;
    unsigned int o;
#ifndef SQR_MUL_ASM
    sp_int_sword sw;
#else
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit st;
#endif
#endif /* WOLFSSL_SP_SMALL */
    sp_int_digit t;
    sp_int_digit dt;

    /* Set result size to clear. */
    r->used = a->used - d->used + 1;
    /* Set all potentially used digits to zero. */
    for (i = 0; i < r->used; i++) {
        r->dp[i] = 0;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign = MP_ZPOS;
#endif
    /* Get the most significant digit (will have top bit set). */
    dt = d->dp[d->used-1];

    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
    _sp_div_same_size(a, d, r);

    /* Keep subtracting multiples of d as long as the digit count of a is
     * greater than or equal to d.
     */
    for (i = a->used - 1; i >= d->used; i--) {
        /* When top digits equal, guesstimate maximum multiplier.
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
         * That is, for w (word size in bits) > 1, n > 1, let:
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
         * Then,
         *   d * t
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
         *   = a - 2^((n-1)*w+1) - 2^w + 2
         *   d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
         */
        if (a->dp[i] == dt) {
            t = SP_DIGIT_MAX;
        }
        else {
            /* Calculate trial quotient by dividing top word of dividend by
             * top digit of divisor.
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
             * Implementations in assembly, using builtins or using
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
             */
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
        }
#ifdef WOLFSSL_SP_SMALL
        do {
            /* Calculate trial value from trial quotient. */
            err = _sp_mul_d(d, t, trial, i - d->used);
            if (err != MP_OKAY) {
                break;
            }
            /* Check if trial is bigger. */
            c = _sp_cmp_abs(trial, a);
            if (c == MP_GT) {
                /* Decrement trial quotient and try again. */
                t--;
            }
        }
        while (c == MP_GT);
        if (err != MP_OKAY) {
            break;
        }

        /* Subtract the trial and add quotient to result. */
        _sp_sub_off(a, trial, a, 0);
        r->dp[i - d->used] += t;
        /* Handle overflow of digit. */
        if (r->dp[i - d->used] < t) {
            r->dp[i + 1 - d->used]++;
        }
#else
        /* Index of lowest digit trial is subtracted from. */
        o = i - d->used;
        do {
#ifndef SQR_MUL_ASM
            sp_int_word tw = 0;
#else
            sp_int_digit tl = 0;
            sp_int_digit th = 0;
#endif
            /* Multiply divisor by trial quotient, propagating the carry. */
            for (j = 0; j < d->used; j++) {
#ifndef SQR_MUL_ASM
                tw += (sp_int_word)d->dp[j] * t;
                trial->dp[j] = (sp_int_digit)tw;
                tw >>= SP_WORD_SIZE;
#else
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
                trial->dp[j] = tl;
                tl = th;
                th = 0;
#endif
            }
            /* Final carry becomes the extra top digit of the trial. */
#ifndef SQR_MUL_ASM
            trial->dp[j] = (sp_int_digit)tw;
#else
            trial->dp[j] = tl;
#endif

            /* Check trial quotient isn't larger than dividend: find highest
             * differing digit. */
            for (j = d->used; j > 0; j--) {
                if (trial->dp[j] != a->dp[j + o]) {
                    break;
                }
            }
            /* Decrement trial quotient if larger and try again. */
            if (trial->dp[j] > a->dp[j + o]) {
                t--;
            }
        }
        while (trial->dp[j] > a->dp[j + o]);

#ifndef SQR_MUL_ASM
        sw = 0;
#else
        sl = 0;
        sh = 0;
#endif
        /* Subtract trial from a with borrow - don't need to update used. */
        for (j = 0; j <= d->used; j++) {
#ifndef SQR_MUL_ASM
            sw += a->dp[j + o];
            sw -= trial->dp[j];
            a->dp[j + o] = (sp_int_digit)sw;
            /* Arithmetic shift keeps the borrow as -1/0. */
            sw >>= SP_WORD_SIZE;
#else
            st = a->dp[j + o];
            SP_ASM_ADDC(sl, sh, st);
            st = trial->dp[j];
            SP_ASM_SUBB(sl, sh, st);
            a->dp[j + o] = sl;
            sl = sh;
            /* Sign-extend the borrow into the next word. */
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
#endif
        }
        r->dp[o] = t;
#endif /* WOLFSSL_SP_SMALL */
    }

    /* Update used. */
    a->used = i + 1;
    if (a->used == d->used) {
        /* Finish div now that length of dividend is same as divisor. */
        _sp_div_same_size(a, d, r);
    }

    return err;
}
/* Divide a by d and return the quotient in r and the remainder in rem.
 * r = a / d; rem = a % d
 *
 * Handles trivial cases directly, otherwise normalizes the divisor so its
 * top bit is set (shifting dividend to match) and calls _sp_div_impl().
 *
 * @param [in]  a     SP integer to be divided.
 * @param [in]  d     SP integer to divide by.
 * @param [out] r     SP integer that is the quotient. May be NULL.
 * @param [out] rem   SP integer that is the remainder. May be NULL.
 * @param [in]  used  Number of digits in temporaries to use.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
    unsigned int used)
{
    int err = MP_OKAY;
    int ret;
    int done = 0;
    /* Normalizing shift amount; SP_WORD_SIZE means "no shift needed". */
    int s = 0;
    sp_int* sa = NULL;
    sp_int* sd = NULL;
    sp_int* tr = NULL;
    sp_int* trial = NULL;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int signA = MP_ZPOS;
    unsigned int signD = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    /* Intermediates will always be less than or equal to dividend. */
    DECL_SP_INT_ARRAY(td, used, 4);

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Cache sign for results. */
    signA = a->sign;
    signD = d->sign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    /* Handle simple case of: dividend < divisor. */
    ret = _sp_cmp_abs(a, d);
    if (ret == MP_LT) {
        /* a = 0 * d + a */
        if ((rem != NULL) && (a != rem)) {
            _sp_copy(a, rem);
        }
        if (r != NULL) {
            _sp_set(r, 0);
        }
        done = 1;
    }
    /* Handle simple case of: dividend == divisor. */
    else if (ret == MP_EQ) {
        /* a = 1 * d + 0 */
        if (rem != NULL) {
            _sp_set(rem, 0);
        }
        if (r != NULL) {
            _sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }
    else if (sp_count_bits(a) == sp_count_bits(d)) {
        /* a is greater than d but same bit length - quotient must be 1,
         * so just subtract. */
        if (rem != NULL) {
            _sp_sub_off(a, d, rem, 0);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = signA;
#endif
        }
        if (r != NULL) {
            _sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }

    /* Allocate temporary 'sp_int's and assign. */
    if ((!done) && (err == MP_OKAY)) {
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        int cnt = 4;
        /* Reuse remainder sp_int where possible. */
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
            sa = rem;
            cnt--;
        }
        /* Reuse result sp_int where possible. */
        if ((r != NULL) && (r != d)) {
            tr = r;
            cnt--;
        }
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
#else
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
#endif
    }
    if ((!done) && (err == MP_OKAY)) {
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        int i = 2;

        /* Set to temporary when not reusing. */
        if (sa == NULL) {
            sa = td[i++];
            _sp_init_size(sa, used);
        }
        if (tr == NULL) {
            tr = td[i];
            _sp_init_size(tr, a->used - d->used + 2);
        }
#else
        sa    = td[2];
        tr    = td[3];

        _sp_init_size(sa, used);
        _sp_init_size(tr, a->used - d->used + 2);
#endif
        sd    = td[0];
        trial = td[1];

        /* Initialize sizes to minimal values. */
        _sp_init_size(sd, d->used + 1);
        _sp_init_size(trial, used);

        /* Move divisor to top of word. Adjust dividend as well. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
        _sp_copy(a, sa);
        /* Only shift if top bit of divisor not set. */
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                /* Shift a copy of the divisor - d is const. */
                _sp_copy(d, sd);
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
        /* Do division: tr = sa / d, sa = sa % d. */
        err = _sp_div_impl(sa, d, tr, trial);
        /* Return the remainder if required. */
        if ((err == MP_OKAY) && (rem != NULL)) {
            /* Move result back down if moved up for divisor value. */
            if (s != SP_WORD_SIZE) {
                (void)sp_rshb(sa, s, sa);
            }
            _sp_copy(sa, rem);
            sp_clamp(rem);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
#endif
        }
        /* Return the quotient if required. */
        if ((err == MP_OKAY) && (r != NULL)) {
            _sp_copy(tr, r);
            sp_clamp(r);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if ((r->used == 0) || (signA == signD)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = MP_NEG;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
  8252. /* Divide a by d and return the quotient in r and the remainder in rem.
  8253. * r = a / d; rem = a % d
  8254. *
  8255. * @param [in] a SP integer to be divided.
  8256. * @param [in] d SP integer to divide by.
  8257. * @param [out] r SP integer that is the quotient.
  8258. * @param [out] rem SP integer that is the remainder.
  8259. *
  8260. * @return MP_OKAY on success.
  8261. * @return MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
  8262. * @return MP_MEM when dynamic memory allocation fails.
  8263. */
  8264. int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
  8265. {
  8266. int err = MP_OKAY;
  8267. unsigned int used = 1;
  8268. /* Validate parameters. */
  8269. if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
  8270. err = MP_VAL;
  8271. }
  8272. /* a / 0 = infinity. */
  8273. if ((err == MP_OKAY) && sp_iszero(d)) {
  8274. err = MP_VAL;
  8275. }
  8276. /* Ensure quotient result has enough memory. */
  8277. if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
  8278. err = MP_VAL;
  8279. }
  8280. if ((err == MP_OKAY) && (rem != NULL)) {
  8281. /* Ensure remainder has enough memory. */
  8282. if ((a->used <= d->used) && (rem->size < a->used + 1)) {
  8283. err = MP_VAL;
  8284. }
  8285. else if ((a->used > d->used) && (rem->size < d->used + 1)) {
  8286. err = MP_VAL;
  8287. }
  8288. }
  8289. if (err == MP_OKAY) {
  8290. if (a->used == SP_INT_DIGITS) {
  8291. /* May need to shift number being divided left into a new word. */
  8292. int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
  8293. if ((bits != SP_WORD_SIZE) &&
  8294. (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
  8295. err = MP_VAL;
  8296. }
  8297. else {
  8298. used = SP_INT_DIGITS;
  8299. }
  8300. }
  8301. else {
  8302. used = a->used + 1;
  8303. }
  8304. }
  8305. if (err == MP_OKAY) {
  8306. #if 0
  8307. sp_print(a, "a");
  8308. sp_print(d, "b");
  8309. #endif
  8310. /* Do operation. */
  8311. err = _sp_div(a, d, r, rem, used);
  8312. #if 0
  8313. if (err == MP_OKAY) {
  8314. if (rem != NULL) {
  8315. sp_print(rem, "rdr");
  8316. }
  8317. if (r != NULL) {
  8318. sp_print(r, "rdw");
  8319. }
  8320. }
  8321. #endif
  8322. }
  8323. return err;
  8324. }
  8325. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8326. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8327. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  8328. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  8329. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  8330. #ifndef FREESCALE_LTC_TFM
  8331. #ifdef WOLFSSL_SP_INT_NEGATIVE
/* Calculate the remainder of dividing a by m: r = a mod m.
 * Used when r aliases m (r == m) - the remainder is computed into a
 * temporary so the modulus isn't clobbered mid-operation.
 *
 * @param [in]  a  SP integer to reduce.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Remainder will start as a. */
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);

    /* In case remainder is modulus - allocate temporary. */
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t, a->used + 1);
        /* Use divide to calculate remainder and don't get quotient. */
        err = sp_div(a, m, NULL, t);
    }
    if (err == MP_OKAY) {
        /* Make remainder's sign match the modulus and copy into result. */
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
            err = sp_add(t, m, r);
        }
        else {
            _sp_copy(t, r);
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
  8365. #endif
  8366. /* Calculate the remainder of dividing a by m: r = a mod m.
  8367. *
  8368. * @param [in] a SP integer to reduce.
  8369. * @param [in] m SP integer that is the modulus.
  8370. * @param [out] r SP integer to store result in.
  8371. *
  8372. * @return MP_OKAY on success.
  8373. * @return MP_VAL when a, m or r is NULL or m is 0.
  8374. * @return MP_MEM when dynamic memory allocation fails.
  8375. */
  8376. int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
  8377. {
  8378. int err = MP_OKAY;
  8379. /* Validate parameters. */
  8380. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  8381. err = MP_VAL;
  8382. }
  8383. /* Ensure a isn't too big a number to operate on. */
  8384. else if (a->used >= SP_INT_DIGITS) {
  8385. err = MP_VAL;
  8386. }
  8387. #ifndef WOLFSSL_SP_INT_NEGATIVE
  8388. if (err == MP_OKAY) {
  8389. /* Use divide to calculate remainder and don't get quotient. */
  8390. err = sp_div(a, m, NULL, r);
  8391. }
  8392. #else
  8393. if ((err == MP_OKAY) && (r != m)) {
  8394. err = sp_div(a, m, NULL, r);
  8395. if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
  8396. err = sp_add(r, m, r);
  8397. }
  8398. }
  8399. else if (err == MP_OKAY) {
  8400. err = _sp_mod(a, m, r);
  8401. }
  8402. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  8403. return err;
  8404. }
  8405. #endif /* !FREESCALE_LTC_TFM */
  8406. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8407. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8408. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  8409. defined(HAVE_ECC) || !defined(NO_RSA)
  8410. /* START SP_MUL implementations. */
  8411. /* This code is generated.
  8412. * To generate:
  8413. * cd scripts/sp/sp_int
  8414. * ./gen.sh
  8415. * File sp_mul.c contains code.
  8416. */
  8417. #ifdef SQR_MUL_ASM
/* Multiply a by b into r where a and b have same no. digits. r = a * b
 *
 * Optimised code for when number of digits in a and b are the same.
 * Column-at-a-time (comba) multiplication: each result digit k is the sum
 * of products a[i]*b[j] with i+j == k, accumulated in a three-word
 * (l, h, o) accumulator.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used];
#else
    sp_int_digit t[SP_INT_DIGITS / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Temporary for the low half of the result. */
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        const sp_int_digit* dp;

        h = 0;
        l = 0;
        /* Column 0: single product a[0] * b[0]. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns 1 .. a->used-1: low half goes into temporary t so that
         * the operands' low digits aren't overwritten while still needed
         * (allows r to alias a or b). */
        for (k = 1; k <= a->used - 1; k++) {
            j = (int)k;
            dp = a->dp;
            for (; j >= 0; dp++, j--) {
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Columns a->used .. 2*(a->used-1): past the operands' low digits,
         * so these can be written straight into r. */
        for (; k <= (a->used - 1) * 2; k++) {
            i = k - (b->used - 1);
            dp = &b->dp[b->used - 1];
            for (; i < a->used; i++, dp--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
            }
            r->dp[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final top digit. */
        r->dp[k] = l;
        /* Copy the accumulated low half into the result. */
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
        r->used = k + 1;
        /* Remove leading zeros. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
/* Multiply a by b into r. r = a * b
 *
 * Column-at-a-time (comba) multiplication for operands of differing digit
 * counts, using the SP_ASM accumulator macros. The whole result is built
 * in temporary t so r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Temporary for the full a->used + b->used digit result. */
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;

        h = 0;
        l = 0;
        /* Column 0: single product a[0] * b[0]. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns 1 .. b->used-1: i starts at 0, j walks down from k. */
        for (k = 1; k <= b->used - 1; k++) {
            i = 0;
            j = (int)k;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining columns: j starts at b's top digit. */
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
            j = (int)(b->used - 1);
            i = k - (unsigned int)j;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final top digit. */
        t[k] = l;
        r->used = k + 1;
        /* Copy the accumulated result into r. */
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        /* Remove leading zeros. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8569. #else
/* Multiply a by b into r. r = a * b
 *
 * Column-at-a-time (comba) multiplication using double-width words for
 * the products. The whole result is built in temporary t so r may alias
 * a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Temporary for the full a->used + b->used digit result. */
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;    /* double-width product */
        sp_int_word l;    /* running low accumulator for current column */
        sp_int_word h;    /* running high accumulator for current column */
#ifdef SP_WORD_OVERFLOW
        sp_int_word o;    /* extra overflow accumulator */
#endif
        /* Column 0: single product a[0] * b[0]. */
        w = (sp_int_word)a->dp[0] * b->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        /* Remaining columns: sum products a[i]*b[j] with i + j == k. */
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
            /* Branchless max(0, k - (b->used - 1)): when the subtraction
             * wraps negative the sign-bit mask zeroes i. */
            i = k - (b->used - 1);
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
            j = (int)(k - i);
            for (; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * b->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                /* Propagate accumulator carries when digit and word are
                 * the same size. */
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Emit the column's digit and shift the accumulators down. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        /* Final top digit. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        /* Copy the accumulated result into r. */
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        /* Remove leading zeros. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8650. #endif
  8651. #ifndef WOLFSSL_SP_SMALL
  8652. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  8653. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  8654. #ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Long-hand implementation: all 16 double-width partial products
 * da[i] * db[j] are computed up front into w[], then summed one output
 * column at a time.  Within a column, only the low digit of each word
 * is added; the word is then shifted right by a digit so that its high
 * part contributes to the next column.  w[0] doubles as the running
 * column accumulator/carry.
 *
 * Assumes a and b each provide 4 digits in dp[] and r->dp has room for
 * 8 digits (guarded by SP_WORD_SIZE == 64 && SP_INT_BITS >= 256).
 * NOTE(review): r->dp is only written after all reads of the
 * corresponding operand digits — presumably so r may alias a or b;
 * confirm against callers.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails (small-stack
 *         build only).
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    /* 16 partial products: one per (i, j) pair of digit indices. */
    sp_int_word w[16];
#endif
    const sp_int_digit* da = a->dp;
    const sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* Partial products, grouped by output column i + j. */
        w[0] = (sp_int_word)da[0] * db[0];                  /* column 0 */
        w[1] = (sp_int_word)da[0] * db[1];                  /* column 1 */
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];                  /* column 2 */
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];                  /* column 3 */
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];                 /* column 4 */
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];                 /* column 5 */
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];                 /* column 6 */

        /* Column 0: low digit of w[0]; high part carries via shift. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 1: carry + low digits of column-1 products. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 2: high halves of column-1 products + low digits of
         * column-2 products. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 3. */
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 4. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 5. */
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 6. */
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 7: final carry + high half of last product. */
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = (sp_int_digit)w[0];
        r->used = 8;
        /* Remove leading zero digits. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8770. #else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation: one output column is completed at a time.
 * (l, h, o) form a three-digit accumulator; SP_ASM_MUL_ADD folds a
 * partial product a->dp[i] * b->dp[j] (with i + j equal to the current
 * column) into it, and after each column the accumulator shifts down
 * one digit (l = h; h = o; o = 0).
 *
 * The four low result digits are buffered in t[] and copied into r->dp
 * at the end — presumably so r may alias a or b (by the time
 * r->dp[4..7] are written directly, only operand digits 0..3 are still
 * read).
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return MP_OKAY always (no allocation is performed).
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[4];

    /* Column 0: note SP_ASM_MUL's argument order puts the low digit of
     * the product in h and the high digit in l, so t[0] takes the low
     * digit and l already holds the carry into column 1. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: high half written straight to r. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: last product cannot overflow into a third
     * digit, so the no-overflow variant is used. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Install the buffered low digits. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    /* Remove leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  8832. #endif /* SQR_MUL_ASM */
  8833. #endif /* SP_WORD_SIZE == 64 */
  8834. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  8835. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 6-digit operands: one output column at a
 * time, with (l, h, o) as a three-digit accumulator that collects all
 * partial products a->dp[i] * b->dp[j] where i + j equals the column
 * index, then shifts down a digit between columns.
 *
 * The six low result digits are buffered in t[] and copied into r->dp
 * at the end — presumably so r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return MP_OKAY always (no allocation is performed).
 */
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[6];

    /* Column 0: SP_ASM_MUL's argument order puts the low product digit
     * in h, the high digit in l (the carry into column 1). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: upper half written straight to r from here on. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Columns 10 and 11: the final product cannot overflow into a
     * third digit, so the no-overflow variant is used. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Install the buffered low digits. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    /* Remove leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  8933. #endif /* SQR_MUL_ASM */
  8934. #endif /* SP_WORD_SIZE == 64 */
  8935. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  8936. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 8-digit operands: one output column at a
 * time, with (l, h, o) as a three-digit accumulator that collects all
 * partial products a->dp[i] * b->dp[j] where i + j equals the column
 * index, then shifts down a digit between columns.
 *
 * The eight low result digits are buffered in t[] and copied into
 * r->dp at the end — presumably so r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return MP_OKAY always (no allocation is performed).
 */
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[8];

    /* Column 0: SP_ASM_MUL's argument order puts the low product digit
     * in h, the high digit in l (the carry into column 1). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: upper half written straight to r from here on. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Columns 14 and 15: the final product cannot overflow into a
     * third digit, so the no-overflow variant is used. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* Install the buffered low digits. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    /* Remove leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  9078. #endif /* SQR_MUL_ASM */
  9079. #endif /* SP_WORD_SIZE == 32 */
  9080. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  9081. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 12-digit operands: one output column at a
 * time, with (l, h, o) as a three-digit accumulator that collects all
 * partial products a->dp[i] * b->dp[j] where i + j equals the column
 * index, then shifts down a digit between columns.
 *
 * The twelve low result digits are buffered in t[] and copied into
 * r->dp at the end — presumably so r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return MP_OKAY always (no allocation is performed).
 */
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[12];

    /* Column 0: SP_ASM_MUL's argument order puts the low product digit
     * in h, the high digit in l (the carry into column 1). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12: upper half written straight to r from here on. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 14. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 15. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 16. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 17. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 18. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 19. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 20. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 21. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Columns 22 and 23: the final product cannot overflow into a
     * third digit, so the no-overflow variant is used. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* Install the buffered low digits. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    /* Remove leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  9335. #endif /* SQR_MUL_ASM */
  9336. #endif /* SP_WORD_SIZE == 32 */
  9337. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  9338. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  9339. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  9340. (SP_WORD_SIZE == 64)))
  9341. #if SP_INT_DIGITS >= 32
  9342. /* Multiply a by b and store in r: r = a * b
  9343. *
  9344. * Comba implementation.
  9345. *
  9346. * @param [in] a SP integer to multiply.
  9347. * @param [in] b SP integer to multiply.
  9348. * @param [out] r SP integer result.
  9349. *
  9350. * @return MP_OKAY on success.
  9351. * @return MP_MEM when dynamic memory allocation fails.
  9352. */
  9353. static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
  9354. {
  9355. int err = MP_OKAY;
  9356. sp_int_digit l = 0;
  9357. sp_int_digit h = 0;
  9358. sp_int_digit o = 0;
  9359. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9360. sp_int_digit* t = NULL;
  9361. #else
  9362. sp_int_digit t[16];
  9363. #endif
  9364. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9365. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
  9366. DYNAMIC_TYPE_BIGINT);
  9367. if (t == NULL) {
  9368. err = MP_MEM;
  9369. }
  9370. #endif
  9371. if (err == MP_OKAY) {
  9372. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9373. t[0] = h;
  9374. h = 0;
  9375. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9376. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9377. t[1] = l;
  9378. l = h;
  9379. h = o;
  9380. o = 0;
  9381. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9382. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9383. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9384. t[2] = l;
  9385. l = h;
  9386. h = o;
  9387. o = 0;
  9388. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9389. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9390. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9391. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9392. t[3] = l;
  9393. l = h;
  9394. h = o;
  9395. o = 0;
  9396. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9397. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9398. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9399. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9400. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9401. t[4] = l;
  9402. l = h;
  9403. h = o;
  9404. o = 0;
  9405. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9406. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9407. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9408. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9409. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9410. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9411. t[5] = l;
  9412. l = h;
  9413. h = o;
  9414. o = 0;
  9415. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9416. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9417. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9418. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9419. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9420. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9421. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9422. t[6] = l;
  9423. l = h;
  9424. h = o;
  9425. o = 0;
  9426. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9427. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9428. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9429. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9430. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9431. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9432. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9433. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9434. t[7] = l;
  9435. l = h;
  9436. h = o;
  9437. o = 0;
  9438. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9439. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9440. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9441. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9442. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9443. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9444. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9445. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9446. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9447. t[8] = l;
  9448. l = h;
  9449. h = o;
  9450. o = 0;
  9451. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9452. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9453. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9454. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9455. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9456. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9457. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9458. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9459. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9460. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9461. t[9] = l;
  9462. l = h;
  9463. h = o;
  9464. o = 0;
  9465. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9466. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9467. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9468. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9469. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9470. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9471. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9472. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9473. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9474. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9475. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9476. t[10] = l;
  9477. l = h;
  9478. h = o;
  9479. o = 0;
  9480. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9481. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9482. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9483. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9484. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9485. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9486. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9487. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9488. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9489. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9490. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9491. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9492. t[11] = l;
  9493. l = h;
  9494. h = o;
  9495. o = 0;
  9496. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9497. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9498. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9499. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9500. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9501. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9502. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9503. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9504. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9505. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9506. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9507. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9508. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9509. t[12] = l;
  9510. l = h;
  9511. h = o;
  9512. o = 0;
  9513. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9514. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9515. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9516. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9517. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9518. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9519. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9520. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9521. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9522. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9523. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9524. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9525. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9526. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9527. t[13] = l;
  9528. l = h;
  9529. h = o;
  9530. o = 0;
  9531. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9532. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9533. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9534. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9535. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9536. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9537. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9538. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9539. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9540. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9541. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9542. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9543. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9544. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9545. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9546. t[14] = l;
  9547. l = h;
  9548. h = o;
  9549. o = 0;
  9550. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9551. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9552. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9553. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9554. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9555. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9556. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9557. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9558. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9559. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9560. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9561. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9562. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9563. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9564. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9565. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9566. t[15] = l;
  9567. l = h;
  9568. h = o;
  9569. o = 0;
  9570. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9571. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9572. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9573. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9574. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9575. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9576. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9577. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  9578. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  9579. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  9580. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  9581. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  9582. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  9583. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  9584. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  9585. r->dp[16] = l;
  9586. l = h;
  9587. h = o;
  9588. o = 0;
  9589. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  9590. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  9591. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  9592. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  9593. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  9594. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  9595. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  9596. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  9597. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  9598. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  9599. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  9600. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  9601. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  9602. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  9603. r->dp[17] = l;
  9604. l = h;
  9605. h = o;
  9606. o = 0;
  9607. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  9608. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  9609. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  9610. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  9611. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  9612. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  9613. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  9614. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  9615. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  9616. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  9617. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  9618. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  9619. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  9620. r->dp[18] = l;
  9621. l = h;
  9622. h = o;
  9623. o = 0;
  9624. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  9625. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  9626. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  9627. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  9628. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  9629. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  9630. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  9631. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  9632. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  9633. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  9634. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  9635. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  9636. r->dp[19] = l;
  9637. l = h;
  9638. h = o;
  9639. o = 0;
  9640. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  9641. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  9642. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  9643. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  9644. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  9645. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  9646. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  9647. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  9648. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  9649. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  9650. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  9651. r->dp[20] = l;
  9652. l = h;
  9653. h = o;
  9654. o = 0;
  9655. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  9656. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  9657. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  9658. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  9659. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  9660. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  9661. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  9662. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  9663. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  9664. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  9665. r->dp[21] = l;
  9666. l = h;
  9667. h = o;
  9668. o = 0;
  9669. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  9670. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  9671. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  9672. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  9673. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  9674. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  9675. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  9676. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  9677. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  9678. r->dp[22] = l;
  9679. l = h;
  9680. h = o;
  9681. o = 0;
  9682. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  9683. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  9684. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  9685. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  9686. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  9687. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  9688. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  9689. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  9690. r->dp[23] = l;
  9691. l = h;
  9692. h = o;
  9693. o = 0;
  9694. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  9695. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  9696. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  9697. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  9698. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  9699. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  9700. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  9701. r->dp[24] = l;
  9702. l = h;
  9703. h = o;
  9704. o = 0;
  9705. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  9706. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  9707. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  9708. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  9709. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  9710. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  9711. r->dp[25] = l;
  9712. l = h;
  9713. h = o;
  9714. o = 0;
  9715. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  9716. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  9717. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  9718. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  9719. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  9720. r->dp[26] = l;
  9721. l = h;
  9722. h = o;
  9723. o = 0;
  9724. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  9725. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  9726. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  9727. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  9728. r->dp[27] = l;
  9729. l = h;
  9730. h = o;
  9731. o = 0;
  9732. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  9733. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  9734. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  9735. r->dp[28] = l;
  9736. l = h;
  9737. h = o;
  9738. o = 0;
  9739. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  9740. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  9741. r->dp[29] = l;
  9742. l = h;
  9743. h = o;
  9744. SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
  9745. r->dp[30] = l;
  9746. r->dp[31] = h;
  9747. XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
  9748. r->used = 32;
  9749. sp_clamp(r);
  9750. }
  9751. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9752. if (t != NULL) {
  9753. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  9754. }
  9755. #endif
  9756. return err;
  9757. }
  9758. #endif /* SP_INT_DIGITS >= 32 */
  9759. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || (!WOLFSSL_SP_MATH &&
  9760. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64)) */
  9761. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  9762. #if SP_INT_DIGITS >= 48
  9763. /* Multiply a by b and store in r: r = a * b
  9764. *
  9765. * Comba implementation.
  9766. *
  9767. * @param [in] a SP integer to multiply.
  9768. * @param [in] b SP integer to multiply.
  9769. * @param [out] r SP integer result.
  9770. *
  9771. * @return MP_OKAY on success.
  9772. * @return MP_MEM when dynamic memory allocation fails.
  9773. */
  9774. static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
  9775. {
  9776. int err = MP_OKAY;
  9777. sp_int_digit l = 0;
  9778. sp_int_digit h = 0;
  9779. sp_int_digit o = 0;
  9780. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9781. sp_int_digit* t = NULL;
  9782. #else
  9783. sp_int_digit t[24];
  9784. #endif
  9785. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9786. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  9787. DYNAMIC_TYPE_BIGINT);
  9788. if (t == NULL) {
  9789. err = MP_MEM;
  9790. }
  9791. #endif
  9792. if (err == MP_OKAY) {
  9793. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9794. t[0] = h;
  9795. h = 0;
  9796. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9797. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9798. t[1] = l;
  9799. l = h;
  9800. h = o;
  9801. o = 0;
  9802. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9803. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9804. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9805. t[2] = l;
  9806. l = h;
  9807. h = o;
  9808. o = 0;
  9809. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9810. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9811. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9812. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9813. t[3] = l;
  9814. l = h;
  9815. h = o;
  9816. o = 0;
  9817. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9818. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9819. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9820. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9821. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9822. t[4] = l;
  9823. l = h;
  9824. h = o;
  9825. o = 0;
  9826. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9827. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9828. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9829. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9830. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9831. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9832. t[5] = l;
  9833. l = h;
  9834. h = o;
  9835. o = 0;
  9836. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9837. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9838. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9839. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9840. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9841. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9842. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9843. t[6] = l;
  9844. l = h;
  9845. h = o;
  9846. o = 0;
  9847. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9848. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9849. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9850. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9851. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9852. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9853. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9854. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9855. t[7] = l;
  9856. l = h;
  9857. h = o;
  9858. o = 0;
  9859. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9860. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9861. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9862. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9863. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9864. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9865. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9866. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9867. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9868. t[8] = l;
  9869. l = h;
  9870. h = o;
  9871. o = 0;
  9872. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9873. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9874. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9875. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9876. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9877. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9878. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9879. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9880. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9881. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9882. t[9] = l;
  9883. l = h;
  9884. h = o;
  9885. o = 0;
  9886. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9887. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9888. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9889. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9890. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9891. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9892. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9893. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9894. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9895. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9896. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9897. t[10] = l;
  9898. l = h;
  9899. h = o;
  9900. o = 0;
  9901. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9902. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9903. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9904. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9905. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9906. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9907. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9908. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9909. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9910. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9911. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9912. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9913. t[11] = l;
  9914. l = h;
  9915. h = o;
  9916. o = 0;
  9917. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9918. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9919. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9920. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9921. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9922. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9923. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9924. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9925. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9926. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9927. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9928. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9929. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9930. t[12] = l;
  9931. l = h;
  9932. h = o;
  9933. o = 0;
  9934. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9935. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9936. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9937. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9938. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9939. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9940. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9941. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9942. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9943. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9944. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9945. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9946. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9947. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9948. t[13] = l;
  9949. l = h;
  9950. h = o;
  9951. o = 0;
  9952. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9953. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9954. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9955. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9956. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9957. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9958. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9959. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9960. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9961. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9962. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9963. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9964. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9965. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9966. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9967. t[14] = l;
  9968. l = h;
  9969. h = o;
  9970. o = 0;
  9971. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9972. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9973. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9974. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9975. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9976. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9977. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9978. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9979. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9980. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9981. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9982. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9983. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9984. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9985. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9986. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9987. t[15] = l;
  9988. l = h;
  9989. h = o;
  9990. o = 0;
  9991. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
  9992. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9993. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9994. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9995. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9996. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9997. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9998. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9999. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  10000. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  10001. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  10002. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  10003. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  10004. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  10005. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  10006. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  10007. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
  10008. t[16] = l;
  10009. l = h;
  10010. h = o;
  10011. o = 0;
  10012. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
  10013. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
  10014. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  10015. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  10016. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  10017. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  10018. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  10019. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  10020. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  10021. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  10022. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  10023. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  10024. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  10025. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  10026. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  10027. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  10028. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
  10029. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
  10030. t[17] = l;
  10031. l = h;
  10032. h = o;
  10033. o = 0;
  10034. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
  10035. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
  10036. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
  10037. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  10038. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  10039. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  10040. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  10041. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  10042. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  10043. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  10044. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  10045. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  10046. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  10047. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  10048. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  10049. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  10050. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
  10051. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
  10052. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
  10053. t[18] = l;
  10054. l = h;
  10055. h = o;
  10056. o = 0;
  10057. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
  10058. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
  10059. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
  10060. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
  10061. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  10062. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  10063. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  10064. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  10065. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  10066. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  10067. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  10068. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  10069. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  10070. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  10071. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  10072. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  10073. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
  10074. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
  10075. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
  10076. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
  10077. t[19] = l;
  10078. l = h;
  10079. h = o;
  10080. o = 0;
  10081. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
  10082. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
  10083. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
  10084. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
  10085. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
  10086. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  10087. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  10088. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  10089. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  10090. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  10091. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  10092. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  10093. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  10094. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  10095. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  10096. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  10097. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
  10098. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
  10099. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
  10100. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
  10101. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
  10102. t[20] = l;
  10103. l = h;
  10104. h = o;
  10105. o = 0;
  10106. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
  10107. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
  10108. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
  10109. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
  10110. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
  10111. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
  10112. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  10113. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  10114. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  10115. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  10116. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  10117. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  10118. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  10119. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  10120. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  10121. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  10122. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
  10123. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
  10124. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
  10125. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
  10126. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
  10127. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
  10128. t[21] = l;
  10129. l = h;
  10130. h = o;
  10131. o = 0;
  10132. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
  10133. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
  10134. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
  10135. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
  10136. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
  10137. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
  10138. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
  10139. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  10140. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  10141. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  10142. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  10143. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  10144. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  10145. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  10146. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  10147. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  10148. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
  10149. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
  10150. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
  10151. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
  10152. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
  10153. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
  10154. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
  10155. t[22] = l;
  10156. l = h;
  10157. h = o;
  10158. o = 0;
  10159. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
  10160. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
  10161. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
  10162. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
  10163. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
  10164. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
  10165. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
  10166. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
  10167. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  10168. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  10169. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  10170. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  10171. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  10172. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  10173. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  10174. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  10175. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
  10176. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
  10177. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
  10178. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
  10179. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
  10180. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
  10181. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
  10182. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
  10183. t[23] = l;
  10184. l = h;
  10185. h = o;
  10186. o = 0;
  10187. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
  10188. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
  10189. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
  10190. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
  10191. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
  10192. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
  10193. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
  10194. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
  10195. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  10196. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  10197. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  10198. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  10199. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  10200. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  10201. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  10202. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
  10203. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
  10204. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
  10205. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
  10206. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
  10207. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
  10208. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
  10209. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
  10210. r->dp[24] = l;
  10211. l = h;
  10212. h = o;
  10213. o = 0;
  10214. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
  10215. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
  10216. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
  10217. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
  10218. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
  10219. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
  10220. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
  10221. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
  10222. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  10223. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  10224. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  10225. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  10226. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  10227. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  10228. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
  10229. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
  10230. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
  10231. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
  10232. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
  10233. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
  10234. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
  10235. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
  10236. r->dp[25] = l;
  10237. l = h;
  10238. h = o;
  10239. o = 0;
  10240. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
  10241. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
  10242. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
  10243. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
  10244. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
  10245. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
  10246. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
  10247. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
  10248. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  10249. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  10250. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  10251. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  10252. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  10253. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
  10254. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
  10255. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
  10256. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
  10257. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
  10258. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
  10259. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
  10260. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
  10261. r->dp[26] = l;
  10262. l = h;
  10263. h = o;
  10264. o = 0;
  10265. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
  10266. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
  10267. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
  10268. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
  10269. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
  10270. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
  10271. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
  10272. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
  10273. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  10274. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  10275. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  10276. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  10277. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
  10278. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
  10279. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
  10280. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
  10281. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
  10282. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
  10283. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
  10284. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
  10285. r->dp[27] = l;
  10286. l = h;
  10287. h = o;
  10288. o = 0;
  10289. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
  10290. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
  10291. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
  10292. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
  10293. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
  10294. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
  10295. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
  10296. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
  10297. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  10298. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  10299. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  10300. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
  10301. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
  10302. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
  10303. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
  10304. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
  10305. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
  10306. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
  10307. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
  10308. r->dp[28] = l;
  10309. l = h;
  10310. h = o;
  10311. o = 0;
  10312. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
  10313. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
  10314. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
  10315. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
  10316. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
  10317. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
  10318. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
  10319. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
  10320. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  10321. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  10322. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
  10323. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
  10324. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
  10325. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
  10326. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
  10327. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
  10328. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
  10329. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
  10330. r->dp[29] = l;
  10331. l = h;
  10332. h = o;
  10333. o = 0;
  10334. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
  10335. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
  10336. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
  10337. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
  10338. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
  10339. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
  10340. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
  10341. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
  10342. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
  10343. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
  10344. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
  10345. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
  10346. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
  10347. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
  10348. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
  10349. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
  10350. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
  10351. r->dp[30] = l;
  10352. l = h;
  10353. h = o;
  10354. o = 0;
  10355. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
  10356. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
  10357. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
  10358. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
  10359. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
  10360. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
  10361. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
  10362. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
  10363. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
  10364. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
  10365. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
  10366. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
  10367. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
  10368. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
  10369. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
  10370. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
  10371. r->dp[31] = l;
  10372. l = h;
  10373. h = o;
  10374. o = 0;
  10375. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
  10376. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
  10377. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
  10378. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
  10379. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
  10380. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
  10381. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
  10382. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
  10383. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
  10384. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
  10385. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
  10386. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
  10387. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
  10388. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
  10389. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
  10390. r->dp[32] = l;
  10391. l = h;
  10392. h = o;
  10393. o = 0;
  10394. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
  10395. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
  10396. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
  10397. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
  10398. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
  10399. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
  10400. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
  10401. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
  10402. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
  10403. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
  10404. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
  10405. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
  10406. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
  10407. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
  10408. r->dp[33] = l;
  10409. l = h;
  10410. h = o;
  10411. o = 0;
  10412. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
  10413. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
  10414. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
  10415. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
  10416. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
  10417. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
  10418. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
  10419. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
  10420. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
  10421. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
  10422. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
  10423. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
  10424. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
  10425. r->dp[34] = l;
  10426. l = h;
  10427. h = o;
  10428. o = 0;
  10429. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
  10430. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
  10431. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
  10432. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
  10433. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
  10434. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
  10435. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
  10436. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
  10437. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
  10438. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
  10439. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
  10440. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
  10441. r->dp[35] = l;
  10442. l = h;
  10443. h = o;
  10444. o = 0;
  10445. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
  10446. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
  10447. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
  10448. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
  10449. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
  10450. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
  10451. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
  10452. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
  10453. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
  10454. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
  10455. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
  10456. r->dp[36] = l;
  10457. l = h;
  10458. h = o;
  10459. o = 0;
  10460. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
  10461. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
  10462. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
  10463. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
  10464. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
  10465. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
  10466. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
  10467. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
  10468. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
  10469. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
  10470. r->dp[37] = l;
  10471. l = h;
  10472. h = o;
  10473. o = 0;
  10474. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
  10475. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
  10476. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
  10477. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
  10478. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
  10479. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
  10480. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
  10481. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
  10482. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
  10483. r->dp[38] = l;
  10484. l = h;
  10485. h = o;
  10486. o = 0;
  10487. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
  10488. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
  10489. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
  10490. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
  10491. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
  10492. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
  10493. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
  10494. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
  10495. r->dp[39] = l;
  10496. l = h;
  10497. h = o;
  10498. o = 0;
  10499. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
  10500. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
  10501. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
  10502. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
  10503. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
  10504. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
  10505. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
  10506. r->dp[40] = l;
  10507. l = h;
  10508. h = o;
  10509. o = 0;
  10510. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
  10511. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
  10512. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
  10513. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
  10514. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
  10515. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
  10516. r->dp[41] = l;
  10517. l = h;
  10518. h = o;
  10519. o = 0;
  10520. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
  10521. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
  10522. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
  10523. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
  10524. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
  10525. r->dp[42] = l;
  10526. l = h;
  10527. h = o;
  10528. o = 0;
  10529. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
  10530. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
  10531. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
  10532. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
  10533. r->dp[43] = l;
  10534. l = h;
  10535. h = o;
  10536. o = 0;
  10537. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
  10538. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
  10539. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
  10540. r->dp[44] = l;
  10541. l = h;
  10542. h = o;
  10543. o = 0;
  10544. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
  10545. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
  10546. r->dp[45] = l;
  10547. l = h;
  10548. h = o;
  10549. SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
  10550. r->dp[46] = l;
  10551. r->dp[47] = h;
  10552. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  10553. r->used = 48;
  10554. sp_clamp(r);
  10555. }
  10556. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  10557. if (t != NULL) {
  10558. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  10559. }
  10560. #endif
  10561. return err;
  10562. }
  10563. #endif /* SP_INT_DIGITS >= 48 */
  10564. #if SP_INT_DIGITS >= 64
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Each 32-digit operand is split in half (a = a1*R + a0, b = b1*R + b0,
 * where R = radix^16) and the product is formed from three 16-digit
 * multiplications:
 *     r = z2*R^2 + (z1 - z0 - z2)*R + z0
 * with z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;   /* Low (result) word of the running add/sub. */
    sp_int_digit h;   /* Carry/borrow word of the running add/sub. */
    sp_int* a1;       /* High 16 digits of a; then holds a0 + a1. */
    sp_int* b1;       /* High 16 digits of b; then holds b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 - computed directly into r. */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the a0 + a1 addition. */
    sp_int_digit cb;  /* Carry out of the b0 + b1 addition. */
    DECL_SP_INT_ARRAY(t, 16, 2);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;

        /* Copy out the high halves of both operands. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
        b1->used = 16;

        /* z2 = a1 * b1 */
        err = _sp_mul_16(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 - computed in place in a1, overflow kept in ca. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 - computed in place in b1, overflow kept in cb. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_16(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_16(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 16 */
        /* Top digit of z1: set when both half-sums overflowed (ca * cb). */
        z1->dp[32] = ca & cb;
        l = 0;
        /* If a0 + a1 overflowed, the dropped carry contributes b01 << 16
         * to the true (a0 + a1) * (b0 + b1) product - add it back in. */
        if (ca) {
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        l = 0;
        /* Likewise for the carry out of b0 + b1: add a01 << 16. */
        if (cb) {
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into z1's top digit. */
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Above z0's 32 digits r is not yet written - store z1 + carry. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        /* Digits 32..48 of r were written above - add with carry. */
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of r are not yet written - store z2 + carry. */
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10723. #endif /* SP_INT_DIGITS >= 64 */
  10724. #if SP_INT_DIGITS >= 96
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Each 48-digit operand is split in half (a = a1*R + a0, b = b1*R + b0,
 * where R = radix^24) and the product is formed from three 24-digit
 * multiplications:
 *     r = z2*R^2 + (z1 - z0 - z2)*R + z0
 * with z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;   /* Low (result) word of the running add/sub. */
    sp_int_digit h;   /* Carry/borrow word of the running add/sub. */
    sp_int* a1;       /* High 24 digits of a; then holds a0 + a1. */
    sp_int* b1;       /* High 24 digits of b; then holds b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 - computed directly into r. */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the a0 + a1 addition. */
    sp_int_digit cb;  /* Carry out of the b0 + b1 addition. */
    DECL_SP_INT_ARRAY(t, 24, 2);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;

        /* Copy out the high halves of both operands. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
        b1->used = 24;

        /* z2 = a1 * b1 */
        err = _sp_mul_24(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 - computed in place in a1, overflow kept in ca. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 - computed in place in b1, overflow kept in cb. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_24(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_24(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 24 */
        /* Top digit of z1: set when both half-sums overflowed (ca * cb). */
        z1->dp[48] = ca & cb;
        l = 0;
        /* If a0 + a1 overflowed, the dropped carry contributes b01 << 24
         * to the true (a0 + a1) * (b0 + b1) product - add it back in. */
        if (ca) {
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        l = 0;
        /* Likewise for the carry out of b0 + b1: add a01 << 24. */
        if (cb) {
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into z1's top digit. */
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Above z0's 48 digits r is not yet written - store z1 + carry. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        /* Digits 48..72 of r were written above - add with carry. */
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of r are not yet written - store z2 + carry. */
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10883. #endif /* SP_INT_DIGITS >= 96 */
  10884. #if SP_INT_DIGITS >= 128
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Each 64-digit operand is split in half (a = a1*R + a0, b = b1*R + b0,
 * where R = radix^32) and the product is formed from three 32-digit
 * multiplications:
 *     r = z2*R^2 + (z1 - z0 - z2)*R + z0
 * with z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;   /* Low (result) word of the running add/sub. */
    sp_int_digit h;   /* Carry/borrow word of the running add/sub. */
    sp_int* a1;       /* High 32 digits of a; then holds a0 + a1. */
    sp_int* b1;       /* High 32 digits of b; then holds b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 - computed directly into r. */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the a0 + a1 addition. */
    sp_int_digit cb;  /* Carry out of the b0 + b1 addition. */
    DECL_SP_INT_ARRAY(t, 32, 2);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;

        /* Copy out the high halves of both operands. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
        b1->used = 32;

        /* z2 = a1 * b1 */
        err = _sp_mul_32(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 - computed in place in a1, overflow kept in ca. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 - computed in place in b1, overflow kept in cb. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_32(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_32(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 32 */
        /* Top digit of z1: set when both half-sums overflowed (ca * cb). */
        z1->dp[64] = ca & cb;
        l = 0;
        /* If a0 + a1 overflowed, the dropped carry contributes b01 << 32
         * to the true (a0 + a1) * (b0 + b1) product - add it back in. */
        if (ca) {
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        l = 0;
        /* Likewise for the carry out of b0 + b1: add a01 << 32. */
        if (cb) {
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into z1's top digit. */
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Above z0's 64 digits r is not yet written - store z1 + carry. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        /* Digits 64..96 of r were written above - add with carry. */
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of r are not yet written - store z2 + carry. */
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11043. #endif /* SP_INT_DIGITS >= 128 */
  11044. #if SP_INT_DIGITS >= 192
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Each 96-digit operand is split in half (a = a1*R + a0, b = b1*R + b0,
 * where R = radix^48) and the product is formed from three 48-digit
 * multiplications:
 *     r = z2*R^2 + (z1 - z0 - z2)*R + z0
 * with z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;   /* Low (result) word of the running add/sub. */
    sp_int_digit h;   /* Carry/borrow word of the running add/sub. */
    sp_int* a1;       /* High 48 digits of a; then holds a0 + a1. */
    sp_int* b1;       /* High 48 digits of b; then holds b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 - computed directly into r. */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the a0 + a1 addition. */
    sp_int_digit cb;  /* Carry out of the b0 + b1 addition. */
    DECL_SP_INT_ARRAY(t, 48, 2);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;

        /* Copy out the high halves of both operands. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
        b1->used = 48;

        /* z2 = a1 * b1 */
        err = _sp_mul_48(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 - computed in place in a1, overflow kept in ca. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 - computed in place in b1, overflow kept in cb. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_48(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_48(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 48 */
        /* Top digit of z1: set when both half-sums overflowed (ca * cb). */
        z1->dp[96] = ca & cb;
        l = 0;
        /* If a0 + a1 overflowed, the dropped carry contributes b01 << 48
         * to the true (a0 + a1) * (b0 + b1) product - add it back in. */
        if (ca) {
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        l = 0;
        /* Likewise for the carry out of b0 + b1: add a01 << 48. */
        if (cb) {
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into z1's top digit. */
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Above z0's 96 digits r is not yet written - store z1 + carry. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        /* Digits 96..144 of r were written above - add with carry. */
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of r are not yet written - store z2 + carry. */
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11203. #endif /* SP_INT_DIGITS >= 192 */
  11204. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  11205. #endif /* !WOLFSSL_SP_SMALL */
  11206. /* Multiply a by b and store in r: r = a * b
  11207. *
  11208. * @param [in] a SP integer to multiply.
  11209. * @param [in] b SP integer to multiply.
  11210. * @param [out] r SP integer result.
  11211. *
  11212. * @return MP_OKAY on success.
  11213. * @return MP_VAL when a, b or is NULL; or the result will be too big for fixed
  11214. * data length.
  11215. * @return MP_MEM when dynamic memory allocation fails.
  11216. */
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int sign = MP_ZPOS;
#endif

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation.
     * The full product can occupy a->used + b->used digits, so the result
     * SP int must be at least that large.
     */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
        err = MP_VAL;
    }

#if 0
    /* Debug dump of operands - compiled out. */
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Result is negative exactly when the operand signs differ. */
        sign = a->sign ^ b->sign;
#endif
        /* Anything multiplied by zero is zero. */
        if ((a->used == 0) || (b->used == 0)) {
            _sp_zero(r);
        }
        else
        /* Dispatch to a fixed-size unrolled implementation when both
         * operands have exactly the digit count it was built for;
         * otherwise fall through to the generic implementations below.
         */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if ((a->used == 4) && (b->used == 4)) {
            err = _sp_mul_4(a, b, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 6) && (b->used == 6)) {
            err = _sp_mul_6(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if ((a->used == 8) && (b->used == 8)) {
            err = _sp_mul_8(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 12) && (b->used == 12)) {
            err = _sp_mul_12(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if ((a->used == 16) && (b->used == 16)) {
            err = _sp_mul_16(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if ((a->used == 24) && (b->used == 24)) {
            err = _sp_mul_24(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if ((a->used == 32) && (b->used == 32)) {
            err = _sp_mul_32(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if ((a->used == 48) && (b->used == 48)) {
            err = _sp_mul_48(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if ((a->used == 64) && (b->used == 64)) {
            err = _sp_mul_64(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if ((a->used == 96) && (b->used == 96)) {
            err = _sp_mul_96(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
#ifdef SQR_MUL_ASM
        /* Equal-length operands: use the n-by-n implementation. */
        if (a->used == b->used) {
            err = _sp_mul_nxn(a, b, r);
        }
        else
#endif
        {
            /* Generic multiplication for all remaining shapes. */
            err = _sp_mul(a, b, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* Zero is always positive; otherwise apply the computed sign. */
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
    }
#endif

#if 0
    /* Debug dump of result - compiled out. */
    if (err == MP_OKAY) {
        sp_print(r, "rmul");
    }
#endif

    return err;
}
  11343. /* END SP_MUL implementations. */
  11344. #endif
  11345. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  11346. defined(WOLFCRYPT_HAVE_ECCSI) || \
  11347. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
  11348. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11349. *
  11350. * @param [in] a SP integer to multiply.
  11351. * @param [in] b SP integer to multiply.
  11352. * @param [in] m SP integer that is the modulus.
  11353. * @param [out] r SP integer result.
  11354. *
  11355. * @return MP_OKAY on success.
  11356. * @return MP_MEM when dynamic memory allocation fails.
  11357. */
static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    /* Create temporary for multiplication result.
     * The product requires up to a->used + b->used digits.
     */
    DECL_SP_INT(t, a->used + b->used);

    ALLOC_SP_INT(t, a->used + b->used, err, NULL);
    if (err == MP_OKAY) {
        err = sp_init_size(t, a->used + b->used);
    }

    /* Multiply and reduce. */
    if (err == MP_OKAY) {
        err = sp_mul(a, b, t);
    }
    if (err == MP_OKAY) {
        /* Reduce the product modulo m straight into the caller's result. */
        err = sp_mod(t, m, r);
    }

    /* Dispose of an allocated SP int. */
    FREE_SP_INT(t, NULL);
    return err;
}
  11379. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11380. *
  11381. * @param [in] a SP integer to multiply.
  11382. * @param [in] b SP integer to multiply.
  11383. * @param [in] m SP integer that is the modulus.
  11384. * @param [out] r SP integer result.
  11385. *
  11386. * @return MP_OKAY on success.
  11387. * @return MP_MEM when dynamic memory allocation fails.
  11388. */
  11389. static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
  11390. sp_int* r)
  11391. {
  11392. int err = MP_OKAY;
  11393. /* Use r as intermediate result if not same as pointer m which is needed
  11394. * after first intermediate result.
  11395. */
  11396. if (r != m) {
  11397. /* Multiply and reduce. */
  11398. err = sp_mul(a, b, r);
  11399. if (err == MP_OKAY) {
  11400. err = sp_mod(r, m, r);
  11401. }
  11402. }
  11403. else {
  11404. /* Do operation using temporary. */
  11405. err = _sp_mulmod_tmp(a, b, m, r);
  11406. }
  11407. return err;
  11408. }
  11409. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11410. *
  11411. * @param [in] a SP integer to multiply.
  11412. * @param [in] b SP integer to multiply.
  11413. * @param [in] m SP integer that is the modulus.
  11414. * @param [out] r SP integer result.
  11415. *
  11416. * @return MP_OKAY on success.
  11417. * @return MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
  11418. * fixed data length.
  11419. * @return MP_MEM when dynamic memory allocation fails.
  11420. */
  11421. int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  11422. {
  11423. int err = MP_OKAY;
  11424. /* Validate parameters. */
  11425. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  11426. err = MP_VAL;
  11427. }
  11428. /* Ensure result SP int is big enough for intermediates. */
  11429. if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
  11430. err = MP_VAL;
  11431. }
  11432. #if 0
  11433. if (err == 0) {
  11434. sp_print(a, "a");
  11435. sp_print(b, "b");
  11436. sp_print(m, "m");
  11437. }
  11438. #endif
  11439. if (err == MP_OKAY) {
  11440. err = _sp_mulmod(a, b, m, r);
  11441. }
  11442. #if 0
  11443. if (err == 0) {
  11444. sp_print(r, "rmm");
  11445. }
  11446. #endif
  11447. return err;
  11448. }
  11449. #endif
  11450. #ifdef WOLFSSL_SP_INVMOD
  11451. /* Calculates the multiplicative inverse in the field. r*a = x*m + 1
  11452. * Right-shift Algorithm. NOT constant time.
  11453. *
  11454. * Algorithm:
  11455. * 1. u = m, v = a, b = 0, c = 1
  11456. * 2. While v != 1 and u != 0
  11457. * 2.1. If u even
  11458. * 2.1.1. u /= 2
  11459. * 2.1.2. b = (b / 2) mod m
  11460. * 2.2. Else if v even
  11461. * 2.2.1. v /= 2
  11462. * 2.2.2. c = (c / 2) mod m
  11463. * 2.3. Else if u >= v
  11464. * 2.3.1. u -= v
* 2.3.2. b = (b - c) mod m
  11466. * 2.4. Else (v > u)
  11467. * 2.4.1. v -= u
* 2.4.2. c = (c - b) mod m
  11469. * 3. NO_INVERSE if u == 0
  11470. *
  11471. * @param [in] a SP integer to find inverse of.
  11472. * @param [in] m SP integer this is the modulus.
  11473. * @param [in] u SP integer to use in calculation.
  11474. * @param [in] v SP integer to use in calculation.
  11475. * @param [in] b SP integer to use in calculation
  11476. * @param [out] c SP integer that is the inverse.
  11477. *
  11478. * @return MP_OKAY on success.
  11479. * @return MP_VAL when no inverse.
  11480. */
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
    sp_int* v, sp_int* b, sp_int* c)
{
    int err = MP_OKAY;

    /* 1. u = m, v = a, b = 0, c = 1 */
    _sp_copy(m, u);
    if (a != v) {
        _sp_copy(a, v);
    }
    _sp_zero(b);
    _sp_set(c, 1);

    /* 2. While v != 1 and u != 0 */
    while (!sp_isone(v) && !sp_iszero(u)) {
        /* 2.1. If u even */
        if ((u->dp[0] & 1) == 0) {
            /* 2.1.1. u /= 2 */
            _sp_div_2(u, u);
            /* 2.1.2. b = (b / 2) mod m
             * If b is odd, add m first so halving stays exact modulo m.
             */
            if (sp_isodd(b)) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_div_2(b, b);
        }
        /* 2.2. Else if v even */
        else if ((v->dp[0] & 1) == 0) {
            /* 2.2.1. v /= 2 */
            _sp_div_2(v, v);
            /* 2.2.2. c = (c / 2) mod m */
            if (sp_isodd(c)) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_div_2(c, c);
        }
        /* 2.3. Else if u >= v */
        else if (_sp_cmp_abs(u, v) != MP_LT) {
            /* 2.3.1. u -= v */
            _sp_sub_off(u, v, u, 0);
            /* 2.3.2. b = (b - c) mod m
             * Add m first when b < c so the magnitude subtraction cannot
             * underflow.
             */
            if (_sp_cmp_abs(b, c) == MP_LT) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_sub_off(b, c, b, 0);
        }
        /* 2.4. Else (v > u) */
        else {
            /* 2.4.1. v -= u */
            _sp_sub_off(v, u, v, 0);
            /* 2.4.2. c = (c - b) mod m */
            if (_sp_cmp_abs(c, b) == MP_LT) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_sub_off(c, b, c, 0);
        }
    }

    /* 3. NO_INVERSE if u == 0 (gcd(a, m) != 1 so no inverse exists) */
    if (sp_iszero(u)) {
        err = MP_VAL;
    }

    return err;
}
  11541. #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
  11542. (!defined(NO_RSA) || !defined(NO_DH))
  11543. /* Calculates the multiplicative inverse in the field. r*a = x*m + 1
  11544. * Extended Euclidean Algorithm. NOT constant time.
  11545. *
  11546. * Creates two new SP ints.
  11547. *
  11548. * Algorithm:
  11549. * 1. x = m, y = a, b = 1, c = 0
  11550. * 2. while x > 1
  11551. * 2.1. d = x / y, r = x mod y
  11552. * 2.2. c -= d * b
  11553. * 2.3. x = y, y = r
  11554. * 2.4. s = b, b = c, c = s
  11555. * 3. If y != 0 then NO_INVERSE
  11556. * 4. If c < 0 then c += m
  11557. * 5. inv = c
  11558. *
  11559. * @param [in] a SP integer to find inverse of.
  11560. * @param [in] m SP integer this is the modulus.
* @param [in] x SP integer to use in calculation.
* @param [in] y SP integer to use in calculation.
  11563. * @param [in] b SP integer to use in calculation
  11564. * @param [in] c SP integer to use in calculation
  11565. * @param [out] inv SP integer that is the inverse.
  11566. *
  11567. * @return MP_OKAY on success.
  11568. * @return MP_VAL when no inverse.
  11569. * @return MP_MEM when dynamic memory allocation fails.
  11570. */
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
{
    int err = MP_OKAY;
    sp_int* s;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Without native negative-number support the signs of b and c are
     * tracked manually alongside their magnitudes.
     */
    int bneg = 0;
    int cneg = 0;
    int neg;
#endif
    /* Temporary holding the quotient and then the product d * b. */
    DECL_SP_INT(d, m->used + 1);

    ALLOC_SP_INT(d, m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        mp_init(d);

        /* 1. x = m, y = a, b = 1, c = 0 */
        if (a != y) {
            _sp_copy(a, y);
        }
        _sp_copy(m, x);
        _sp_set(b, 1);
        _sp_zero(c);
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder left in x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            /* 2.2. c -= d * b */
            if (sp_isone(d)) {
                /* c -= 1 * b */
                err = sp_sub(c, b, c);
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    err = sp_sub(c, d, c);
                }
            }
            /* 2.3. x = y, y = r (pointer swap, no copying) */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s */
            s = b; b = c; c = s;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && sp_isneg(c)) {
        err = sp_add(c, m, c);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#else
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder left in x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            if (sp_isone(d)) {
                /* c -= 1 * b */
                if ((bneg ^ cneg) == 1) {
                    /* c -= -b or -c -= b, therefore add. */
                    _sp_add_off(c, b, c, 0);
                }
                else if (_sp_cmp_abs(c, b) == MP_LT) {
                    /* |c| < |b| and same sign, reverse subtract and negate. */
                    _sp_sub_off(b, c, c, 0);
                    cneg = !cneg;
                }
                else {
                    /* |c| >= |b| */
                    _sp_sub_off(c, b, c, 0);
                }
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    if ((bneg ^ cneg) == 1) {
                        /* c -= -d or -c -= d, therefore add. */
                        _sp_add_off(c, d, c, 0);
                    }
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
                        /* |c| < |d| and same sign, reverse subtract and negate.
                         */
                        _sp_sub_off(d, c, c, 0);
                        cneg = !cneg;
                    }
                    else {
                        _sp_sub_off(c, d, c, 0);
                    }
                }
            }
            /* 2.3. x = y, y = r (pointer swap, no copying) */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s - swap tracked signs too. */
            s = b; b = c; c = s;
            neg = bneg; bneg = cneg; cneg = neg;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && cneg) {
        /* c = m - |c| */
        _sp_sub_off(m, c, c, 0);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#endif

    FREE_SP_INT(d, NULL);
    return err;
}
  11696. #endif
  11697. /* Calculates the multiplicative inverse in the field.
  11698. * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
  11699. *
  11700. * r*a = x*m + 1
  11701. *
  11702. * @param [in] a SP integer to find inverse of.
  11703. * @param [in] m SP integer this is the modulus.
  11704. * @param [out] r SP integer to hold result. r cannot be m.
  11705. *
  11706. * @return MP_OKAY on success.
  11707. * @return MP_VAL when m is even and a divides m evenly.
  11708. * @return MP_MEM when dynamic memory allocation fails.
  11709. */
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* b = NULL;
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
    DECL_SP_INT(c, 2 * m->used + 1);

    /* Allocate SP ints:
     *  - x3 one word larger than modulus
     *  - x1 one word longer than twice modulus used
     */
    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        u = t[0];
        v = t[1];
        b = t[2];
        /* c allocated separately and larger for even mod case. */
    }

    /* Initialize intermediate values with minimal sizes. */
    if (err == MP_OKAY) {
        err = sp_init_size(u, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(v, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(b, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(c, 2 * m->used + 1);
    }

    if (err == MP_OKAY) {
        const sp_int* mm = m;
        const sp_int* ma = a;
        int evenMod = 0;

        if (sp_iseven(m)) {
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a)
             * m is even so a is odd (caller rejects both even): invert with
             * the roles of a and m swapped and fix the result up below.
             */
            mm = a;
            ma = v;
            _sp_copy(a, u);
            err = sp_mod(m, a, v);
            /* v == 0 when a divides m evenly - no inverse. */
            if ((err == MP_OKAY) && sp_iszero(v)) {
                err = MP_VAL;
            }
            evenMod = 1;
        }

        if (err == MP_OKAY) {
            /* Calculate inverse. */
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
    (!defined(NO_RSA) || !defined(NO_DH))
            /* Division-based algorithm for large (>= 1024-bit) moduli. */
            if (sp_count_bits(mm) >= 1024) {
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
            }
            else
#endif
            {
                /* Binary (right-shift) algorithm otherwise. */
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
            }
        }

        /* Fixup for even modulus. */
        if ((err == MP_OKAY) && evenMod) {
            /* Finish operation.
             *    a^-1 mod m = m + ((1 - m*c) / a)
             * => a^-1 mod m = m - ((m*c - 1) / a)
             */
            err = sp_mul(c, m, c);
            if (err == MP_OKAY) {
                _sp_sub_d(c, 1, c);
                err = sp_div(c, a, c, NULL);
            }
            if (err == MP_OKAY) {
                err = sp_sub(m, c, r);
            }
        }
        else if (err == MP_OKAY) {
            /* Odd modulus: inverse was computed directly into c. */
            _sp_copy(c, r);
        }
    }

    FREE_SP_INT(c, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11795. /* Calculates the multiplicative inverse in the field.
  11796. * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
  11797. *
  11798. * r*a = x*m + 1
  11799. *
  11800. * @param [in] a SP integer to find inverse of.
  11801. * @param [in] m SP integer this is the modulus.
  11802. * @param [out] r SP integer to hold result. r cannot be m.
  11803. *
  11804. * @return MP_OKAY on success.
  11805. * @return MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
  11806. * m is negative.
  11807. * @return MP_MEM when dynamic memory allocation fails.
  11808. */
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. r cannot be m since m is read throughout. */
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
        err = MP_VAL;
    }
    /* r doubles as working storage: needs room for twice the modulus. */
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
        err = MP_VAL;
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Don't support negative modulus. */
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    if (err == MP_OKAY) {
        /* Ensure number is less than modulus.
         * Reduce into r and continue with the reduced value.
         */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            err = sp_mod(a, m, r);
            a = r;
        }
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        /* Make 'a' positive */
        err = sp_add(m, a, r);
        a = r;
    }
#endif

    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
        err = MP_VAL;
    }
    /* r*2*x != n*2*y + 1 for integer x,y
     * Both even means gcd(a, m) >= 2, so no inverse exists.
     */
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
        err = MP_VAL;
    }

    /* 1*1 = 0*m + 1 - trivial inverse of 1 is 1. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        _sp_set(r, 1);
    }
    else if (err == MP_OKAY) {
        err = _sp_invmod(a, m, r);
    }

    return err;
}
  11856. #endif /* WOLFSSL_SP_INVMOD */
  11857. #ifdef WOLFSSL_SP_INVMOD_MONT_CT
  11858. /* Number of entries to pre-compute.
  11859. * Many pre-defined primes have multiple of 8 consecutive 1s.
  11860. * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
  11861. */
  11862. #define CT_INV_MOD_PRE_CNT 8
  11863. /* Calculates the multiplicative inverse in the field - constant time.
  11864. *
  11865. * Modulus (m) must be a prime and greater than 2.
  11866. * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
  11867. *
  11868. * Algorithm:
  11869. * pre = pre-computed values, m = modulus, a = value to find inverse of,
  11870. * e = exponent
  11871. * Pre-calc:
  11872. * 1. pre[0] = 2^0 * a mod m
  11873. * 2. For i in 2..CT_INV_MOD_PRE_CNT
  11874. * 2.1. pre[i-1] = ((pre[i-2] ^ 2) * a) mod m
  11875. * Calc inverse:
  11876. * 1. e = m - 2
  11877. * 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
  11878. * 3. t = pre[j-1]
  11879. * 4. s = 0
  11880. * 5. j = 0
  11881. * 6. For i index of next top bit..0
  11882. * 6.1. bit = e[i]
  11883. * 6.2. j += bit
  11884. * 6.3. s += 1
  11885. * 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
  11886. * 6.4.1. s -= 1 - bit
  11887. * 6.4.2. For s downto 1
  11888. * 6.4.2.1. t = (t ^ 2) mod m
  11889. * 6.4.3. s = 1 - bit
  11890. * 6.4.4. t = (t * pre[j-1]) mod m
  11891. * 6.4.5. j = 0
  11892. * 7. For s downto 1
  11893. * 7.1. t = (t ^ 2) mod m
  11894. * 8. If j > 0 then r = (t * pre[j-1]) mod m
  11895. * 9. Else r = t
  11896. *
  11897. * @param [in] a SP integer, Montgomery form, to find inverse of.
  11898. * @param [in] m SP integer this is the modulus.
  11899. * @param [out] r SP integer to hold result.
  11900. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  11901. *
  11902. * @return MP_OKAY on success.
  11903. * @return MP_MEM when dynamic memory allocation fails.
  11904. */
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
    sp_int_digit mp)
{
    int err = MP_OKAY;
    int i;
    int j = 0;
    int s = 0;
    sp_int* t = NULL;
    sp_int* e = NULL;
#ifndef WOLFSSL_SP_NO_MALLOC
    /* Pre-computation table; the last two entries serve as the working
     * value t and the exponent e.
     */
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#else
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#endif

#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err,
        NULL);
#else
    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
#endif

    if (err == MP_OKAY) {
        t = pre[CT_INV_MOD_PRE_CNT + 0];
        e = pre[CT_INV_MOD_PRE_CNT + 1];
        /* Space for sqr and mul result. */
        _sp_init_size(t, m->used * 2 + 1);
        /* e = mod - 2 */
        _sp_init_size(e, m->used + 1);

        /* Create pre-computation results: ((2^(1..8))-1).a. */
        _sp_init_size(pre[0], m->used * 2 + 1);
        /* 1. pre[0] = 2^0 * a mod m
         *    Start with 1.a = a.
         */
        _sp_copy(a, pre[0]);
        /* 2. For i in 2..CT_INV_MOD_PRE_CNT
         *    For rest of entries in table.
         */
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
            /* 2.1. pre[i] = ((pre[i-1] ^ 2) * a) mod m */
            /* Previous value ..1 -> ..10 */
            _sp_init_size(pre[i], m->used * 2 + 1);
            err = sp_sqr(pre[i-1], pre[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp, 0);
            }
            /* ..10 -> ..11 */
            if (err == MP_OKAY) {
                err = sp_mul(pre[i], a, pre[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp, 0);
            }
        }
    }

    if (err == MP_OKAY) {
        /* 1. e = m - 2 */
        _sp_sub_d(m, 2, e);
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
         *    One or more of the top bits is 1 so count.
         */
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
                    (j == CT_INV_MOD_PRE_CNT)) {
                break;
            }
        }
        /* 3. Set tmp to product of leading bits. */
        _sp_copy(pre[j-1], t);

        /* 4. s = 0 */
        s = 0;
        /* 5. j = 0 */
        j = 0;
        /* 6. For i index of next top bit..0
         *    Do remaining bits in exponent.
         */
        for (; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. bit = e[i] */
            int bit = sp_is_bit_set(e, (unsigned int)i);

            /* 6.2. j += bit
             *      Update count of consecutive 1 bits.
             */
            j += bit;
            /* 6.3. s += 1
             *      Update count of squares required.
             */
            s++;

            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
             */
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
                /* 6.4.1. s -= 1 - bit */
                bit = 1 - bit;
                s -= bit;
                /* 6.4.2. For s downto 1
                 *        Do s squares.
                 */
                for (; (err == MP_OKAY) && (s > 0); s--) {
                    /* 6.4.2.1. t = (t ^ 2) mod m */
                    err = sp_sqr(t, t);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t, m, mp, 0);
                    }
                }
                /* 6.4.3. s = 1 - bit */
                s = bit;

                /* 6.4.4. t = (t * pre[j-1]) mod m */
                if (err == MP_OKAY) {
                    err = sp_mul(t, pre[j-1], t);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t, m, mp, 0);
                }

                /* 6.4.5. j = 0
                 *        Reset number of 1 bits seen.
                 */
                j = 0;
            }
        }
    }
    if (err == MP_OKAY) {
        /* 7. For s downto 1
         *    Do s squares - total remaining. */
        for (; (err == MP_OKAY) && (s > 0); s--) {
            /* 7.1. t = (t ^ 2) mod m */
            err = sp_sqr(t, t);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t, m, mp, 0);
            }
        }
    }
    if (err == MP_OKAY) {
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
        if (j > 0) {
            err = sp_mul(t, pre[j-1], r);
            if (err == MP_OKAY) {
                err = _sp_mont_red(r, m, mp, 0);
            }
        }
        /* 9. Else r = t */
        else {
            _sp_copy(t, r);
        }
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
#else
    FREE_SP_INT_ARRAY(pre, NULL);
#endif
    return err;
}
  12054. /* Calculates the multiplicative inverse in the field - constant time.
  12055. *
  12056. * Modulus (m) must be a prime and greater than 2.
  12057. * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
  12058. *
  12059. * @param [in] a SP integer, Montgomery form, to find inverse of.
  12060. * @param [in] m SP integer this is the modulus.
  12061. * @param [out] r SP integer to hold result.
  12062. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  12063. *
  12064. * @return MP_OKAY on success.
  12065. * @return MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
  12066. * @return MP_MEM when dynamic memory allocation fails.
  12067. */
  12068. int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
  12069. sp_int_digit mp)
  12070. {
  12071. int err = MP_OKAY;
  12072. /* Validate parameters. */
  12073. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  12074. err = MP_VAL;
  12075. }
  12076. /* Ensure m is not too big. */
  12077. else if (m->used * 2 >= SP_INT_DIGITS) {
  12078. err = MP_VAL;
  12079. }
  12080. /* check that r can hold the range of the modulus result */
  12081. else if (m->used > r->size) {
  12082. err = MP_VAL;
  12083. }
  12084. /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
  12085. if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
  12086. ((m->used == 1) && (m->dp[0] < 3)))) {
  12087. err = MP_VAL;
  12088. }
  12089. if (err == MP_OKAY) {
  12090. /* Do operation. */
  12091. err = _sp_invmod_mont_ct(a, m, r, mp);
  12092. }
  12093. return err;
  12094. }
  12095. #endif /* WOLFSSL_SP_INVMOD_MONT_CT */
  12096. /**************************
  12097. * Exponentiation functions
  12098. **************************/
  12099. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  12100. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  12101. defined(OPENSSL_ALL)
  12102. #ifndef WC_PROTECT_ENCRYPTED_MEM
  12103. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12104. * Process the exponent one bit at a time.
  12105. * Is constant time and can be cache attack resistant.
  12106. *
  12107. * Algorithm:
  12108. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12109. * 1. s = 0
  12110. * 2. t[0] = b mod m.
  12111. * 3. t[1] = t[0]
  12112. * 4. For i in (bits-1)...0
  12113. * 4.1. t[s] = t[s] ^ 2
  12114. * 4.2. y = e[i]
  12115. * 4.3 j = y & s
  12116. * 4.4 s = s | y
  12117. * 4.5. t[j] = t[j] * b
  12118. * 5. r = t[1]
  12119. *
  12120. * @param [in] b SP integer that is the base.
  12121. * @param [in] e SP integer that is the exponent.
  12122. * @param [in] bits Number of bits in exponent to use. May be greater than
  12123. * count of bits in e.
  12124. * @param [in] m SP integer that is the modulus.
  12125. * @param [out] r SP integer to hold result.
  12126. *
  12127. * @return MP_OKAY on success.
  12128. * @return MP_MEM when dynamic memory allocation fails.
  12129. */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    /* 1. s = 0 */
    int s = 0;
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

    /* Allocate temporaries. */
#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    /* Working SP int needed when cache resistant. */
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], 2 * m->used + 1);
        _sp_init_size(t[1], 2 * m->used + 1);
#ifndef WC_NO_CACHE_RESISTANT
        _sp_init_size(t[2], 2 * m->used + 1);
#endif

        /* 2. t[0] = b mod m
         * Ensure base is less than modulus - set fake working value to base.
         */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 3. t[1] = t[0]
         *    Set real working value to base.
         */
        _sp_copy(t[0], t[1]);

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* 4.1. t[s] = t[s] ^ 2 */
            err = sp_sqrmod(t[s], m, t[s]);
            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4 s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b */
                err = _sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* 4.1. t[s] = t[s] ^ 2
             * The operand is selected with masked address arithmetic
             * instead of array indexing so the memory access pattern does
             * not depend on the secret value of s.
             */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[2]);
            err = sp_sqrmod(t[2], m, t[2]);
            _sp_copy(t[2],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));

            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1);
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4 s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b
                 * Same masked-address selection for the multiply operand.
                 */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[2]);
                err = _sp_mulmod(t[2], b, m, t[2]);
                _sp_copy(t[2],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
#endif
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 5. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12228. #else
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time as a Montgomery powering ladder.
 * Note: despite the ladder's name this variant does NOT use Montgomery form -
 * it reduces with sp_mulmod/sp_sqrmod after every operation.
 * Is constant time and cache attack resistant: every iteration performs one
 * multiply and one square regardless of the exponent bit, and the bit only
 * selects which ladder value is squared (via constant-time copy helpers).
 *
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
 * Cryptographic Hardware and Embedded Systems, CHES 2002
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. t[1] = b mod m.
 *  2. t[0] = 1
 *  3. For i in (bits-1)...0
 *   3.1. y = e[i]
 *   3.2. t[2] = t[0] * t[1]
 *   3.3. t[3] = t[y] ^ 2
 *   3.4. t[y] = t[3], t[y^1] = t[2]
 *  4. r = t[0]
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* t[0], t[1]: ladder pair; t[2], t[3]: multiply/square results held until
     * both are committed in a single constant-time copy. Each is sized for a
     * full double-width product. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;

        /* 2. t[0] = 1 */
        _sp_set(t[0], 1);

        /* 3. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. y = e[i] - extract exponent bit i. */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 3.2. t[2] = t[0] * t[1] - always performed. */
            err = sp_mulmod(t[0], t[1], m, t[2]);
            /* 3.3. t[3] = t[y] ^ 2 */
            if (err == MP_OKAY) {
                /* Constant-time select of t[y]: masked addresses avoid a
                 * secret-dependent branch or array index. */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqrmod(t[3], m, t[3]);
            }
            /* 3.4. t[y] = t[3], t[y^1] = t[2] - constant-time commit of both
             * results back into the ladder pair. */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 4. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12315. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12316. #endif
  12317. #if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  12318. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  12319. defined(OPENSSL_ALL)
  12320. #ifndef WC_NO_HARDEN
  12321. #if !defined(WC_NO_CACHE_RESISTANT)
  12322. #ifndef WC_PROTECT_ENCRYPTED_MEM
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time with base in Montgomery form.
 * Is constant time and cache attack resistant.
 *
 * The 's' flag tracks whether a 1 bit of the exponent has been seen yet.
 * While s == 0 all work targets t[0], a "fake" working value, so the same
 * sequence of squarings and multiplications runs regardless of where the
 * exponent's leading 1 bit is. Once s == 1 the real value t[1] is updated.
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. t[0] = b mod m.
 *  2. s = 0
 *  3. t[0] = ToMont(t[0])
 *  4. t[1] = t[0]
 *  5. bm = t[0]
 *  6. For i in (bits-1)...0
 *   6.1. t[s] = t[s] ^ 2
 *   6.2. y = e[i]
 *   6.3  j = y & s
 *   6.4  s = s | y
 *   6.5. t[j] = t[j] * bm
 *  7. t[1] = FromMont(t[1])
 *  8. r = t[1]
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* t[0]: fake working value, t[1]: real working value, t[2]: base in
     * Montgomery form (bm), t[3]: scratch for the selected operand. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        /* 2. s = 0 - no exponent 1 bit seen yet. */
        int s = 0;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 3. t[0] = ToMont(t[0])
         *    Convert base to Montgomery form - as fake working value.
         *    Multiply by the normalizer then reduce mod m.
         */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 4. t[1] = t[0]
             *    Set real working value to base.
             */
            _sp_copy(t[0], t[1]);
            /* 5. bm = t[0]. Keep the Montgomery-form base for step 6.5. */
            _sp_copy(t[0], t[2]);
        }

        /* 6. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. t[s] = t[s] ^ 2
             * Masked-address select of t[s] avoids secret-dependent
             * addressing; square via scratch t[3] then write back. */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[3]);
            err = sp_sqr(t[3], t[3]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp, 0);
            }
            _sp_copy(t[3],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
            if (err == MP_OKAY) {
                /* 6.2. y = e[i] - extract exponent bit i. */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >>
                               (i & SP_WORD_MASK)) & 1);
                /* 6.3 j = y & s - multiply updates the real value only once
                 * a 1 bit has been seen AND this bit is 1. */
                int j = y & s;
                /* 6.4 s = s | y - latch once the first 1 bit appears. */
                s |= y;
                /* 6.5. t[j] = t[j] * bm - same masked-select pattern. */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[3]);
                err = sp_mul(t[3], t[2], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp, 0);
                }
                _sp_copy(t[3],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
        }
        if (err == MP_OKAY) {
            /* 7. t[1] = FromMont(t[1]) */
            err = _sp_mont_red(t[1], m, mp, 0);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 8. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12453. #else
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time with base in Montgomery form.
 * Is constant time and cache attack resistant: every iteration performs one
 * multiply and one square; the exponent bit only selects which ladder value
 * is squared, via masked addressing and a constant-time double copy.
 *
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
 * Cryptographic Hardware and Embedded Systems, CHES 2002
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. t[1] = b mod m.
 *  2. t[0] = ToMont(1)
 *  3. t[1] = ToMont(t[1])
 *  4. For i in (bits-1)...0
 *   4.1. y = e[i]
 *   4.2. t[2] = t[0] * t[1]
 *   4.3. t[3] = t[y] ^ 2
 *   4.4. t[y] = t[3], t[y^1] = t[2]
 *  5. t[0] = FromMont(t[0])
 *  6. r = t[0]
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* t[0], t[1]: ladder pair; t[2], t[3]: pending multiply/square results. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = ToMont(1)
         *    Calculate 1 in Montgomery form (the normalizer).
         */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1])
             *    Convert base to Montgomery form.
             */
            err = sp_mulmod(t[1], t[0], m, t[1]);
        }

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 4.1. y = e[i] - extract exponent bit i. */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 4.2. t[2] = t[0] * t[1] - always performed. */
            err = sp_mul(t[0], t[1], t[2]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[2], m, mp, 0);
            }
            /* 4.3. t[3] = t[y] ^ 2
             * Masked-address select of t[y] avoids a secret-dependent
             * branch or index. */
            if (err == MP_OKAY) {
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqr(t[3], t[3]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp, 0);
            }
            /* 4.4. t[y] = t[3], t[y^1] = t[2] - constant-time commit of both
             * results back into the ladder pair. */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }

        if (err == MP_OKAY) {
            /* 5. t[0] = FromMont(t[0]) */
            err = _sp_mont_red(t[0], m, mp, 0);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 6. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12564. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12565. #else
  12566. #ifdef SP_ALLOC
  12567. #define SP_ALLOC_PREDEFINED
  12568. #endif
  12569. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  12570. #define SP_ALLOC
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Creates a window of precalculated exponents with base in Montgomery form.
 * Is constant time but NOT cache attack resistant (table lookups are indexed
 * by exponent bits).
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  w: window size based on bits.
 *  1. t[1] = b mod m.
 *  2. t[0] = MontNorm(m) = ToMont(1)
 *  3. t[1] = ToMont(t[1])
 *  4. For i in 2..(2 ^ w) - 1
 *   4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
 *   4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
 *  5. cb = w * (bits / w)
 *  6. tr = t[e / (2 ^ cb)]
 *  7. For each remaining window of w bits, from bit cb-1 down to 0
 *   7.1. y = e[(i-1)..(i-w)]
 *   7.2. tr = tr ^ (2 * w)
 *   7.3. tr = tr * t[y]
 *  8. tr = FromMont(tr)
 *  9. r = tr
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int c;          /* Count of unconsumed bits in current exponent word n. */
    int y;          /* Current window value - index into table. */
    int winBits;    /* Window size in bits (w). */
    int preCnt;     /* Number of pre-computed table entries: 2^w. */
    int err = MP_OKAY;
    int done = 0;
    sp_int_digit mask;
    sp_int* tr = NULL;
    /* Table sized for the largest window (6 bits) plus temporary result. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* An entry for each possible 0..2^winBits-1 value. */
    preCnt = 1 << winBits;
    /* Mask for calculating index into pre-computed table. */
    mask = preCnt - 1;

    /* Allocate sp_ints for:
     *  - pre-computation table
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
    if (err == MP_OKAY) {
        /* Set variable to use allocated memory. */
        tr = t[preCnt];

        /* Initialize all allocated. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);

        /* 1. t[1] = b mod m. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into entry of table to contain b^1. */
            _sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        sp_int_digit mp;
        sp_int_digit n;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1]) */
            err = sp_mul(t[1], t[0], t[1]);
        }
        if (err == MP_OKAY) {
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
        }

        /* 4. For i in 2..(2 ^ w) - 1
         * Fill table: even entries by squaring, odd by multiplying by b. */
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
            if ((i & 1) == 0) {
                err = sp_sqr(t[i/2], t[i]);
            }
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
            else {
                err = sp_mul(t[i-1], t[1], t[i]);
            }
            /* Montgomery reduce square or multiplication result. */
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp, 0);
            }
        }

        if (err == MP_OKAY) {
            /* 5. cb = w * (bits / w) */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            /* Find top bit index in last word. */
            c = bits & (SP_WORD_SIZE - 1);
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Use as many bits from top to make remaining a multiple of window
             * size.
             */
            if ((bits % winBits) != 0) {
                c -= bits % winBits;
            }
            else {
                c -= winBits;
            }
            /* 6. tr = t[e / (2 ^ cb)] */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
            /* 6. Copy table value for first window. */
            _sp_copy(t[y], tr);

            /* 7. For each remaining window, while any whole window of bits
             * remains in the current word or in lower words. */
            for (; (i >= 0) || (c >= winBits); ) {
                int j;

                /* 7.1. y = e[(i-1)..(i-w)] */
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n <<= winBits;
                    c = SP_WORD_SIZE - winBits;
                }
                else if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }

                /* 7.2. tr = tr ^ (2 * w) - square once per window bit. */
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp, 0);
                    }
                }
                /* 7.3. tr = tr * t[y] */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp, 0);
                }
            }
        }

        if (err == MP_OKAY) {
            /* 8. tr = FromMont(tr) */
            err = _sp_mont_red(tr, m, mp, 0);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 9. r = tr */
        _sp_copy(tr, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12774. #ifndef SP_ALLOC_PREDEFINED
  12775. #undef SP_ALLOC
  12776. #undef SP_ALLOC_PREDEFINED
  12777. #endif
  12778. #endif /* !WC_NO_CACHE_RESISTANT */
  12779. #endif /* !WC_NO_HARDEN */
  12780. /* w = Log2(SP_WORD_SIZE) - 1 */
  12781. #if SP_WORD_SIZE == 8
  12782. #define EXP2_WINSIZE 2
  12783. #elif SP_WORD_SIZE == 16
  12784. #define EXP2_WINSIZE 3
  12785. #elif SP_WORD_SIZE == 32
  12786. #define EXP2_WINSIZE 4
  12787. #elif SP_WORD_SIZE == 64
  12788. #define EXP2_WINSIZE 5
  12789. #else
  12790. #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
  12791. #endif
  12792. /* Mask is all bits in window set. */
  12793. #define EXP2_MASK ((1 << EXP2_WINSIZE) - 1)
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
 * Is constant time and cache attack resistant.
 *
 * Multiplying by a power of 2 is a shift (sp_mul_2d), so no table of powers
 * is needed. A multiple of the modulus ('a') is added before each mod
 * operation to make it constant time, except when WC_NO_HARDEN is defined or
 * the modulus fits in one word.
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  w: window size based on #bits in word.
 *  1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
 *     else                 tr = 1
 *  2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
 *     else                            a = 0
 *  3. cb = w * (bits / w)
 *  4. y = e / (2 ^ cb)
 *  5. tr = (tr * (2 ^ y) + a) mod m
 *  6. For i in cb..w
 *   6.1. y = e[(i-1)..(i-w)]
 *   6.2. tr = tr ^ (2 * w)
 *   6.3. tr = (tr * (2 ^ y) + a) mod m
 *  7. if Words(m) > 1 then tr = FromMont(tr)
 *  8. r = tr
 *
 * @param  [in]   e       SP integer that is the exponent.
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
 *                        than count of digits in e.
 * @param  [in]   m       SP integer that is the modulus.
 * @param  [out]  r       SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int i = 0;              /* Index of current exponent word. */
    int c = 0;              /* Count of unconsumed bits in word n. */
    int y;                  /* Current window value. */
    int err = MP_OKAY;
    sp_int_digit mp = 0;    /* Montgomery multiplier. */
    sp_int_digit n = 0;     /* Current exponent word, top bits first. */
#ifndef WC_NO_HARDEN
    sp_int* a = NULL;       /* Multiple of m added to harden mod operation. */
    sp_int* tr = NULL;      /* Temporary result. */
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
#else
    DECL_SP_INT(tr, m->used * 2 + 1);
#endif
    /* Montgomery form only pays off for multi-word moduli. */
    int useMont = (m->used > 1);

#if 0
    sp_print_int(2, "a");
    sp_print(e, "b");
    sp_print(m, "m");
#endif

#ifndef WC_NO_HARDEN
    /* Allocate sp_ints for:
     *  - constant time add value for mod operation
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
#else
    /* Allocate sp_int for temporary result. */
    ALLOC_SP_INT(tr, m->used * 2 + 1, err, NULL);
#endif
    if (err == MP_OKAY) {
    #ifndef WC_NO_HARDEN
        a  = d[0];
        tr = d[1];

        _sp_init_size(a, m->used * 2 + 1);
    #endif
        _sp_init_size(tr, m->used * 2 + 1);
    }

    if ((err == MP_OKAY) && useMont) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
    }
    if (err == MP_OKAY) {
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
         *    else                 tr = 1
         */
        if (useMont) {
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
             * form.
             */
            err = sp_mont_norm(tr, m);
        }
        else {
            /* For single word modulus don't use Montgomery form. */
            err = sp_set(tr, 1);
        }
    }
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
     *    else                            a = 0
     */
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
    }
#endif
    if (err == MP_OKAY) {
        /* 3. cb = w * (bits / w) - consume top partial window first. */
        i = digits - 1;
        n = e->dp[i--];
        c = SP_WORD_SIZE;
#if EXP2_WINSIZE != 1
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
        if (c != SP_WORD_SIZE) {
            /* 4. y = e / (2 ^ cb) */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
        }
        else
#endif
        {
            /* 4. y = e / (2 ^ cb) */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }
        /* 5. tr = (tr * (2 ^ y) + a) mod m - multiply by 2^y is a shift. */
        err = sp_mul_2d(tr, y, tr);
    }
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        /* Add value to make mod operation constant time. */
        err = sp_add(tr, a, tr);
    }
#endif
    if (err == MP_OKAY) {
        err = sp_mod(tr, m, tr);
    }
    /* 6. For i in cb..w */
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
        int j;

        /* 6.1. y = e[(i-1)..(i-w)] */
        if (c == 0) {
            /* Bits from next digit. */
            n = e->dp[i--];
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n <<= EXP2_WINSIZE;
            c = SP_WORD_SIZE - EXP2_WINSIZE;
        }
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
        /* Only needed when the window size doesn't divide the word size. */
        else if (c < EXP2_WINSIZE) {
            /* Bits to end of digit and part of next */
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n = e->dp[i--];
            c = EXP2_WINSIZE - c;
            y |= (int)(n >> (SP_WORD_SIZE - c));
            n <<= c;
            c = SP_WORD_SIZE - c;
        }
#endif
        else {
            /* Bits from middle of digit */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }

        /* 6.2. tr = tr ^ (2 * w) - square once per window bit. */
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
            err = sp_sqr(tr, tr);
            if (err == MP_OKAY) {
                if (useMont) {
                    err = _sp_mont_red(tr, m, mp, 0);
                }
                else {
                    err = sp_mod(tr, m, tr);
                }
            }
        }

        /* 6.3. tr = (tr * (2 ^ y) + a) mod m */
        if (err == MP_OKAY) {
            err = sp_mul_2d(tr, y, tr);
        }
#ifndef WC_NO_HARDEN
        if ((err == MP_OKAY) && useMont) {
            /* Add value to make mod operation constant time. */
            err = sp_add(tr, a, tr);
        }
#endif
        if (err == MP_OKAY) {
            /* Reduce current result by modulus. */
            err = sp_mod(tr, m, tr);
        }
    }

    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
    if ((err == MP_OKAY) && useMont) {
        err = _sp_mont_red(tr, m, mp, 0);
        /* Reduction implementation returns number to range: 0..m-1. */
    }
    if (err == MP_OKAY) {
        /* 8. r = tr */
        _sp_copy(tr, r);
    }

#if 0
    sp_print(r, "rme");
#endif

#ifndef WC_NO_HARDEN
    FREE_SP_INT_ARRAY(d, NULL);
#else
    FREE_SP_INT(tr, m->used * 2 + 1);
#endif
    return err;
}
  12999. #endif
  13000. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  13001. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  13002. defined(OPENSSL_ALL)
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 *
 * Validates arguments, handles degenerate cases, then dispatches to a
 * size-specific assembly implementation (sp_ModExp_*) when available, or to
 * one of the generic constant-time implementations.
 *
 * Error returned when parameters r == e or r == m and base >= modulus.
 *
 * @param  [in]   b       SP integer that is the base.
 * @param  [in]   e       SP integer that is the exponent.
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
 *                        than count of digits in e.
 * @param  [in]   m       SP integer that is the modulus.
 * @param  [out]  r       SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
 *          e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Bit counts used by the size-specific dispatch below.
     * NOTE(review): these run before the NULL checks - assumes sp_count_bits
     * tolerates NULL arguments; confirm against its implementation. */
    int mBits = sp_count_bits(m);
    int bBits = sp_count_bits(b);
    int eBits = sp_count_bits(e);

    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
            (digits < 0)) {
        err = MP_VAL;
    }
    /* Ensure m is not too big: intermediates need 2 * m->used digits. */
    else if (m->used * 2 >= SP_INT_DIGITS) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    /* Check for invalid modulus. */
    if ((err == MP_OKAY) && sp_iszero(m)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check for unsupported negative values of exponent and modulus. */
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
        err = MP_VAL;
    }
#endif

    /* Check for degenerate cases: x mod 1 = 0, b ^ 0 = 1. */
    if ((err == MP_OKAY) && sp_isone(m)) {
        _sp_set(r, 0);
        done = 1;
    }
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
        _sp_set(r, 1);
        done = 1;
    }

    /* Ensure base is less than modulus - reduce into r and use r as base. */
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
        /* r cannot alias e or m as it is about to be overwritten. */
        if ((r == e) || (r == m)) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            err = sp_mod(b, m, r);
        }
        if (err == MP_OKAY) {
            b = r;
        }
    }
    /* Check for degenerate case of base: 0 ^ e = 0 (e > 0 here). */
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
        _sp_set(r, 0);
        done = 1;
    }

    /* Ensure SP integers have space for intermediate values. */
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
        err = MP_VAL;
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Use code optimized for specific sizes if possible */
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
    (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
    #ifndef WOLFSSL_SP_NO_2048
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
                (eBits <= 1024)) {
            err = sp_ModExp_1024((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
                 (eBits <= 2048)) {
            err = sp_ModExp_2048((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_3072
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
                (eBits <= 1536)) {
            err = sp_ModExp_1536((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
                 (eBits <= 3072)) {
            err = sp_ModExp_3072((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifdef WOLFSSL_SP_4096
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
                (eBits <= 4096)) {
            err = sp_ModExp_4096((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
#endif
        {
            /* SP does not support size. */
        }
    }

#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
    defined(NO_DH)
    if ((!done) && (err == MP_OKAY)) {
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    }
#else
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
         mp_isodd(m)) {
        /* Use the generic base 2 implementation. */
        err = _sp_exptmod_base_2(e, digits, m, r);
    }
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
    #ifndef WC_NO_HARDEN
        /* Use constant time version hardened against timing attacks and
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. Requires an
         * odd multi-word modulus for Montgomery reduction. */
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
    #else
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    #endif
    }
    else
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
    if ((!done) && (err == MP_OKAY)) {
        /* Otherwise use the generic implementation hardened against
         * timing and cache attacks. */
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
    }
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
#else
    if ((!done) && (err == MP_OKAY)) {
        err = MP_VAL;
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */

    /* Silence unused warnings under configurations that skip the dispatch. */
    (void)mBits;
    (void)bBits;
    (void)eBits;
    (void)digits;

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif
    return err;
}
  13173. #endif
  13174. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  13175. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  13176. defined(OPENSSL_ALL)
  13177. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13178. *
  13179. * @param [in] b SP integer that is the base.
  13180. * @param [in] e SP integer that is the exponent.
  13181. * @param [in] m SP integer that is the modulus.
  13182. * @param [out] r SP integer to hold result.
  13183. *
  13184. * @return MP_OKAY on success.
  13185. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13186. * @return MP_MEM when dynamic memory allocation fails.
  13187. */
int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters - all SP integer pointers must be non-NULL. */
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Save vector registers for implementations that use them. On save
     * failure the macro's expression stores the failure code into err,
     * which then skips the exponentiation below. */
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
    if (err == MP_OKAY) {
        /* Use all digits of the exponent as the digit count. */
        err = sp_exptmod_ex(b, e, (int)e->used, m, r);
    }
    RESTORE_VECTOR_REGISTERS();
    return err;
}
  13202. #endif
  13203. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  13204. #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
  13205. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13206. * Creates a window of precalculated exponents with base in Montgomery form.
  13207. * Sliding window and is NOT constant time.
  13208. *
  13209. * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
  13210. * e.g. when n=6, b^32..b^63
  13211. * Algorithm:
  13212. * 1. Ensure base is less than modulus.
  13213. * 2. Convert base to Montgomery form
  13214. * 3. Set result to table entry for top window bits, or
  13215. * if less than windows bits in exponent, 1 in Montgomery form.
  13216. * 4. While at least window bits left:
 * 4.1. Count number of and skip leading 0 bits unless less than window bits
 *      left.
  13219. * 4.2. Montgomery square result for each leading 0 and window bits if bits
  13220. * left.
  13221. * 4.3. Break if less than window bits left.
 * 4.4. Get top window bits from exponent and drop.
  13223. * 4.5. Montgomery multiply result by table entry.
  13224. * 5. While bits left:
 * 5.1. Montgomery square result
  13226. * 5.2. If exponent bit set
  13227. * 5.2.1. Montgomery multiply result by Montgomery form of base.
  13228. * 6. Convert result back from Montgomery form.
  13229. *
  13230. * @param [in] b SP integer that is the base.
  13231. * @param [in] e SP integer that is the exponent.
  13232. * @param [in] bits Number of bits in exponent to use. May be greater than
  13233. * count of bits in e.
  13234. * @param [in] m SP integer that is the modulus.
  13235. * @param [out] r SP integer to hold result.
  13236. *
  13237. * @return MP_OKAY on success.
  13238. * @return MP_MEM when dynamic memory allocation fails.
  13239. */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i = 0;
    int bits;       /* Significant bits in exponent. */
    int winBits;    /* Width of sliding window in bits. */
    int preCnt;     /* Number of pre-computed table entries. */
    int err = MP_OKAY;
    int done = 0;
    sp_int* tr = NULL;  /* Temporary result (in Montgomery form). */
    sp_int* bm = NULL;  /* Base in Montgomery form. */
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#else
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#endif

    bits = sp_count_bits(e);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     * Note: for 4096-bit values, 7-bit window is slightly better.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* Top bit of exponent fixed as 1 for pre-calculated window. */
    preCnt = 1 << (winBits - 1);

    /* Allocate sp_ints for:
     *  - pre-computation table (preCnt entries)
     *  - temporary result
     *  - Montgomery form of base
     */
#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Set variables to use allocated memory: last two entries are the
         * temporary result and the Montgomery base. */
        tr = t[preCnt + 0];
        bm = t[preCnt + 1];

        /* Initialize all allocated sp_ints. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);
        _sp_init_size(bm, m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, bm);
            /* Handle base == modulus: result is 0. */
            if ((err == MP_OKAY) && sp_iszero(bm)) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into Montgomery base variable. */
            _sp_copy(b, bm);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int y = 0;          /* Index into pre-computed table. */
        int c = 0;          /* Count of unconsumed bits in digit n. */
        sp_int_digit mp;    /* Montgomery multiplier. */

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 2. Convert base to Montgomery form. */
            err = sp_mul(bm, t[0], bm);
        }
        if (err == MP_OKAY) {
            /* bm = bm mod m, temporary size has to be bigger than bm->used. */
            err = _sp_div(bm, m, NULL, bm, bm->used + 1);
        }
        if (err == MP_OKAY) {
            /* Copy Montgomery form of base into first element of table. */
            _sp_copy(bm, t[0]);
        }
        /* Calculate b^(2^(winBits-1)) by repeated Montgomery squaring. */
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp, 0);
            }
        }
        /* For each table entry after first. */
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
            /* Multiply previous entry by the base in Mont form into table. */
            err = sp_mul(t[i-1], bm, t[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp, 0);
            }
        }

        /* 3. Set result to table entry for top window bits, or
         *    if less than window bits in exponent, 1 in Montgomery form.
         */
        if (err == MP_OKAY) {
            sp_int_digit n;     /* Current digit of exponent (top-aligned). */
            /* Mask for calculating index into pre-computed table. */
            sp_int_digit mask = (sp_int_digit)preCnt - 1;

            /* Find the top bit. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            c = bits % SP_WORD_SIZE;
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Put top bit at highest offset in digit. */
            n <<= SP_WORD_SIZE - c;

            if (bits >= winBits) {
                /* Top bit set. Copy from window. */
                if (c < winBits) {
                    /* Bits to end of digit and part of next. */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit. */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }
                _sp_copy(t[y], tr);
            }
            else {
                /* 1 in Montgomery form. */
                err = sp_mont_norm(tr, m);
            }

            /* 4. While at least window bits left. */
            while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                /* Number of squares to do before multiply due to top bits
                 * being 0. */
                int sqrs = 0;

                /* 4.1. Count number of and skip leading 0 bits unless less
                 *      than window bits left.
                 */
                do {
                    /* Make sure n has bits from the right digit. */
                    if (c == 0) {
                        n = e->dp[i--];
                        c = SP_WORD_SIZE;
                    }
                    /* Mask off the next bit. */
                    if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
                        break;
                    }

                    /* Another square needed. */
                    sqrs++;
                    /* Skip bit. */
                    n <<= 1;
                    c--;
                }
                while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));

                if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                    /* Add squares needed before using table entry. */
                    sqrs += winBits;
                }

                /* 4.2. Montgomery square result for each leading 0 and window
                 *      bits if bits left.
                 */
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp, 0);
                    }
                }

                /* 4.3. Break if less than window bits left. */
                if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
                    break;
                }

                /* 4.4. Get top window bits from exponent and drop. */
                if (err == MP_OKAY) {
                    if (c == 0) {
                        /* Bits from next digit. */
                        n = e->dp[i--];
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c = SP_WORD_SIZE - winBits;
                    }
                    else if (c < winBits) {
                        /* Bits to end of digit and part of next. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n = e->dp[i--];
                        c = winBits - c;
                        y |= (int)(n >> (SP_WORD_SIZE - c));
                        n <<= c;
                        c = SP_WORD_SIZE - c;
                    }
                    else {
                        /* Bits from middle of digit. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c -= winBits;
                    }
                    y &= (int)mask;
                }

                /* 4.5. Montgomery multiply result by table entry. */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp, 0);
                }
            }

            /* Finished multiplying in table entries. */
            if ((err == MP_OKAY) && (c > 0)) {
                /* Handle remaining bits one at a time.
                 * Window values have top bit set and can't be used. */
                n = e->dp[0];
                /* 5. While bits left: */
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
                    /* 5.1. Montgomery square result. */
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp, 0);
                    }
                    /* 5.2. If exponent bit set */
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
                        /* 5.2.1. Montgomery multiply result by Montgomery form
                         *        of base.
                         */
                        err = sp_mul(tr, bm, tr);
                        if (err == MP_OKAY) {
                            err = _sp_mont_red(tr, m, mp, 0);
                        }
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 6. Convert result back from Montgomery form. */
            err = _sp_mont_red(tr, m, mp, 0);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(tr, r);
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(t, NULL);
#else
    FREE_SP_INT_ARRAY(t, NULL);
#endif
    return err;
}
  13505. #else
  13506. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13507. * Non-constant time implementation.
  13508. *
  13509. * Algorithm:
  13510. * 1. Convert base to Montgomery form
  13511. * 2. Set result to base (assumes exponent is not zero)
  13512. * 3. For each bit in exponent starting at second highest
 * 3.1. Montgomery square result
  13514. * 3.2. If exponent bit set
  13515. * 3.2.1. Montgomery multiply result by Montgomery form of base.
  13516. * 4. Convert result back from Montgomery form.
  13517. *
  13518. * @param [in] b SP integer that is the base.
  13519. * @param [in] e SP integer that is the exponent.
  13520. * @param [in] m SP integer that is the modulus.
  13521. * @param [out] r SP integer to hold result.
  13522. *
  13523. * @return MP_OKAY on success.
  13524. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13525. * @return MP_MEM when dynamic memory allocation fails.
  13526. */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int y = 0;
    int bits = sp_count_bits(e);
    sp_int_digit mp;    /* Montgomery multiplier. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);

    /* Allocate memory for:
     *  - Montgomery form of base
     *  - Temporary result (in case r is same var as another parameter). */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);

        /* Ensure base is less than modulus and copy into temp. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus: result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into temp. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            /* 1. Convert base to Montgomery form. */
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
            _sp_copy(t[0], t[1]);
        }

        /* 3. For each bit in exponent starting at second highest. */
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. Montgomery square result. */
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp, 0);
            }
            if (err == MP_OKAY) {
                /* Get bit at index i. */
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 3.2. If exponent bit set */
                if (y != 0) {
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
                    err = sp_mul(t[0], t[1], t[0]);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t[0], m, mp, 0);
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 4. Convert from Montgomery form. */
            err = _sp_mont_red(t[0], m, mp, 0);
            /* Reduction implementation returns number of range 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  13608. #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
  13609. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13610. * Non-constant time implementation.
  13611. *
  13612. * @param [in] b SP integer that is the base.
  13613. * @param [in] e SP integer that is the exponent.
  13614. * @param [in] m SP integer that is the modulus.
  13615. * @param [out] r SP integer to hold result.
  13616. *
  13617. * @return MP_OKAY on success.
  13618. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13619. * @return MP_MEM when dynamic memory allocation fails.
  13620. */
int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters - all SP integer pointers must be non-NULL. */
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    if (err != MP_OKAY) {
    }
    /* Handle special cases. */
    else if (sp_iszero(m)) {
        /* Modulus of zero is invalid. */
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
        /* Negative exponent or modulus not supported. */
        err = MP_VAL;
    }
#endif
    /* x mod 1 is always 0. */
    else if (sp_isone(m)) {
        _sp_set(r, 0);
    }
    /* b^0 mod m = 1 mod m = 1. */
    else if (sp_iszero(e)) {
        _sp_set(r, 1);
    }
    /* 0^x mod m = 0 mod m = 0. */
    else if (sp_iszero(b)) {
        _sp_set(r, 0);
    }
    /* Ensure SP integers have space for intermediate values. */
    else if (m->used * 2 >= r->size) {
        err = MP_VAL;
    }
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
    /* Montgomery-based implementation below needs an odd modulus; fall back
     * to the generic implementation for even m. */
    else if (mp_iseven(m)) {
        err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
    }
#endif
    else {
        err = _sp_exptmod_nct(b, e, m, r);
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif
    return err;
}
  13677. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  13678. /***************
  13679. * 2^e functions
  13680. ***************/
  13681. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  13682. /* Divide by 2^e: r = a >> e and rem = bits shifted out
  13683. *
  13684. * @param [in] a SP integer to divide.
  13685. * @param [in] e Exponent bits (dividing by 2^e).
  13686. * @param [in] m SP integer that is the modulus.
  13687. * @param [out] r SP integer to hold result.
  13688. * @param [out] rem SP integer to hold remainder.
  13689. *
  13690. * @return MP_OKAY on success.
  13691. * @return MP_VAL when a is NULL or e is negative.
  13692. */
int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
{
    int err = MP_OKAY;

    /* Validate parameters. r and rem are not NULL-checked here;
     * callers must pass a valid r (rem may be NULL). */
    if ((a == NULL) || (e < 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Number of bits remaining after shift. */
        int remBits = sp_count_bits(a) - e;

        if (remBits <= 0) {
            /* Shifting down by more bits than in number: quotient is 0 and
             * the whole of a is the remainder. */
            _sp_zero(r);
            if (rem != NULL) {
                err = sp_copy(a, rem);
            }
        }
        else {
            if (rem != NULL) {
                /* Copy a into remainder. */
                err = sp_copy(a, rem);
            }
            if (err == MP_OKAY) {
                /* Shift a down by e bits into result. */
                err = sp_rshb(a, e, r);
            }
            if ((err == MP_OKAY) && (rem != NULL)) {
                /* Set used to the digit count covering e bits and mask off
                 * top digit of remainder. */
                rem->used = ((unsigned int)e + SP_WORD_SIZE - 1) >>
                    SP_WORD_SHIFT;
                e &= SP_WORD_MASK;
                if (e > 0) {
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
                }
                /* Remove leading zeros from remainder. */
                sp_clamp(rem);
#ifdef WOLFSSL_SP_INT_NEGATIVE
                rem->sign = MP_ZPOS;
#endif
            }
        }
    }
    return err;
}
  13736. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13737. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  13738. defined(HAVE_ECC)
  13739. /* The bottom e bits: r = a & ((1 << e) - 1)
  13740. *
  13741. * @param [in] a SP integer to reduce.
  13742. * @param [in] e Modulus bits (modulus equals 2^e).
  13743. * @param [out] r SP integer to hold result.
  13744. *
  13745. * @return MP_OKAY on success.
  13746. * @return MP_VAL when a or r is NULL, e is negative or e is too large for
  13747. * result.
  13748. */
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
{
    int err = MP_OKAY;
    /* Number of digits covering e bits. Computed before validation; for a
     * negative e the value is unused as the check below rejects it. */
    unsigned int digits = ((unsigned int)e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;

    if ((a == NULL) || (r == NULL) || (e < 0)) {
        err = MP_VAL;
    }
    /* Result must have space for the required digits. */
    if ((err == MP_OKAY) && (digits > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Copy a into r if not same pointer.
         * NOTE(review): copies 'digits' words even when digits > a->used -
         * presumes dp is allocated and readable to that length; confirm
         * against sp_int allocation guarantees. */
        if (a != r) {
            XMEMCPY(r->dp, a->dp, digits * SP_WORD_SIZEOF);
            r->used = a->used;
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = a->sign;
#endif
        }
        /* Modify result if a is bigger or same digit size. */
#ifndef WOLFSSL_SP_INT_NEGATIVE
        if (digits <= a->used)
#else
        /* Need to make negative positive and mask. */
        if ((a->sign == MP_NEG) || (digits <= a->used))
#endif
        {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                unsigned int i;
                sp_int_digit carry = 0;

                /* Negate value: two's complement across digits. carry
                 * records whether any lower digit was non-zero. */
                for (i = 0; i < r->used; i++) {
                    sp_int_digit next = r->dp[i] > 0;
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
                    carry |= next;
                }
                /* Sign-extend the negation through remaining digits. */
                for (; i < digits; i++) {
                    r->dp[i] = (sp_int_digit)0 - carry;
                }
                r->sign = MP_ZPOS;
            }
#endif
            /* Set used and mask off top digit of result. */
            r->used = digits;
            e &= SP_WORD_MASK;
            if (e > 0) {
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
            }
            sp_clamp(r);
        }
    }
    return err;
}
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
  13804. #if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  13805. !defined(NO_DH))) || defined(OPENSSL_ALL)
  13806. /* Multiply by 2^e: r = a << e
  13807. *
  13808. * @param [in] a SP integer to multiply.
  13809. * @param [in] e Multiplier bits (multiplier equals 2^e).
  13810. * @param [out] r SP integer to hold result.
  13811. *
  13812. * @return MP_OKAY on success.
  13813. * @return MP_VAL when a or r is NULL, e is negative, or result is too big for
  13814. * result size.
  13815. */
  13816. int sp_mul_2d(const sp_int* a, int e, sp_int* r)
  13817. {
  13818. int err = MP_OKAY;
  13819. /* Validate parameters. */
  13820. if ((a == NULL) || (r == NULL) || (e < 0)) {
  13821. err = MP_VAL;
  13822. }
  13823. /* Ensure result has enough allocated digits for result. */
  13824. if ((err == MP_OKAY) &&
  13825. ((unsigned int)(sp_count_bits(a) + e) > r->size * SP_WORD_SIZE)) {
  13826. err = MP_VAL;
  13827. }
  13828. if (err == MP_OKAY) {
  13829. /* Copy a into r as left shift function works on the number. */
  13830. if (a != r) {
  13831. err = sp_copy(a, r);
  13832. }
  13833. }
  13834. if (err == MP_OKAY) {
  13835. #if 0
  13836. sp_print(a, "a");
  13837. sp_print_int(e, "n");
  13838. #endif
  13839. err = sp_lshb(r, e);
  13840. #if 0
  13841. sp_print(r, "rsl");
  13842. #endif
  13843. }
  13844. return err;
  13845. }
#endif /* (WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH)) || OPENSSL_ALL */
  13847. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  13848. defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  13849. /* START SP_SQR implementations */
  13850. /* This code is generated.
  13851. * To generate:
  13852. * cd scripts/sp/sp_int
  13853. * ./gen.sh
  13854. * File sp_sqr.c contains code.
  13855. */
  13856. #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
  13857. #ifdef SQR_MUL_ASM
  13858. /* Square a and store in r. r = a * a
  13859. *
  13860. * @param [in] a SP integer to square.
  13861. * @param [out] r SP integer result.
  13862. *
  13863. * @return MP_OKAY on success.
  13864. * @return MP_MEM when dynamic memory allocation fails.
  13865. */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 dynamic stack array sized to hold the first half of the result. */
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
#else
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(
        sizeof(sp_int_digit) * (((a->used + 1) / 2) * 2 + 1), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if ((err == MP_OKAY) && (a->used <= 1)) {
        /* Single digit: one squaring macro produces both result digits. */
        sp_int_digit l;
        sp_int_digit h;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        r->dp[0] = h;
        r->dp[1] = l;
    }
    else if (err == MP_OKAY) {
        /* Multi-digit: column-wise products with a three-digit accumulator
         * (l, h, o) maintained by the SP_ASM_* macros. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        /* First half of columns is written to t, second half directly to
         * r->dp (p switches over at the end of the first second-loop pass). */
        sp_int_digit* p = t;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns 1 .. a->used - 1 (two result digits per iteration). */
        for (k = 1; k < (a->used + 1) / 2; k++) {
            /* Cross products for column 2k-1 (each pair counted twice). */
            i = k;
            j = (int)(k - 1);
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            /* Square term plus cross products for column 2k. */
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining columns: upper index bounded by a->used instead of j. */
        for (; k < a->used; k++) {
            i = k;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2] = l;
            l = h;
            h = o;
            o = 0;
            /* After the first pass of this loop write into result. */
            p = r->dp;
        }
        /* Store top digit and copy buffered low digits into result. */
        r->dp[k * 2 - 1] = l;
        XMEMCPY(r->dp, t, (((a->used + 1) / 2) * 2 + 1) * sizeof(sp_int_digit));
    }
    if (err == MP_OKAY) {
        /* Set result size and strip leading zero digits. */
        r->used = a->used * 2;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  13965. #else /* !SQR_MUL_ASM */
  13966. /* Square a and store in r. r = a * a
  13967. *
  13968. * @param [in] a SP integer to square.
  13969. * @param [out] r SP integer result.
  13970. *
  13971. * @return MP_OKAY on success.
  13972. * @return MP_MEM when dynamic memory allocation fails.
  13973. */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 dynamic stack array sized to hold the full result. */
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
#ifndef WOLFSSL_SP_INT_SQR_VOLATILE
        sp_int_word w;
        sp_int_word l;
        sp_int_word h;
#else
        /* volatile accumulators when the build requests it (e.g. to defeat
         * problematic compiler optimization of this loop). */
        volatile sp_int_word w;
        volatile sp_int_word l;
        volatile sp_int_word h;
#endif
#ifdef SP_WORD_OVERFLOW
        /* Extra overflow accumulator for platforms where the double-width
         * word can overflow during column accumulation. */
        sp_int_word o;
#endif

        /* Column 0: square of lowest digit. */
        w = (sp_int_word)a->dp[0] * a->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        /* Each column k sums products a[i]*a[j] with i + j == k. */
        for (k = 1; k <= (a->used - 1) * 2; k++) {
            i = k / 2;
            j = (int)(k - i);
            /* Diagonal square term occurs once, only when k is even. */
            if (i == (unsigned int)j) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Off-diagonal products are added twice (a[i]*a[j] == a[j]*a[i]). */
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Emit column digit and shift accumulator down one digit. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        /* Top digit, then copy buffered result into r and clamp. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14069. #endif /* SQR_MUL_ASM */
  14070. #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
  14071. #ifndef WOLFSSL_SP_SMALL
  14072. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  14073. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  14074. #ifndef SQR_MUL_ASM
  14075. /* Square a and store in r. r = a * a
  14076. *
  14077. * Long-hand implementation.
  14078. *
  14079. * @param [in] a SP integer to square.
  14080. * @param [out] r SP integer result.
  14081. *
  14082. * @return MP_OKAY on success.
  14083. * @return MP_MEM when dynamic memory allocation fails.
  14084. */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    /* All 10 distinct partial products of a 4-digit square:
     *   w[0]=a0*a0 w[1]=a0*a1 w[2]=a0*a2 w[3]=a1*a1 w[4]=a0*a3
     *   w[5]=a1*a2 w[6]=a1*a3 w[7]=a2*a2 w[8]=a2*a3 w[9]=a3*a3
     * w[0] is reused as the running column accumulator afterwards. */
    sp_int_word w[10];
#endif
    const sp_int_digit* da = a->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* Compute every partial product first; all reads of the input
         * digits complete before any result digit is written, so squaring
         * in place (r == a) is safe. */
        w[0] = (sp_int_word)da[0] * da[0];
        w[1] = (sp_int_word)da[0] * da[1];
        w[2] = (sp_int_word)da[0] * da[2];
        w[3] = (sp_int_word)da[1] * da[1];
        w[4] = (sp_int_word)da[0] * da[3];
        w[5] = (sp_int_word)da[1] * da[2];
        w[6] = (sp_int_word)da[1] * da[3];
        w[7] = (sp_int_word)da[2] * da[2];
        w[8] = (sp_int_word)da[2] * da[3];
        w[9] = (sp_int_word)da[3] * da[3];

        /* Column 0: a0^2 low digit. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 1: carry + 2 * low(a0*a1) (cross products are doubled in
         * a square, added twice to avoid a doubling overflow in the word). */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 2: carry + 2*high(a0*a1) + 2*low(a0*a2) + low(a1^2). */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 3: carry + 2*high(a0*a2) + high(a1^2) + 2*low(a0*a3)
         * + 2*low(a1*a2). */
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 4: carry + 2*high(a0*a3) + 2*high(a1*a2) + 2*low(a1*a3)
         * + low(a2^2). */
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 5: carry + 2*high(a1*a3) + high(a2^2) + 2*low(a2*a3). */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 6: carry + 2*high(a2*a3) + low(a3^2). */
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 7 (top): carry + high(a3^2). */
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        r->dp[7] = (sp_int_digit)w[0];

        r->used = 8;
        /* Trim any leading zero digits from the result. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14176. #else /* SQR_MUL_ASM */
  14177. /* Square a and store in r. r = a * a
  14178. *
  14179. * Comba implementation.
  14180. *
  14181. * @param [in] a SP integer to square.
  14182. * @param [out] r SP integer result.
  14183. *
  14184. * @return MP_OKAY on success.
  14185. * @return MP_MEM when dynamic memory allocation fails.
  14186. */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    /* (l, h, o) is a three-digit column accumulator that shifts down one
     * digit (l = h; h = o; o = 0;) as each output column is stored. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low four columns are staged in t[] and copied into r->dp last so
     * that squaring in place (r == a) does not overwrite input digits
     * still being read. */
    sp_int_digit t[4];

    /* Column 0: a[0]^2. Low digit lands in h, high carries via l
     * (evident from t[0] = h below). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. Cross products (i != j) appear twice in a
     * square, hence the doubling MUL_ADD2 macros; the _NO variants are
     * used where the third (overflow) digit cannot be reached. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*a[0]*a[3] + 2*a[1]*a[2]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*a[1]*a[3] + a[2]^2. From here on, output digits above
     * the input's used length can be written directly into r->dp. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*a[2]*a[3]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: a[3]^2 plus final carry; no further overflow. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;

    /* Copy the staged low columns into place now that all input digits
     * have been consumed. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    /* Trim any leading zero digits from the result. */
    sp_clamp(r);

    return MP_OKAY;
}
  14231. #endif /* SQR_MUL_ASM */
  14232. #endif /* SP_WORD_SIZE == 64 */
  14233. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  14234. #ifdef SQR_MUL_ASM
  14235. /* Square a and store in r. r = a * a
  14236. *
  14237. * Comba implementation.
  14238. *
  14239. * @param [in] a SP integer to square.
  14240. * @param [out] r SP integer result.
  14241. *
  14242. * @return MP_OKAY on success.
  14243. * @return MP_MEM when dynamic memory allocation fails.
  14244. */
static int _sp_sqr_6(const sp_int* a, sp_int* r)
{
    /* (l, h, o): three-digit column accumulator, shifted down one digit
     * per stored column. Column k sums a[i]*a[j] over i + j == k with
     * cross products doubled. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* (tl, th, to): scratch accumulator for wide columns — the cross
     * products are summed once here, then added doubled into (l, h, o)
     * with SP_ASM_ADD_DBL_3, halving the number of additions. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Low columns staged here and copied last so r == a is safe. */
    sp_int_digit t[6];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb-1 32-bit macros accumulate into
     * 'to' rather than overwriting it, so it must start at zero; other
     * builds first write it via SP_ASM_MUL_SET. Confirm against the
     * macro definitions. */
    to = 0;
#endif

    /* Column 0: a[0]^2 (low digit in h — see t[0] = h). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*(a[0]*a[5] + a[1]*a[4] + a[2]*a[3]) — summed once in
     * (tl, th, to), then added doubled. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*(a[1]*a[5] + a[2]*a[4]) + a[3]^2. Digits from here up
     * are written straight into r->dp. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*(a[2]*a[5] + a[3]*a[4]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*a[3]*a[5] + a[4]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*a[4]*a[5]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Columns 10 and 11: a[5]^2 plus final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;

    /* All input digits consumed; place the staged low columns. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    /* Trim any leading zero digits. */
    sp_clamp(r);

    return MP_OKAY;
}
  14323. #endif /* SQR_MUL_ASM */
  14324. #endif /* SP_WORD_SIZE == 64 */
  14325. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  14326. #ifdef SQR_MUL_ASM
  14327. /* Square a and store in r. r = a * a
  14328. *
  14329. * Comba implementation.
  14330. *
  14331. * @param [in] a SP integer to square.
  14332. * @param [out] r SP integer result.
  14333. *
  14334. * @return MP_OKAY on success.
  14335. * @return MP_MEM when dynamic memory allocation fails.
  14336. */
static int _sp_sqr_8(const sp_int* a, sp_int* r)
{
    /* (l, h, o): three-digit column accumulator; column k sums a[i]*a[j]
     * over i + j == k, cross products doubled. Shifted down one digit
     * per stored column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* (tl, th, to): scratch for wide columns — cross products summed
     * once, then added doubled via SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Low columns staged here and copied last so r == a is safe. */
    sp_int_digit t[8];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb-1 macros accumulate into 'to';
     * pre-clear it here. Confirm against the macro definitions. */
    to = 0;
#endif

    /* Column 0: a[0]^2 (low digit in h — see t[0] = h). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 (diagonal a[3]^2 added undoubled, outside the scratch). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. Digits from here up are written straight into r->dp. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Columns 14 and 15: a[7]^2 plus final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;

    /* All input digits consumed; place the staged low columns. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    /* Trim any leading zero digits. */
    sp_clamp(r);

    return MP_OKAY;
}
  14450. #endif /* SQR_MUL_ASM */
  14451. #endif /* SP_WORD_SIZE == 32 */
  14452. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  14453. #ifdef SQR_MUL_ASM
  14454. /* Square a and store in r. r = a * a
  14455. *
  14456. * Comba implementation.
  14457. *
  14458. * @param [in] a SP integer to square.
  14459. * @param [out] r SP integer result.
  14460. *
  14461. * @return MP_OKAY on success.
  14462. * @return MP_MEM when dynamic memory allocation fails.
  14463. */
static int _sp_sqr_12(const sp_int* a, sp_int* r)
{
    /* (l, h, o): three-digit column accumulator; column k sums a[i]*a[j]
     * over i + j == k, cross products doubled. Shifted down one digit
     * per stored column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* (tl, th, to): scratch for wide columns — cross products summed
     * once, then added doubled via SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Low columns staged here and copied last so r == a is safe. */
    sp_int_digit t[12];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb-1 macros accumulate into 'to';
     * pre-clear it here. Confirm against the macro definitions. */
    to = 0;
#endif

    /* Column 0: a[0]^2 (low digit in h — see t[0] = h). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 (diagonal a[3]^2 added undoubled, outside the scratch). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11 (widest: six doubled cross products). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12. Digits from here up are written straight into r->dp. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 14. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 15. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 16. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 17. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 18 (narrow columns revert to direct doubled adds). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 19. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 20. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 21. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Columns 22 and 23: a[11]^2 plus final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;

    /* All input digits consumed; place the staged low columns. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    /* Trim any leading zero digits. */
    sp_clamp(r);

    return MP_OKAY;
}
  14659. #endif /* SQR_MUL_ASM */
  14660. #endif /* SP_WORD_SIZE == 32 */
  14661. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  14662. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  14663. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  14664. (SP_WORD_SIZE == 64)))
  14665. #if SP_INT_DIGITS >= 32
  14666. /* Square a and store in r. r = a * a
  14667. *
  14668. * Comba implementation.
  14669. *
  14670. * @param [in] a SP integer to square.
  14671. * @param [out] r SP integer result.
  14672. *
  14673. * @return MP_OKAY on success.
  14674. * @return MP_MEM when dynamic memory allocation fails.
  14675. */
static int _sp_sqr_16(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    /* (l, h, o): three-digit column accumulator; column k sums a[i]*a[j]
     * over i + j == k, cross products doubled. Shifted down one digit
     * per stored column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* (tl, th, to): scratch for wide columns — cross products summed
     * once, then added doubled via SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    /* Low columns staged here and copied into r->dp last so r == a is
     * safe. */
    sp_int_digit t[16];
#endif

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb-1 macros accumulate into 'to';
     * pre-clear it here. Confirm against the macro definitions. */
    to = 0;
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column 0: a[0]^2 (low digit in h — see t[0] = h). */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        /* Column 1. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 2. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 3. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 4. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 5. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 6 (diagonal squares added undoubled, outside scratch). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 7. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 8. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 9. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 10. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 11. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 12. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 13. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 14. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 15 (widest: eight doubled cross products). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 16. Digits from here up are written straight into
         * r->dp. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 17. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 18. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 19. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 20. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 21. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 22. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 23. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 24. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 25. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 26 (narrow columns revert to direct doubled adds). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 27. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 28. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 29. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Columns 30 and 31: a[15]^2 plus final carry. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;

        /* All input digits consumed; place the staged low columns. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        /* Trim any leading zero digits. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14988. #endif /* SP_INT_DIGITS >= 32 */
  14989. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
  14990. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
  14991. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  14992. #if SP_INT_DIGITS >= 48
  14993. /* Square a and store in r. r = a * a
  14994. *
  14995. * Comba implementation.
  14996. *
  14997. * @param [in] a SP integer to square.
  14998. * @param [out] r SP integer result.
  14999. *
  15000. * @return MP_OKAY on success.
  15001. * @return MP_MEM when dynamic memory allocation fails.
  15002. */
static int _sp_sqr_24(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    /* Running column accumulator: low word, high word, overflow word. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Temporary accumulator for one column's a[i]*a[j] (i < j) products;
     * doubled once at the end of the column via SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    /* Scratch for the low 24 result digits: r may alias a, so the low half
     * is staged here and copied into r->dp only after every digit of a has
     * been read. */
    sp_int_digit t[24];
#endif

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    to = 0;
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Comba squaring: result digit k is the sum of all a[i]*a[j] with
         * i + j == k. Cross products (i != j) appear twice, so they are
         * summed once in (tl, th, to) and doubled; the square term a[k/2]^2
         * (even k only) is added once directly into (l, h, o). After each
         * column the accumulator shifts down one word: l = h, h = o, o = 0.
         * Columns 0..23 go to scratch t[]; columns 24..47 write r->dp
         * directly as those no longer read low digits of a. */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[16] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[17] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[18] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[19] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[20] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[21] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[22] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[23] = l;
        l = h;
        h = o;
        o = 0;
        /* Columns 24 and up no longer involve a->dp[0..k-24] that have not
         * yet been consumed, so results can be written to r->dp directly. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[29] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[30] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[31] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[32] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[33] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[34] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[35] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[36] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[37] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[38] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[39] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[40] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[41] = l;
        l = h;
        h = o;
        o = 0;
        /* Final short columns: few enough products to double each one
         * individually (MUL_ADD2) instead of accumulating then doubling. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
        r->dp[42] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
        r->dp[43] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
        r->dp[44] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
        r->dp[45] = l;
        l = h;
        h = o;
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
        r->dp[46] = l;
        r->dp[47] = h;
        /* All of a has been read; safe to overwrite the low half of r now. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  15559. #endif /* SP_INT_DIGITS >= 48 */
  15560. #if SP_INT_DIGITS >= 64
  15561. /* Square a and store in r. r = a * a
  15562. *
  15563. * Karatsuba implementation.
  15564. *
  15565. * @param [in] a SP integer to square.
  15566. * @param [out] r SP integer result.
  15567. *
  15568. * @return MP_OKAY on success.
  15569. * @return MP_MEM when dynamic memory allocation fails.
  15570. */
static int _sp_sqr_32(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    /* Carry out of the top digit of a0 + a1 (0 or 1). */
    sp_int_digit ca;
    DECL_SP_INT(a1, 16);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT(a1, 16, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 shares storage with r: the low-half square is computed
         * directly into the result. */
        z0 = r;

        /* a1 = high 16 digits of a. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_16(a1, z2);
    }
    if (err == MP_OKAY) {
        l = 0;
        h = 0;
        /* a1 = a0 + a1 (mod 2^(16*SP_WORD_SIZE)); ca holds the carry out. */
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_16(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_16(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 16 */
        z1->dp[32] = ca;
        l = 0;
        if (ca) {
            /* The carry was dropped when squaring the truncated sum, so
             * correct z1: add 2 * (truncated a0 + a1) at digit 16 and the
             * carry itself (ca^2 == ca) at digit 32 (set above). */
            l = z1->dp[0 + 16];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 16] = l;
            l = h;
            h = 0;
            for (i = 1; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Above digit 31 of r only z1 contributes (plus carry). */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of z2 land above everything written so far. */
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15692. #endif /* SP_INT_DIGITS >= 64 */
  15693. #if SP_INT_DIGITS >= 96
  15694. /* Square a and store in r. r = a * a
  15695. *
  15696. * Karatsuba implementation.
  15697. *
  15698. * @param [in] a SP integer to square.
  15699. * @param [out] r SP integer result.
  15700. *
  15701. * @return MP_OKAY on success.
  15702. * @return MP_MEM when dynamic memory allocation fails.
  15703. */
static int _sp_sqr_48(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    /* Carry out of the top digit of a0 + a1 (0 or 1). */
    sp_int_digit ca;
    DECL_SP_INT(a1, 24);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT(a1, 24, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 shares storage with r: the low-half square is computed
         * directly into the result. */
        z0 = r;

        /* a1 = high 24 digits of a. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_24(a1, z2);
    }
    if (err == MP_OKAY) {
        l = 0;
        h = 0;
        /* a1 = a0 + a1 (mod 2^(24*SP_WORD_SIZE)); ca holds the carry out. */
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_24(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_24(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 24 */
        z1->dp[48] = ca;
        l = 0;
        if (ca) {
            /* The carry was dropped when squaring the truncated sum, so
             * correct z1: add 2 * (truncated a0 + a1) at digit 24 and the
             * carry itself (ca^2 == ca) at digit 48 (set above). */
            l = z1->dp[0 + 24];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 24] = l;
            l = h;
            h = 0;
            for (i = 1; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Above digit 47 of r only z1 contributes (plus carry). */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Remaining digits of z2 land above everything written so far. */
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15825. #endif /* SP_INT_DIGITS >= 96 */
  15826. #if SP_INT_DIGITS >= 128
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: a = a1 * 2^(32*W) + a0, then
 * a^2 = (a1^2 << 64W) + (((a0+a1)^2 - a1^2 - a0^2) << 32W) + a0^2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_64(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    DECL_SP_INT(a1, 32);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT(a1, 32, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 shares storage with the result. */
        z0 = r;

        /* a1 = top 32 digits of a. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_32(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1; carry out of the add kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_32(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_32(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 32 */
        z1->dp[64] = ca;
        l = 0;
        if (ca) {
            /* Carry out of (a0 + a1) means the square is missing a
             * 2 * (a0 + a1) << 32 term - add a1 (the truncated sum) twice. */
            l = z1->dp[0 + 32];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 32] = l;
            l = h;
            h = 0;
            for (i = 1; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15958. #endif /* SP_INT_DIGITS >= 128 */
  15959. #if SP_INT_DIGITS >= 192
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: a = a1 * 2^(48*W) + a0, then
 * a^2 = (a1^2 << 96W) + (((a0+a1)^2 - a1^2 - a0^2) << 48W) + a0^2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_96(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    DECL_SP_INT(a1, 48);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT(a1, 48, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 shares storage with the result. */
        z0 = r;

        /* a1 = top 48 digits of a. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_48(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1; carry out of the add kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_48(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_48(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 48 */
        z1->dp[96] = ca;
        l = 0;
        if (ca) {
            /* Carry out of (a0 + a1) means the square is missing a
             * 2 * (a0 + a1) << 48 term - add a1 (the truncated sum) twice. */
            l = z1->dp[0 + 48];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 48] = l;
            l = h;
            h = 0;
            for (i = 1; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  16091. #endif /* SP_INT_DIGITS >= 192 */
  16092. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  16093. #endif /* !WOLFSSL_SP_SMALL */
/* Square a and store in r. r = a * a
 *
 * Dispatches to a fixed-size comba/Karatsuba implementation when the digit
 * count matches one, otherwise uses the generic implementation.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
 *          data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_sqr(const sp_int* a, sp_int* r)
{
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
    /* Small build: squaring is just multiplication by self. */
    return sp_mul(a, a, r);
#else
    int err = MP_OKAY;

    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
    }
#endif

    if (err == MP_OKAY) {
        if (a->used == 0) {
            /* 0 ^ 2 = 0 */
            _sp_zero(r);
        }
        else
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if (a->used == 4) {
            err = _sp_sqr_4(a, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 6) {
            err = _sp_sqr_6(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if (a->used == 8) {
            err = _sp_sqr_8(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 12) {
            err = _sp_sqr_12(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if (a->used == 16) {
            err = _sp_sqr_16(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if (a->used == 24) {
            err = _sp_sqr_24(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if (a->used == 32) {
            err = _sp_sqr_32(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if (a->used == 48) {
            err = _sp_sqr_48(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if (a->used == 64) {
            err = _sp_sqr_64(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if (a->used == 96) {
            err = _sp_sqr_96(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        /* Fall back to the generic squaring implementation. */
        {
            err = _sp_sqr(a, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* A square is never negative. */
        r->sign = MP_ZPOS;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rsqr");
    }
#endif

    return err;
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
}
  16221. /* END SP_SQR implementations */
  16222. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  16223. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  16224. #if defined(WOLFSSL_SP_MATH_ALL) || \
  16225. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  16226. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
/* Square a mod m and store in r: r = (a * a) mod m
 *
 * Uses a temporary for the double-width square so that r may alias m.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Create temporary for multiplication result. */
    DECL_SP_INT(t, a->used * 2);

    ALLOC_SP_INT(t, a->used * 2, err, NULL);
    if (err == MP_OKAY) {
        err = sp_init_size(t, a->used * 2);
    }

    /* Square and reduce. */
    if (err == MP_OKAY) {
        err = sp_sqr(a, t);
    }
    if (err == MP_OKAY) {
        err = sp_mod(t, m, r);
    }

    /* Dispose of an allocated SP int. */
    FREE_SP_INT(t, NULL);
    return err;
}
  16256. /* Square a mod m and store in r: r = (a * a) mod m
  16257. *
  16258. * @param [in] a SP integer to square.
  16259. * @param [in] m SP integer that is the modulus.
  16260. * @param [out] r SP integer result.
  16261. *
  16262. * @return MP_OKAY on success.
  16263. * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
  16264. * for fixed data length.
  16265. * @return MP_MEM when dynamic memory allocation fails.
  16266. */
  16267. int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
  16268. {
  16269. int err = MP_OKAY;
  16270. /* Validate parameters. */
  16271. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  16272. err = MP_VAL;
  16273. }
  16274. /* Ensure r has space for intermediate result. */
  16275. if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
  16276. err = MP_VAL;
  16277. }
  16278. /* Ensure a is not too big. */
  16279. if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
  16280. err = MP_VAL;
  16281. }
  16282. /* Use r as intermediate result if not same as pointer m which is needed
  16283. * after first intermediate result.
  16284. */
  16285. if ((err == MP_OKAY) && (r != m)) {
  16286. /* Square and reduce. */
  16287. err = sp_sqr(a, r);
  16288. if (err == MP_OKAY) {
  16289. err = sp_mod(r, m, r);
  16290. }
  16291. }
  16292. else if (err == MP_OKAY) {
  16293. /* Do operation with temporary. */
  16294. err = _sp_sqrmod(a, m, r);
  16295. }
  16296. return err;
  16297. }
  16298. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  16299. /**********************
  16300. * Montgomery functions
  16301. **********************/
  16302. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  16303. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  16304. defined(OPENSSL_ALL)
/* Reduce a number in Montgomery form.
 *
 * Assumes a and m are not NULL and m is not 0.
 *
 * DigitMask(a,i) := mask out the 'i'th digit in place.
 *
 * Algorithm:
 *  1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
 *  2. For i = 0..NumDigits(m)-1
 *   2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
 *   2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
 *   2.3. a += mu * DigitMask(m, 0)
 *   2.4. For j = 1 up to NumDigits(m)-2
 *    2.4.1 a += mu * DigitMask(m, j)
 *   2.5 a += mu * DigitMask(m, NumDigits(m)-1))
 *  3. a >>= NumBits(m)
 *  4. a = a % m
 *
 * @param  [in,out]  a   SP integer to Montgomery reduce.
 * @param  [in]      m   SP integer that is the modulus.
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
 * @param  [in]      ct  Indicates operation must be constant time.
 *
 * @return  MP_OKAY on success.
 */
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
{
#if !defined(SQR_MUL_ASM)
    unsigned int i;
    int bits;
    sp_int_word w;
    sp_int_digit mu;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    /* Count bits in modulus. */
    bits = sp_count_bits(m);

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    if (!ct) {
        for (i = a->used; i < m->used * 2; i++) {
            a->dp[i] = 0;
        }
    }
    else {
        /* Constant time: touch every digit and mask instead of branching on
         * the used count. */
        for (i = 0; i < m->used * 2; i++) {
            a->dp[i] &=
                (sp_int_digit)
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
        }
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        w = a->dp[0];
        w += (sp_int_word)mu * m->dp[0];
        a->dp[0] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        w += a->dp[1];
        a->dp[1] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        a->dp[2] = (sp_int_digit)w;
        a->used = 3;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
    else {
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
         *    Mask when last digit of modulus doesn't have highest bit set.
         */
        sp_int_digit mask = (sp_int_digit)
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
        /* Overflow. */
        sp_int_word o = 0;

        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            unsigned int j;

            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[i];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3. a += mu * DigitMask(m, 0) */
            w = a->dp[i];
            w += (sp_int_word)mu * m->dp[0];
            a->dp[i] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
            for (j = 1; j < m->used - 1; j++) {
                /* 2.4.1 a += mu * DigitMask(m, j) */
                w += a->dp[i + j];
                w += (sp_int_word)mu * m->dp[j];
                a->dp[i + j] = (sp_int_digit)w;
                w >>= SP_WORD_SIZE;
            }
            /* Handle overflow. */
            w += o;
            w += a->dp[i + j];
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1)) */
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
            a->dp[i + j] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            o += w;
        }
        /* Handle overflow. */
        o += a->dp[m->used * 2 - 1];
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
        o >>= SP_WORD_SIZE;
        a->dp[m->used * 2] = (sp_int_digit)o;
        a->used = m->used * 2 + 1;
    }

    if (!ct) {
        /* Remove leading zeros. */
        sp_clamp(a);
        /* 3. a >>= NumBits(m) */
        (void)sp_rshb(a, bits, a);
        /* 4. a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
    }
    else {
        /* 3. a >>= NumBits(m) */
        (void)sp_rshb(a, bits, a);
        /* Constant time clamping. */
        sp_clamp_ct(a);
        /* 4. a = a mod m
         * Always subtract but at a too high offset if a is less than m.
         */
        _sp_submod_ct(a, m, m, m->used + 1, a);
    }

#if 0
    sp_print(a, "rr");
#endif

    return MP_OKAY;
#else /* !SQR_MUL_ASM */
    unsigned int i;
    unsigned int j;
    int bits;
    sp_int_digit mu;
    sp_int_digit o;
    sp_int_digit mask;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    /* Count bits in modulus. */
    bits = sp_count_bits(m);
    /* Mask for partial top word of the modulus (0 when top bit is set). */
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    if (!ct) {
        for (i = a->used; i < m->used * 2; i++) {
            a->dp[i] = 0;
        }
    }
    else {
        /* Constant time: touch every digit and mask instead of branching on
         * the used count. */
        for (i = 0; i < m->used * 2; i++) {
            a->dp[i] &=
                (sp_int_digit)
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
        }
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        sp_int_digit l;
        sp_int_digit h;

        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        l = a->dp[0];
        h = 0;
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
        a->dp[0] = l;
        l = h;
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[1]);
        a->dp[1] = l;
        a->dp[2] = h;
        a->used = m->used * 2 + 1;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
#if SP_WORD_SIZE == 64
#if SP_INT_DIGITS >= 8
    /* Unrolled case for 4-digit moduli with full top word (e.g. 256-bit). */
    else if ((m->used == 4) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 4; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
            a->dp[2] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[7]);
        a->dp[3] = l;
        a->dp[4] = h;
        a->used = 5;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 8 */
#if SP_INT_DIGITS >= 12
    /* Unrolled case for 6-digit moduli with full top word (e.g. 384-bit). */
    else if ((m->used == 6) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 6; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[3]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
            a->dp[2] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[4]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
            a->dp[3] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
            a->dp[4] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[11]);
        a->dp[5] = l;
        a->dp[6] = h;
        a->used = 7;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 12 */
#elif SP_WORD_SIZE == 32
    /* Small moduli with a full top word - shift result down as we go. */
    else if ((m->used <= 12) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            md = m->dp;
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* a = (a + mu * m, 0) >> WORD_SIZE */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            l = h;
            /* Process two digits of m per iteration. */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = 0;
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j] = h;
            }
            /* Remaining single digit of m, if any. */
            for (; j < m->used - 1; j++) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, ad[i + j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j - 1] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used - 1] = l;
        a->dp[m->used] = h;
        a->used = m->used + 1;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_WORD_SIZE == 64 | 32 */
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
    else {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++, ad++) {
            md = m->dp;
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3 a += mu * DigitMask(m, 0) */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            ad[0] = l;
            l = h;
            /* 2.4. For j = 1 up to NumDigits(m)-2, two digits at a time. */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j + 0]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j + 0] = l;
                l = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j + 1] = h;
            }
            /* Remaining single digit of m, if any. */
            for (; j < m->used - 1; j++) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
            SP_ASM_ADDC(l, h, ad[j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used * 2 - 1] = l;
        a->dp[m->used * 2] = h;
        a->used = m->used * 2 + 1;
    }

    if (!ct) {
        /* Remove leading zeros. */
        sp_clamp(a);
        /* 3. a >>= NumBits(m) */
        (void)sp_rshb(a, bits, a);
        /* 4. a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
    }
    else {
        /* 3. a >>= NumBits(m) */
        (void)sp_rshb(a, bits, a);
        /* Constant time clamping. */
        sp_clamp_ct(a);
        /* 4. a = a mod m
         * Always subtract but at a too high offset if a is less than m.
         */
        _sp_submod_ct(a, m, m, m->used + 1, a);
    }

#if 0
    sp_print(a, "rr");
#endif

    return MP_OKAY;
#endif /* !SQR_MUL_ASM */
}
  16749. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  16750. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  16751. /* Reduce a number in Montgomery form.
  16752. *
  16753. * @param [in,out] a SP integer to Montgomery reduce.
  16754. * @param [in] m SP integer that is the modulus.
  16755. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  16756. * @param [in] ct Indicates operation must be constant time.
  16757. *
  16758. * @return MP_OKAY on success.
  16759. * @return MP_VAL when a or m is NULL or m is zero.
  16760. */
  16761. int sp_mont_red_ex(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
  16762. {
  16763. int err;
  16764. /* Validate parameters. */
  16765. if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
  16766. err = MP_VAL;
  16767. }
  16768. #ifdef WOLFSSL_SP_INT_NEGATIVE
  16769. else if ((a->sign == MP_NEG) || (m->sign == MP_NEG)) {
  16770. err = MP_VAL;
  16771. }
  16772. #endif
  16773. /* Ensure a has enough space for calculation. */
  16774. else if (a->size < m->used * 2 + 1) {
  16775. err = MP_VAL;
  16776. }
  16777. else {
  16778. /* Perform Montogomery Reduction. */
  16779. err = _sp_mont_red(a, m, mp, ct);
  16780. }
  16781. return err;
  16782. }
  16783. #endif
  16784. /* Calculate the bottom digit of the inverse of negative m.
  16785. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16786. *
  16787. * Used when performing Montgomery Reduction.
  16788. * m must be odd.
  16789. * Jeffrey Hurchalla’s method.
  16790. * https://arxiv.org/pdf/2204.04342.pdf
  16791. *
  16792. * @param [in] m SP integer that is the modulus.
  16793. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16794. */
  16795. static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16796. {
  16797. sp_int_digit d = m->dp[0];
  16798. sp_int_digit x = (3 * d) ^ 2;
  16799. sp_int_digit y = 1 - d * x;
  16800. #if SP_WORD_SIZE >= 16
  16801. x *= 1 + y; y *= y;
  16802. #endif
  16803. #if SP_WORD_SIZE >= 32
  16804. x *= 1 + y; y *= y;
  16805. #endif
  16806. #if SP_WORD_SIZE >= 64
  16807. x *= 1 + y; y *= y;
  16808. #endif
  16809. x *= 1 + y;
  16810. /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
  16811. *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
  16812. }
  16813. /* Calculate the bottom digit of the inverse of negative m.
  16814. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16815. *
  16816. * Used when performing Montgomery Reduction.
  16817. *
  16818. * @param [in] m SP integer that is the modulus.
  16819. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16820. *
  16821. * @return MP_OKAY on success.
  16822. * @return MP_VAL when m or rho is NULL.
  16823. */
  16824. int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16825. {
  16826. int err = MP_OKAY;
  16827. /* Validate parameters. */
  16828. if ((m == NULL) || (rho == NULL)) {
  16829. err = MP_VAL;
  16830. }
  16831. /* Calculation only works with odd modulus. */
  16832. if ((err == MP_OKAY) && !sp_isodd(m)) {
  16833. err = MP_VAL;
  16834. }
  16835. if (err == MP_OKAY) {
  16836. /* Calculate negative of inverse mod 2^n. */
  16837. _sp_mont_setup(m, rho);
  16838. }
  16839. return err;
  16840. }
  16841. /* Calculate the normalization value of m.
  16842. * norm = 2^k - m, where k is the number of bits in m
  16843. *
  16844. * @param [out] norm SP integer that normalises numbers into Montgomery
  16845. * form.
  16846. * @param [in] m SP integer that is the modulus.
  16847. *
  16848. * @return MP_OKAY on success.
  16849. * @return MP_VAL when norm or m is NULL, or number of bits in m is maximual.
  16850. */
  16851. int sp_mont_norm(sp_int* norm, const sp_int* m)
  16852. {
  16853. int err = MP_OKAY;
  16854. unsigned int bits = 0;
  16855. /* Validate parameters. */
  16856. if ((norm == NULL) || (m == NULL)) {
  16857. err = MP_VAL;
  16858. }
  16859. if (err == MP_OKAY) {
  16860. /* Find top bit and ensure norm has enough space. */
  16861. bits = (unsigned int)sp_count_bits(m);
  16862. if (bits >= norm->size * SP_WORD_SIZE) {
  16863. err = MP_VAL;
  16864. }
  16865. }
  16866. if (err == MP_OKAY) {
  16867. /* Round up for case when m is less than a word - no advantage in using
  16868. * a smaller mask and would take more operations.
  16869. */
  16870. if (bits < SP_WORD_SIZE) {
  16871. bits = SP_WORD_SIZE;
  16872. }
  16873. /* Smallest number greater than m of form 2^n. */
  16874. _sp_zero(norm);
  16875. err = sp_set_bit(norm, (int)bits);
  16876. }
  16877. if (err == MP_OKAY) {
  16878. /* norm = 2^n % m */
  16879. err = sp_sub(norm, m, norm);
  16880. }
  16881. if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
  16882. /* Sub made norm one word and now finish calculation. */
  16883. norm->dp[0] %= m->dp[0];
  16884. }
  16885. if (err == MP_OKAY) {
  16886. /* Remove leading zeros. */
  16887. sp_clamp(norm);
  16888. }
  16889. return err;
  16890. }
  16891. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
  16892. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  16893. /*********************************
  16894. * To and from binary and strings.
  16895. *********************************/
  16896. /* Calculate the number of 8-bit values required to represent the
  16897. * multi-precision number.
  16898. *
* When a is NULL, returns 0.
  16900. *
  16901. * @param [in] a SP integer.
  16902. *
  16903. * @return The count of 8-bit values.
  16904. * @return 0 when a is NULL.
  16905. */
  16906. int sp_unsigned_bin_size(const sp_int* a)
  16907. {
  16908. int cnt = 0;
  16909. if (a != NULL) {
  16910. cnt = (sp_count_bits(a) + 7) / 8;
  16911. }
  16912. return cnt;
  16913. }
  16914. /* Convert a number as an array of bytes in big-endian format to a
  16915. * multi-precision number.
  16916. *
  16917. * @param [out] a SP integer.
  16918. * @param [in] in Array of bytes.
  16919. * @param [in] inSz Number of data bytes in array.
  16920. *
  16921. * @return MP_OKAY on success.
  16922. * @return MP_VAL when the number is too big to fit in an SP.
  16923. */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
{
    int err = MP_OKAY;

    /* Validate parameters. in may be NULL only when zero bytes are given. */
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
        err = MP_VAL;
    }

    /* Check a has enough space for number. */
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Load full digits at a time from in. */
        int i;
        int j = 0;

        /* Number of digits needed to hold inSz bytes - rounded up. */
        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;

#if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
        /* Data endian matches representation of number.
         * Directly copy if we don't have alignment issues.
         * i walks backwards through in, one full digit at a time. */
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
        }
#else
        /* Construct digit from required number of bytes.
         * in is big-endian, so in[i] is the least significant byte of the
         * digit being built. */
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            a->dp[j] = ((sp_int_digit)in[i - 0] << 0)
#if SP_WORD_SIZE >= 16
                     | ((sp_int_digit)in[i - 1] << 8)
#endif
#if SP_WORD_SIZE >= 32
                     | ((sp_int_digit)in[i - 2] << 16) |
                       ((sp_int_digit)in[i - 3] << 24)
#endif
#if SP_WORD_SIZE >= 64
                     | ((sp_int_digit)in[i - 4] << 32) |
                       ((sp_int_digit)in[i - 5] << 40) |
                       ((sp_int_digit)in[i - 6] << 48) |
                       ((sp_int_digit)in[i - 7] << 56)
#endif
                     ;
            j++;
        }
#endif

#if SP_WORD_SIZE >= 16
        /* Handle leftovers - fewer than SP_WORD_SIZEOF bytes remain for the
         * most significant digit. */
        if (i >= 0) {
#ifdef BIG_ENDIAN_ORDER
            int s;

            /* Place remaining bytes into last digit. */
            a->dp[a->used - 1] = 0;
            /* Accumulate from least to most significant remaining byte. */
            for (s = 0; i >= 0; i--,s += 8) {
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
            }
#else
            /* Cast digits to an array of bytes so we can insert directly. */
            byte *d = (byte*)a->dp;

            /* Zero out all bytes in last digit. */
            a->dp[a->used - 1] = 0;
            /* Place remaining bytes directly into digit. i is the index of
             * the last leftover byte, so each case falls through to copy
             * the more significant ones. */
            switch (i) {
#if SP_WORD_SIZE >= 64
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
#endif
#if SP_WORD_SIZE >= 32
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
#endif
                case 0: d[inSz - 1 - 0] = in[0];
            }
#endif /* LITTLE_ENDIAN_ORDER */
        }
#endif
        /* Remove leading zeros - constant-time variant of clamp. */
        sp_clamp_ct(a);
    }

    return err;
}
  17004. /* Convert the multi-precision number to an array of bytes in big-endian format.
  17005. *
  17006. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  17007. * to calculate the number of bytes required.
  17008. *
  17009. * @param [in] a SP integer.
  17010. * @param [out] out Array to put encoding into.
  17011. *
  17012. * @return MP_OKAY on success.
  17013. * @return MP_VAL when a or out is NULL.
  17014. */
  17015. int sp_to_unsigned_bin(const sp_int* a, byte* out)
  17016. {
  17017. /* Write assuming output buffer is big enough. */
  17018. return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
  17019. }
  17020. /* Convert the multi-precision number to an array of bytes in big-endian format.
  17021. *
  17022. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  17023. * to calculate the number of bytes required.
  17024. * Front-pads the output array with zeros to make number the size of the array.
  17025. *
  17026. * @param [in] a SP integer.
  17027. * @param [out] out Array to put encoding into.
  17028. * @param [in] outSz Size of the array in bytes.
  17029. *
  17030. * @return MP_OKAY on success.
  17031. * @return MP_VAL when a or out is NULL.
  17032. */
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
        err = MP_VAL;
    }

#if SP_WORD_SIZE > 8
    if (err == MP_OKAY) {
        /* Start at the end of the buffer - least significant byte. */
        int j = outSz - 1;

        if (!sp_iszero(a)) {
            unsigned int i;

            /* Put each digit in. */
            for (i = 0; (j >= 0) && (i < a->used); i++) {
                int b;
                sp_int_digit d = a->dp[i];
                /* Place each byte of a digit into the buffer. */
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
                    out[j--] = (byte)d;
                    d >>= 8;
                    /* Stop if the output buffer is filled. */
                    if (j < 0) {
                        /* Buffer full but more digits, or non-zero bytes of
                         * this digit, remain - number doesn't fit in outSz
                         * bytes. */
                        if ((i < a->used - 1) || (d > 0)) {
                            err = MP_VAL;
                        }
                        break;
                    }
                }
            }
        }
        /* Front pad buffer with 0s. */
        for (; j >= 0; j--) {
            out[j] = 0;
        }
    }
#else
    /* 8-bit digits: one digit maps to exactly one output byte. */
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        unsigned int i;
        int j;

        /* Zero the leading pad bytes, then copy digits in reverse order. */
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
            out[j] = a->dp[i];
        }
    }
#endif

    return err;
}
  17084. /* Convert the multi-precision number to an array of bytes in big-endian format.
  17085. *
  17086. * Constant-time implementation.
  17087. *
  17088. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  17089. * to calculate the number of bytes required.
  17090. * Front-pads the output array with zeros to make number the size of the array.
  17091. *
  17092. * @param [in] a SP integer.
  17093. * @param [out] out Array to put encoding into.
  17094. * @param [in] outSz Size of the array in bytes.
  17095. *
  17096. * @return MP_OKAY on success.
  17097. * @return MP_VAL when a or out is NULL.
  17098. */
int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
        err = MP_VAL;
    }

#if SP_WORD_SIZE > 8
    if (err == MP_OKAY) {
        /* Start at the end of the buffer - least significant byte. */
        int j;
        unsigned int i;
        /* All ones while digits of a remain; cleared after the last digit
         * so subsequent iterations emit zero pad bytes with the same
         * operation sequence. */
        sp_int_digit mask = (sp_int_digit)-1;
        sp_int_digit d;

        /* NOTE(review): assumes a->used >= 1; with used == 0 the unsigned
         * comparison below underflows - confirm callers guarantee this. */
        /* Put each digit in. */
        i = 0;
        for (j = outSz - 1; j >= 0; ) {
            int b;
            d = a->dp[i];
            /* Place each byte of a digit into the buffer. */
            for (b = 0; (j >= 0) && (b < SP_WORD_SIZEOF); b++) {
                out[j--] = (byte)(d & mask);
                d >>= 8;
            }
            /* Constant-time advance: mask stays all ones only while more
             * digits remain, so i stops incrementing (and writes become
             * zeros) once the last digit has been emitted. */
            mask &= (sp_int_digit)0 - (i < a->used - 1);
            i += (unsigned int)(1 & mask);
        }
    }
#else
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        unsigned int i;
        int j;
        sp_int_digit mask = (sp_int_digit)-1;

        i = 0;
        for (j = outSz - 1; j >= 0; j--) {
            out[j] = a->dp[i] & mask;
            /* Constant-time advance - same technique as above. */
            mask &= (sp_int_digit)0 - (i < a->used - 1);
            i += (unsigned int)(1 & mask);
        }
    }
#endif

    return err;
}
  17145. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  17146. !defined(WOLFSSL_RSA_VERIFY_ONLY)
  17147. /* Store the number in big-endian format in array at an offset.
  17148. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  17149. * to calculate the number of bytes required.
  17150. *
* @param [in]  o      Offset into array at which to start encoding.
  17152. * @param [in] a SP integer.
  17153. * @param [out] out Array to put encoding into.
  17154. *
  17155. * @return Index of next byte after data.
  17156. * @return MP_VAL when a or out is NULL.
  17157. */
  17158. int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
  17159. {
  17160. /* Get length of data that will be written. */
  17161. int len = sp_unsigned_bin_size(a);
  17162. /* Write number to buffer at offset. */
  17163. int ret = sp_to_unsigned_bin_len(a, out + o, len);
  17164. if (ret == MP_OKAY) {
  17165. /* Return offset of next byte after number. */
  17166. ret = o + len;
  17167. }
  17168. return ret;
  17169. }
  17170. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
  17171. #ifdef WOLFSSL_SP_READ_RADIX_16
  17172. /* Convert hexadecimal number as string in big-endian format to a
  17173. * multi-precision number.
  17174. *
  17175. * Assumes negative sign and leading zeros have been stripped.
  17176. *
  17177. * @param [out] a SP integer.
  17178. * @param [in] in NUL terminated string.
  17179. *
  17180. * @return MP_OKAY on success.
  17181. * @return MP_VAL when radix not supported, value is negative, or a character
  17182. * is not valid.
  17183. */
static int _sp_read_radix_16(sp_int* a, const char* in)
{
    int err = MP_OKAY;
    int i;
    unsigned int s = 0;   /* Bit position of next nibble within digit. */
    unsigned int j = 0;   /* Index of digit currently being built in a. */
    sp_int_digit d;

    /* Make all nibbles in digit 0. */
    d = 0;
    /* Step through string a character at a time starting at end - least
     * significant byte. */
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
        /* Convert character from hex.
         * Assumes HexCharToByte reports an invalid character as a negative
         * value once cast to int. */
        int ch = (int)HexCharToByte(in[i]);
        /* Check for invalid character. */
        if (ch < 0) {
            err = MP_VAL;
            break;
        }

        /* Check whether we have filled the digit. */
        if (s == SP_WORD_SIZE) {
            /* Store digit and move index to next in a. */
            a->dp[j++] = d;
            /* Fail if we are out of space in a. */
            if (j >= a->size) {
                err = MP_VAL;
                break;
            }
            /* Set shift back to 0 - lowest nibble. */
            s = 0;
            /* Make all nibbles in digit 0. */
            d = 0;
        }

        /* Put next nibble into digit. */
        d |= ((sp_int_digit)ch) << s;
        /* Update shift for next nibble. */
        s += 4;
    }

    if (err == MP_OKAY) {
        /* If space, store last (possibly partial) digit. */
        if (j < a->size) {
            a->dp[j] = d;
        }
        /* Update used count. */
        a->used = j + 1;
        /* Remove leading zeros. */
        sp_clamp(a);
    }

    return err;
}
  17234. #endif /* WOLFSSL_SP_READ_RADIX_16 */
  17235. #ifdef WOLFSSL_SP_READ_RADIX_10
  17236. /* Convert decimal number as string in big-endian format to a multi-precision
  17237. * number.
  17238. *
  17239. * Assumes negative sign and leading zeros have been stripped.
  17240. *
  17241. * @param [out] a SP integer.
  17242. * @param [in] in NUL terminated string.
  17243. *
  17244. * @return MP_OKAY on success.
  17245. * @return MP_VAL when radix not supported, value is negative, or a character
  17246. * is not valid.
  17247. */
  17248. static int _sp_read_radix_10(sp_int* a, const char* in)
  17249. {
  17250. int err = MP_OKAY;
  17251. int i;
  17252. char ch;
  17253. /* Start with a being zero. */
  17254. _sp_zero(a);
  17255. /* Process all characters. */
  17256. for (i = 0; in[i] != '\0'; i++) {
  17257. /* Get character. */
  17258. ch = in[i];
  17259. /* Check character is valid. */
  17260. if ((ch >= '0') && (ch <= '9')) {
  17261. /* Assume '0'..'9' are continuous values as characters. */
  17262. ch -= '0';
  17263. }
  17264. else {
  17265. /* Return error on invalid character. */
  17266. err = MP_VAL;
  17267. break;
  17268. }
  17269. /* Multiply a by 10. */
  17270. err = _sp_mul_d(a, 10, a, 0);
  17271. if (err != MP_OKAY) {
  17272. break;
  17273. }
  17274. /* Add character value. */
  17275. err = _sp_add_d(a, (sp_int_digit)ch, a);
  17276. if (err != MP_OKAY) {
  17277. break;
  17278. }
  17279. }
  17280. return err;
  17281. }
  17282. #endif /* WOLFSSL_SP_READ_RADIX_10 */
  17283. #if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
  17284. /* Convert a number as string in big-endian format to a big number.
  17285. * Only supports base-16 (hexadecimal) and base-10 (decimal).
  17286. *
  17287. * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
  17288. *
  17289. * @param [out] a SP integer.
  17290. * @param [in] in NUL terminated string.
  17291. * @param [in] radix Number of values in a digit.
  17292. *
  17293. * @return MP_OKAY on success.
  17294. * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
  17295. * or a character is not valid.
  17296. */
  17297. int sp_read_radix(sp_int* a, const char* in, int radix)
  17298. {
  17299. int err = MP_OKAY;
  17300. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17301. unsigned int sign = MP_ZPOS;
  17302. #endif
  17303. if ((a == NULL) || (in == NULL)) {
  17304. err = MP_VAL;
  17305. }
  17306. if (err == MP_OKAY) {
  17307. #ifndef WOLFSSL_SP_INT_NEGATIVE
  17308. if (*in == '-') {
  17309. err = MP_VAL;
  17310. }
  17311. else
  17312. #endif
  17313. {
  17314. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17315. if (*in == '-') {
  17316. /* Make number negative if signed string. */
  17317. sign = MP_NEG;
  17318. in++;
  17319. }
  17320. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  17321. /* Skip leading zeros. */
  17322. while (*in == '0') {
  17323. in++;
  17324. }
  17325. if (radix == 16) {
  17326. err = _sp_read_radix_16(a, in);
  17327. }
  17328. #ifdef WOLFSSL_SP_READ_RADIX_10
  17329. else if (radix == 10) {
  17330. err = _sp_read_radix_10(a, in);
  17331. }
  17332. #endif
  17333. else {
  17334. err = MP_VAL;
  17335. }
  17336. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17337. /* Ensure not negative when zero. */
  17338. if (err == MP_OKAY) {
  17339. if (sp_iszero(a)) {
  17340. a->sign = MP_ZPOS;
  17341. }
  17342. else {
  17343. a->sign = sign;
  17344. }
  17345. }
  17346. #endif
  17347. }
  17348. }
  17349. return err;
  17350. }
  17351. #endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
  17352. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17353. defined(WC_MP_TO_RADIX)
  17354. /* Put the big-endian, hex string encoding of a into str.
  17355. *
  17356. * Assumes str is large enough for result.
  17357. * Use sp_radix_size() to calculate required length.
  17358. *
  17359. * @param [in] a SP integer to convert.
  17360. * @param [out] str String to hold hex string result.
  17361. *
  17362. * @return MP_OKAY on success.
  17363. * @return MP_VAL when a or str is NULL.
  17364. */
int sp_tohex(const sp_int* a, char* str)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Quick out if number is zero. */
        if (sp_iszero(a) == MP_YES) {
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Make string represent complete bytes - "00" not "0". */
            *str++ = '0';
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            *str++ = '0';
        }
        else {
            int i;
            int j;
            sp_int_digit d;

#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Start at last digit - most significant digit. */
            i = (int)(a->used - 1);
            d = a->dp[i];
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Find highest non-zero byte in most-significant word so the
             * output starts on a byte boundary. */
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
                /* When a byte at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xff) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 8;
                    d = a->dp[--i];
                }
            }
            /* Start with high nibble of byte. */
            j += 4;
#else
            /* Find highest non-zero nibble in most-significant word - no
             * leading zero padding wanted. */
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                /* When a nibble at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xf) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 4;
                    d = a->dp[--i];
                }
            }
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            /* Write out as much as required from most-significant digit. */
            for (; j >= 0; j -= 4) {
                *(str++) = ByteToHex((byte)(d >> j));
            }
            /* Write rest of digits - all nibbles, including leading zeros
             * within a digit. */
            for (--i; i >= 0; i--) {
                /* Get digit from memory. */
                d = a->dp[i];
                /* Write out all nibbles of digit. */
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                    *(str++) = (char)ByteToHex((byte)(d >> j));
                }
            }
        }
        /* Terminate string. */
        *str = '\0';
    }
    return err;
}
  17447. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17448. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17449. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  17450. defined(WC_MP_TO_RADIX)
  17451. /* Put the big-endian, decimal string encoding of a into str.
  17452. *
  17453. * Assumes str is large enough for result.
  17454. * Use sp_radix_size() to calculate required length.
  17455. *
  17456. * @param [in] a SP integer to convert.
  17457. * @param [out] str String to hold hex string result.
  17458. *
  17459. * @return MP_OKAY on success.
  17460. * @return MP_VAL when a or str is NULL.
  17461. * @return MP_MEM when dynamic memory allocation fails.
  17462. */
int sp_todecimal(const sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;
    sp_int_digit d = 0;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    /* Quick out if number is zero. */
    else if (sp_iszero(a) == MP_YES) {
        *str++ = '0';
        *str = '\0';
    }
    else if (a->used >= SP_INT_DIGITS) {
        err = MP_VAL;
    }
    else {
        /* Temporary that is destructively divided by 10. */
        DECL_SP_INT(t, a->used + 1);

        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
        if (err == MP_OKAY) {
            /* Work on a copy so a is left untouched. */
            _sp_copy(a, t);
        }
        if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Write out little endian - least significant decimal digit
             * first; reversed below. */
            i = 0;
            do {
                /* Divide by 10 and get remainder of division. */
                (void)sp_div_d(t, 10, t, &d);
                /* Write out remainder as a character. */
                str[i++] = (char)('0' + d);
            }
            /* Keep going while there is a value to write. */
            while (!sp_iszero(t));
            /* Terminate string. */
            str[i] = '\0';

            if (err == MP_OKAY) {
                /* Reverse string to big endian - swap ends towards middle. */
                for (j = 0; j <= (i - 1) / 2; j++) {
                    int c = (unsigned char)str[j];
                    str[j] = str[i - 1 - j];
                    str[i - 1 - j] = (char)c;
                }
            }
        }

        FREE_SP_INT(t, NULL);
    }
    return err;
}
  17521. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  17522. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17523. defined(WC_MP_TO_RADIX)
  17524. /* Put the string version, big-endian, of a in str using the given radix.
  17525. *
  17526. * @param [in] a SP integer to convert.
  17527. * @param [out] str String to hold hex string result.
  17528. * @param [in] radix Base of character.
  17529. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17530. *
  17531. * @return MP_OKAY on success.
  17532. * @return MP_VAL when a or str is NULL, or radix not supported.
  17533. */
int sp_toradix(const sp_int* a, char* str, int radix)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    /* Handle base 16 if requested. */
    else if (radix == MP_RADIX_HEX) {
        err = sp_tohex(a, str);
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    /* Handle base 10 if requested - only available when sp_todecimal is
     * compiled in. */
    else if (radix == MP_RADIX_DEC) {
        err = sp_todecimal(a, str);
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        /* Base not supported. */
        err = MP_VAL;
    }

    return err;
}
  17558. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17559. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17560. defined(WC_MP_TO_RADIX)
  17561. /* Calculate the length of the string version, big-endian, of a using the given
  17562. * radix.
  17563. *
  17564. * @param [in] a SP integer to convert.
  17565. * @param [in] radix Base of character.
  17566. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17567. * @param [out] size The number of characters in encoding.
  17568. *
  17569. * @return MP_OKAY on success.
  17570. * @return MP_VAL when a or size is NULL, or radix not supported.
  17571. */
int sp_radix_size(const sp_int* a, int radix, int* size)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (size == NULL)) {
        err = MP_VAL;
    }
    /* Handle base 16 if requested. */
    else if (radix == MP_RADIX_HEX) {
        if (a->used == 0) {
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* 00 and '\0' */
            *size = 2 + 1;
#else
            /* Zero and '\0' */
            *size = 1 + 1;
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
        }
        else {
            /* Count of nibbles - one hex character each. */
            int cnt = (sp_count_bits(a) + 3) / 4;
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Must have even number of nibbles to have complete bytes. */
            if (cnt & 1) {
                cnt++;
            }
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
#ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Add to count of characters for negative sign. */
            if (a->sign == MP_NEG) {
                cnt++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
            /* One more for \0 */
            *size = cnt + 1;
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    /* Handle base 10 if requested. */
    else if (radix == MP_RADIX_DEC) {
        int i;
        sp_int_digit d;

        /* quick out if its zero */
        if (sp_iszero(a) == MP_YES) {
            /* Zero and '\0' */
            *size = 1 + 1;
        }
        else {
            DECL_SP_INT(t, a->used);

            /* Temporary to be destructively divided by 10. */
            ALLOC_SP_INT(t, a->used, err, NULL);
            if (err == MP_OKAY) {
                t->size = a->used;
                _sp_copy(a, t);
            }

            if (err == MP_OKAY) {
                /* Count number of times number can be divided by 10 - one
                 * decimal character per division. */
                for (i = 0; !sp_iszero(t); i++) {
                    (void)sp_div_d(t, 10, t, &d);
                }
#ifdef WOLFSSL_SP_INT_NEGATIVE
                /* Add to count of characters for negative sign. */
                if (a->sign == MP_NEG) {
                    i++;
                }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
                /* One more for \0 */
                *size = i + 1;
            }

            FREE_SP_INT(t, NULL);
        }
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        /* Base not supported. */
        err = MP_VAL;
    }

    return err;
}
  17652. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17653. /***************************************
  17654. * Prime number generation and checking.
  17655. ***************************************/
  17656. #if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
  17657. !defined(NO_DSA)) && !defined(WC_NO_RNG)
  17658. #ifndef WOLFSSL_SP_MILLER_RABIN_CNT
  17659. /* Always done 8 iterations of Miller-Rabin on check of primality when
  17660. * generating.
  17661. */
  17662. #define WOLFSSL_SP_MILLER_RABIN_CNT 8
  17663. #endif
  17664. /* Generate a random prime for RSA only.
  17665. *
  17666. * @param [out] r SP integer to hold result.
  17667. * @param [in] len Number of bytes in prime. Use -ve to indicate the two
  17668. * lowest bits must be set.
  17669. * @param [in] rng Random number generator.
  17670. * @param [in] heap Heap hint. Unused.
  17671. *
  17672. * @return MP_OKAY on success
  17673. * @return MP_VAL when r or rng is NULL, length is not supported or random
  17674. * number generator fails.
  17675. */
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
{
    static const byte USE_BBS = 3;
    int err = MP_OKAY;
    byte low_bits = 1;
    int isPrime = MP_NO;
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
    int bits = 0;
#endif /* WOLFSSL_SP_MATH_ALL */
    unsigned int digits = 0;

    (void)heap;

    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Get type - negative length requests a Blum-Blum-Shub style prime
         * (both low bits set, i.e. 3 mod 4). */
        if (len < 0) {
            low_bits = USE_BBS;
            len = -len;
        }

        /* Get number of digits required to handle required number of bytes. */
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
        /* Ensure result has space. */
        if (r->size < digits) {
            err = MP_VAL;
        }
    }

    if (err == MP_OKAY) {
#ifndef WOLFSSL_SP_MATH_ALL
        /* For minimal maths, support only what's in SP and needed for DH. */
#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
        if (len == 32) {
        }
        else
#endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
        /* Generate RSA primes that are half the modulus length. */
#ifdef WOLFSSL_SP_4096
        if (len == 256) {
            /* Support 2048-bit operations compiled in. */
        }
        else
#endif
#ifndef WOLFSSL_SP_NO_3072
        if (len == 192) {
            /* Support 1536-bit operations compiled in. */
        }
        else
#endif
#ifndef WOLFSSL_SP_NO_2048
        if (len == 128) {
            /* Support 1024-bit operations compiled in. */
        }
        else
#endif
        {
            /* Bit length not supported in SP. */
            err = MP_VAL;
        }
#endif /* !WOLFSSL_SP_MATH_ALL */

#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Generated number is always positive. */
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Set number of digits that will be used. */
        r->used = digits;
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
        /* Calculate number of bits in last digit - 0 when len is a whole
         * number of digits. */
        bits = (len * 8) & SP_WORD_MASK;
#endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
    }

    /* Assume the candidate is probably prime and then test until it is proven
     * composite.
     */
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
#ifdef SHOW_GEN
        printf(".");
        fflush(stdout);
#endif /* SHOW_GEN */
        /* Generate bytes into digit array. */
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
        if (err != 0) {
            err = MP_VAL;
            break;
        }

        /* Set top bits to ensure bit length required is generated.
         * Also set second top to help ensure product of two primes is
         * going to be twice the number of bits of each.
         */
#ifdef LITTLE_ENDIAN_ORDER
        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
#else
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
#endif /* LITTLE_ENDIAN_ORDER */

#ifdef BIG_ENDIAN_ORDER
        /* Bytes were put into wrong place when less than full digit. */
        if (bits != 0) {
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
        }
#endif /* BIG_ENDIAN_ORDER */
#ifdef WOLFSSL_SP_MATH_ALL
        /* Mask top digit when less than a digit requested. */
        if (bits > 0) {
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
        }
#endif /* WOLFSSL_SP_MATH_ALL */

        /* Set mandatory low bits
         *  - bottom bit to make odd.
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
         */
        r->dp[0] |= low_bits;

        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
         * of a 1024-bit candidate being a false positive, when it is our
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
         */
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
            rng);
    }

    return err;
}
  17796. #endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
  17797. #ifdef WOLFSSL_SP_PRIME_GEN
  17798. /* Miller-Rabin test of "a" to the base of "b" as described in
  17799. * HAC pp. 139 Algorithm 4.24
  17800. *
  17801. * Sets result to 0 if definitely composite or 1 if probably prime.
  17802. * Randomly the chance of error is no more than 1/4 and often
  17803. * very much lower.
  17804. *
  17805. * a is assumed to be odd.
  17806. *
  17807. * @param [in] a SP integer to check.
  17808. * @param [in] b SP integer that is a small prime.
  17809. * @param [out] result MP_YES when number is likely prime.
  17810. * MP_NO otherwise.
  17811. * @param [in] n1 SP integer temporary.
  17812. * @param [in] r SP integer temporary.
  17813. *
  17814. * @return MP_OKAY on success.
  17815. * @return MP_MEM when dynamic memory allocation fails.
  17816. */
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
    sp_int* n1, sp_int* r)
{
    int err = MP_OKAY;
    int s = 0;
    /* b is reused as y = b^r mod a - no extra temporary is needed. */
    sp_int* y = b;

    /* Assume not prime. */
    *result = MP_NO;

    /* Ensure small prime is 2 or more. */
    if (sp_cmp_d(b, 1) != MP_GT) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* n1 = a - 1 (a is assumed odd, so only the low digit changes.) */
        (void)sp_copy(a, n1);
        n1->dp[0]--;

        /* Set 2**s * r = n1 */
        /* Count the number of least significant bits which are zero. */
        s = sp_cnt_lsb(n1);
        /* Divide n - 1 by 2**s into r. */
        (void)sp_rshb(n1, s, r);

        /* Compute y = b**r mod a */
        err = sp_exptmod(b, r, a, y);
    }
    if (err == MP_OKAY) {
        /* Assume probably prime until shown otherwise. */
        *result = MP_YES;

        /* If y != 1 and y != n1 do */
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
            int j = 1;
            /* While j <= s-1 and y != n1: keep squaring y mod a. */
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
                /* Square for bit shifted down. */
                err = sp_sqrmod(y, a, y);
                if (err != MP_OKAY) {
                    break;
                }
                /* If y == 1 then composite (non-trivial square root of 1). */
                if (sp_cmp_d(y, 1) == MP_EQ) {
                    *result = MP_NO;
                    break;
                }
                ++j;
            }
            /* If y != n1 after the squarings then composite. */
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
                *result = MP_NO;
            }
        }
    }

    return err;
}
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* The first 54 primes - all values fit in an 8-bit digit. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes. */
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
  17920. /* Compare the first n primes with a.
  17921. *
  17922. * @param [in] a Number to check.
  17923. * @param [out] result Whether number was found to be prime.
  17924. * @return 0 when no small prime matches.
  17925. * @return 1 when small prime matches.
  17926. */
  17927. static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
  17928. {
  17929. int i;
  17930. int haveRes = 0;
  17931. *result = MP_NO;
  17932. /* Check one digit a against primes table. */
  17933. for (i = 0; i < SP_PRIME_SIZE; i++) {
  17934. if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
  17935. *result = MP_YES;
  17936. haveRes = 1;
  17937. break;
  17938. }
  17939. }
  17940. return haveRes;
  17941. }
  17942. /* Using composites is only faster when using 64-bit values. */
  17943. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
  17944. /* Number of composites. */
  17945. #define SP_COMP_CNT 38
  17946. /* Products of small primes that fit into 64-bits. */
  17947. static sp_int_digit sp_comp[SP_COMP_CNT] = {
  17948. 0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
  17949. 0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
  17950. 0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
  17951. 0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
  17952. 0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
  17953. 0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
  17954. 0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
  17955. 0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
  17956. 0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
  17957. 0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
  17958. 0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
  17959. 0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
  17960. 0xcec010484e4ad877, 0xef972c3cfafbcd25
  17961. };
  17962. /* Index of next prime after those used to create composite. */
  17963. static int sp_comp_idx[SP_COMP_CNT] = {
  17964. 15, 25, 34, 42, 50, 58, 65, 72, 79, 86, 93, 100, 106, 112, 118,
  17965. 124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
  17966. 214, 220, 226, 232, 238, 244, 250, 256
  17967. };
  17968. #endif
  17969. /* Determines whether any of the first n small primes divide a evenly.
  17970. *
  17971. * @param [in] a Number to check.
  17972. * @param [in, out] haveRes Boolean indicating a no prime result found.
  17973. * @param [in, out] result Whether a is known to be prime.
  17974. * @return MP_OKAY on success.
  17975. * @return Negative on failure.
  17976. */
/* Determines whether any of the first n small primes divide a evenly.
 *
 * Two strategies depending on build:
 * - Small/narrow-word builds: trial-divide a by each small prime directly.
 * - 64-bit builds: reduce a modulo a product of several small primes
 *   (one multi-precision mod), then trial-divide the single-word remainder
 *   by the primes making up that product - fewer sp_mod_d calls overall.
 *
 * @param [in]      a        Number to check.
 * @param [in, out] haveRes  Boolean indicating a no prime result found.
 * @param [in, out] result   Whether a is known to be prime.
 * @return MP_OKAY on success.
 * @return Negative on failure.
 */
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
{
    int i;
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
    int j;
#endif
    sp_int_digit d;
    int err = MP_OKAY;

#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
    /* Do trial division of a with all known small primes. */
    for (i = 0; i < SP_PRIME_SIZE; i++) {
        /* Small prime divides a when remainder is 0. */
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
        if ((err != MP_OKAY) || (d == 0)) {
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
    }
#else
    /* Start with first prime in composite. */
    i = 0;
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
        /* Reduce a down to a single word: d = a mod (product of primes). */
        err = sp_mod_d(a, sp_comp[j], &d);
        if ((err != MP_OKAY) || (d == 0)) {
            /* Remainder of 0 means the composite, and hence at least one of
             * its prime factors, divides a. */
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
        /* Do trial division of d with small primes that make up composite.
         * i carries over between iterations: primes are consumed in order. */
        for (; i < sp_comp_idx[j]; i++) {
            /* Small prime divides a when remainder is 0. */
            if (d % sp_primes[i] == 0) {
                *result = MP_NO;
                *haveRes = 1;
                break;
            }
        }
    }
#endif

    return err;
}
  18020. /* Check whether a is prime by checking t iterations of Miller-Rabin.
  18021. *
  18022. * @param [in] a SP integer to check.
  18023. * @param [in] trials Number of trials of Miller-Rabin test to perform.
  18024. * @param [out] result MP_YES when number is prime.
  18025. * MP_NO otherwise.
  18026. *
  18027. * @return MP_OKAY on success.
  18028. * @return MP_MEM when dynamic memory allocation fails.
  18029. */
/* Check whether a is prime by checking t iterations of Miller-Rabin.
 *
 * Each trial uses the next pre-computed small prime as the base.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of trials of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_prime_trials(const sp_int* a, int trials, int* result)
{
    int err = MP_OKAY;
    int i;
    sp_int* n1;
    sp_int* r;
    /* Two temporaries of a->used + 1 digits for Miller-Rabin scratch. */
    DECL_SP_INT_ARRAY(t, a->used + 1, 2);
    /* b needs room for a modular exponentiation result. */
    DECL_SP_INT(b, a->used * 2 + 1);

    ALLOC_SP_INT_ARRAY(t, a->used + 1, 2, err, NULL);
    /* Allocate number that will hold modular exponentiation result. */
    ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
    if (err == MP_OKAY) {
        n1 = t[0];
        r  = t[1];

        _sp_init_size(n1, a->used + 1);
        _sp_init_size(r, a->used + 1);
        _sp_init_size(b, a->used * 2 + 1);

        /* Do requested number of trials of Miller-Rabin test. */
        for (i = 0; i < trials; i++) {
            /* Miller-Rabin test with known small prime as the base. */
            _sp_set(b, sp_primes[i]);
            err = sp_prime_miller_rabin(a, b, result, n1, r);
            /* Stop early on failure or a definite composite. */
            if ((err != MP_OKAY) || (*result == MP_NO)) {
                break;
            }
        }

        /* Clear temporary values. */
        sp_clear(n1);
        sp_clear(r);
        sp_clear(b);
    }

    /* Free allocated temporary. */
    FREE_SP_INT(b, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  18066. /* Check whether a is prime.
  18067. * Checks against a number of small primes and does t iterations of
  18068. * Miller-Rabin.
  18069. *
  18070. * @param [in] a SP integer to check.
  18071. * @param [in] trials Number of trials of Miller-Rabin test to perform.
  18072. * @param [out] result MP_YES when number is prime.
  18073. * MP_NO otherwise.
  18074. *
  18075. * @return MP_OKAY on success.
  18076. * @return MP_VAL when a or result is NULL, or trials is out of range.
  18077. * @return MP_MEM when dynamic memory allocation fails.
  18078. */
/* Check whether a is prime.
 * Checks against a number of small primes and does t iterations of
 * Miller-Rabin.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of trials of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or result is NULL, or trials is out of range.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_prime_is_prime(const sp_int* a, int trials, int* result)
{
    int err = MP_OKAY;
    /* Set when one of the fast checks already decided primality. */
    int haveRes = 0;

    /* Validate parameters. */
    if ((a == NULL) || (result == NULL)) {
        if (result != NULL) {
            *result = MP_NO;
        }
        err = MP_VAL;
    }
    /* Temporaries in the trial step need up to a->used * 2 + 1 digits. */
    else if (a->used * 2 >= SP_INT_DIGITS) {
        err = MP_VAL;
    }

    /* Check validity of Miller-Rabin iterations count.
     * Must do at least one and need a unique pre-computed prime for each
     * iteration.
     */
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
        *result = MP_NO;
        err = MP_VAL;
    }

    /* Short-cut, 1 is not prime. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        *result = MP_NO;
        haveRes = 1;
    }

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    /* Check against known small primes when a has 1 digit. */
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
            (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
        haveRes = sp_cmp_primes(a, result);
    }

    /* Check all small primes for even divisibility. */
    if ((err == MP_OKAY) && (!haveRes)) {
        err = sp_div_primes(a, &haveRes, result);
    }

    /* Check a number of iterations of Miller-Rabin with small primes. */
    if ((err == MP_OKAY) && (!haveRes)) {
        err = _sp_prime_trials(a, trials, result);
    }

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  18123. #ifndef WC_NO_RNG
  18124. /* Check whether a is prime by doing t iterations of Miller-Rabin.
  18125. *
  18126. * t random numbers should give a (1/4)^t chance of a false prime.
  18127. *
  18128. * @param [in] a SP integer to check.
  18129. * @param [in] trials Number of iterations of Miller-Rabin test to perform.
  18130. * @param [out] result MP_YES when number is prime.
  18131. * MP_NO otherwise.
  18132. * @param [in] rng Random number generator for Miller-Rabin testing.
  18133. *
  18134. * @return MP_OKAY on success.
  18135. * @return MP_VAL when a, result or rng is NULL.
  18136. * @return MP_MEM when dynamic memory allocation fails.
  18137. */
/* Check whether a is prime by doing t iterations of Miller-Rabin.
 *
 * t random numbers should give a (1/4)^t chance of a false prime.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, result or rng is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
    WC_RNG* rng)
{
    int err = MP_OKAY;
    int bits = sp_count_bits(a);
    /* Number of bytes needed to cover all bits of a. */
    word32 baseSz = ((word32)bits + 7) / 8;
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);

    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        sp_int* c  = ds[0];
        sp_int* n1 = ds[1];
        sp_int* b  = d[0];
        sp_int* r  = d[1];

        _sp_init_size(c , a->used + 1);
        _sp_init_size(n1, a->used + 1);
        _sp_init_size(b , a->used * 2 + 1);
        _sp_init_size(r , a->used * 2 + 1);

        /* c = a - 2: upper bound (exclusive) for usable random bases. */
        _sp_sub_d(a, 2, c);
        /* bits now holds the bit count of the top word (0 => full word). */
        bits &= SP_WORD_MASK;

        /* Keep trying random numbers until all trials complete. */
        while (trials > 0) {
            /* Generate random trial number. */
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
            if (err != MP_OKAY) {
                break;
            }
            b->used = a->used;

    #ifdef BIG_ENDIAN_ORDER
            /* Fix top digit if fewer bytes than a full digit generated. */
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
                b->dp[b->used-1] >>=
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
            }
    #endif /* BIG_ENDIAN_ORDER */

            /* Ensure the top word has no more bits than necessary. */
            if (bits > 0) {
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
                sp_clamp(b);
            }

            /* Can't use random value if it is: 0, 1, 2, a-2, a-1, >= a.
             * Reject and generate a new candidate instead. */
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
                continue;
            }

            /* Perform Miller-Rabin test with random value as the base. */
            err = sp_prime_miller_rabin(a, b, result, n1, r);
            if ((err != MP_OKAY) || (*result == MP_NO)) {
                break;
            }

            /* Trial complete. */
            trials--;
        }

        /* Zeroize temporary values used when generating private prime. */
        sp_forcezero(n1);
        sp_forcezero(r);
        sp_forcezero(b);
        sp_forcezero(c);
    }

    FREE_SP_INT_ARRAY(d, NULL);
    FREE_SP_INT_ARRAY(ds, NULL);
    return err;
}
  18201. #endif /*!WC_NO_RNG */
  18202. /* Check whether a is prime.
  18203. * Checks against a number of small primes and does t iterations of
  18204. * Miller-Rabin.
  18205. *
  18206. * @param [in] a SP integer to check.
  18207. * @param [in] trials Number of iterations of Miller-Rabin test to perform.
  18208. * @param [out] result MP_YES when number is prime.
  18209. * MP_NO otherwise.
  18210. * @param [in] rng Random number generator for Miller-Rabin testing.
  18211. *
  18212. * @return MP_OKAY on success.
  18213. * @return MP_VAL when a, result or rng is NULL.
  18214. * @return MP_MEM when dynamic memory allocation fails.
  18215. */
/* Check whether a is prime.
 * Checks against a number of small primes and does t iterations of
 * Miller-Rabin with random bases.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, result or rng is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
{
    int err = MP_OKAY;
    /* Primality verdict; reported through result at the end. */
    int ret = MP_YES;
    int haveRes = 0;

    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
        err = MP_VAL;
    }
#ifndef WC_NO_RNG
    /* Temporaries in random trials need up to a->used * 2 + 1 digits. */
    if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
        err = MP_VAL;
    }
#endif
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    /* Ensure trials is valid. Maximum based on number of small primes
     * available. */
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
        err = MP_VAL;
    }

    /* Short-cut: 1 is not prime. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        ret = MP_NO;
        haveRes = 1;
    }

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    /* Check against known small primes when a has 1 digit. */
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
            (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
        haveRes = sp_cmp_primes(a, &ret);
    }

    /* Check all small primes for even divisibility. */
    if ((err == MP_OKAY) && (!haveRes)) {
        err = sp_div_primes(a, &haveRes, &ret);
    }

#ifndef WC_NO_RNG
    /* Check a number of iterations of Miller-Rabin with random large values. */
    if ((err == MP_OKAY) && (!haveRes)) {
        err = _sp_prime_random_trials(a, trials, &ret, rng);
    }
#else
    (void)trials;
#endif /* !WC_NO_RNG */

    /* result may be NULL when err is MP_VAL from parameter checking. */
    if (result != NULL) {
        *result = ret;
    }

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  18267. #endif /* WOLFSSL_SP_PRIME_GEN */
  18268. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
  18269. /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
  18270. *
  18271. * Find the largest number that divides both a and b without remainder.
  18272. * r <= a, r <= b, a % r == 0, b % r == 0
  18273. *
  18274. * a and b are positive integers.
  18275. *
  18276. * Euclidean Algorithm:
  18277. * 1. If a > b then a = b, b = a
  18278. * 2. u = a
  18279. * 3. v = b % a
  18280. * 4. While v != 0
  18281. * 4.1. t = u % v
  18282. * 4.2. u <= v, v <= t, t <= u
  18283. * 5. r = u
  18284. *
  18285. * @param [in] a SP integer of first operand.
  18286. * @param [in] b SP integer of second operand.
  18287. * @param [out] r SP integer to hold result.
  18288. *
  18289. * @return MP_OKAY on success.
  18290. * @return MP_MEM when dynamic memory allocation fails.
  18291. */
/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
 *
 * Find the largest number that divides both a and b without remainder.
 * r <= a, r <= b, a % r == 0, b % r == 0
 *
 * a and b are positive integers.
 *
 * Euclidean Algorithm:
 *  1. If a > b then a = b, b = a
 *  2. u = a
 *  3. v = b % a
 *  4. While v != 0
 *   4.1. t = u % v
 *   4.2. u <= v, v <= t, t <= u
 *  5. r = u
 *
 * @param [in]  a  SP integer of first operand.
 * @param [in]  b  SP integer of second operand.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* t = NULL;
    /* Used for swapping sp_ints. */
    sp_int* s;
    /* Determine maximum digit length numbers will reach. */
    unsigned int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
    DECL_SP_INT_ARRAY(d, used, 3);

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
    if (err == MP_OKAY) {
        u = d[0];
        v = d[1];
        t = d[2];
        _sp_init_size(u, used);
        _sp_init_size(v, used);
        _sp_init_size(t, used);

        /* 1. If a > b then a = b, b = a.
         * Make a <= b. (Only the local pointers are swapped.)
         */
        if (_sp_cmp(a, b) == MP_GT) {
            const sp_int* tmp;
            tmp = a;
            a = b;
            b = tmp;
        }

        /* 2. u = a, v = b mod a */
        _sp_copy(a, u);
        /* 3. v = b mod a - use single-digit mod when a fits in one digit. */
        if (a->used == 1) {
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
            /* used is 0 when remainder is zero, 1 otherwise. */
            v->used = (v->dp[0] != 0);
        }
        else {
            err = sp_mod(b, a, v);
        }
    }

    /* 4. While v != 0 */
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
     * digit.
     */
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        if (v->used == 1) {
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
            t->used = (t->dp[0] != 0);
        }
        else {
            err = sp_mod(u, v, t);
        }
        /* 4.2. u <= v, v <= t, t <= u - rotate pointers, no copying. */
        s = u; u = v; v = t; t = s;
    }
    /* Only one digit remaining in u and v - cheap native modulo. */
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        t->dp[0] = u->dp[0] % v->dp[0];
        t->used = (t->dp[0] != 0);
        /* 4.2. u <= v, v <= t, t <= u */
        s = u; u = v; v = t; t = s;
    }
    if (err == MP_OKAY) {
        /* 5. r = u */
        _sp_copy(u, r);
    }

    FREE_SP_INT_ARRAY(d, NULL);

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  18366. /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
  18367. *
  18368. * Find the largest number that divides both a and b without remainder.
  18369. * r <= a, r <= b, a % r == 0, b % r == 0
  18370. *
  18371. * a and b are positive integers.
  18372. *
  18373. * @param [in] a SP integer of first operand.
  18374. * @param [in] b SP integer of second operand.
  18375. * @param [out] r SP integer to hold result.
  18376. *
  18377. * @return MP_OKAY on success.
  18378. * @return MP_VAL when a, b or r is NULL or too large.
  18379. * @return MP_MEM when dynamic memory allocation fails.
  18380. */
  18381. int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
  18382. {
  18383. int err = MP_OKAY;
  18384. /* Validate parameters. */
  18385. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18386. err = MP_VAL;
  18387. }
  18388. /* Check that we have space in numbers to do work. */
  18389. else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
  18390. err = MP_VAL;
  18391. }
  18392. /* Check that r is large enough to hold maximum sized result. */
  18393. else if (((a->used <= b->used) && (r->size < a->used)) ||
  18394. ((b->used < a->used) && (r->size < b->used))) {
  18395. err = MP_VAL;
  18396. }
  18397. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18398. /* Algorithm doesn't work with negative numbers. */
  18399. else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
  18400. err = MP_VAL;
  18401. }
  18402. #endif
  18403. else if (sp_iszero(a)) {
  18404. /* GCD of 0 and 0 is undefined - all integers divide 0. */
  18405. if (sp_iszero(b)) {
  18406. err = MP_VAL;
  18407. }
  18408. else {
  18409. /* GCD of 0 and b is b - b divides 0. */
  18410. err = sp_copy(b, r);
  18411. }
  18412. }
  18413. else if (sp_iszero(b)) {
  18414. /* GCD of 0 and a is a - a divides 0. */
  18415. err = sp_copy(a, r);
  18416. }
  18417. else {
  18418. /* Calculate GCD. */
  18419. err = _sp_gcd(a, b, r);
  18420. }
  18421. return err;
  18422. }
  18423. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
  18424. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
  18425. (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
  18426. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18427. * Smallest number divisible by both numbers.
  18428. *
  18429. * a and b are positive integers.
  18430. *
  18431. * lcm(a, b) = (a / gcd(a, b)) * b
  18432. * Divide the common divisor from a and multiply by b.
  18433. *
  18434. * Algorithm:
  18435. * 1. t0 = gcd(a, b)
  18436. * 2. If a > b then
  18437. * 2.1. t1 = a / t0
  18438. * 2.2. r = b * t1
  18439. * 3. Else
  18440. * 3.1. t1 = b / t0
  18441. * 3.2. r = a * t1
  18442. *
  18443. * @param [in] a SP integer of first operand.
  18444. * @param [in] b SP integer of second operand.
  18445. * @param [out] r SP integer to hold result.
  18446. *
  18447. * @return MP_OKAY on success.
  18448. * @return MP_MEM when dynamic memory allocation fails.
  18449. */
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
 * Smallest number divisible by both numbers.
 *
 * a and b are positive integers.
 *
 * lcm(a, b) = (a / gcd(a, b)) * b
 * Divide the common divisor from a and multiply by b.
 *
 * Algorithm:
 *  1. t0 = gcd(a, b)
 *  2. If a > b then
 *   2.1. t1 = a / t0
 *   2.2. r = b * t1
 *  3. Else
 *   3.1. t1 = b / t0
 *   3.2. r = a * t1
 *
 * @param [in]  a  SP integer of first operand.
 * @param [in]  b  SP integer of second operand.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    /* Determine maximum digit length numbers will reach. */
    unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
    /* t[0] holds the GCD, t[1] the quotient of the larger operand by it. */
    DECL_SP_INT_ARRAY(t, used, 2);

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t[0], used);
        _sp_init_size(t[1], used);

        SAVE_VECTOR_REGISTERS(err = _svr_ret;);

        if (err == MP_OKAY) {
            /* 1. t0 = gcd(a, b) */
            err = sp_gcd(a, b, t[0]);
        }

        if (err == MP_OKAY) {
            /* Divide the greater by the common divisor and multiply by other
             * to operate on the smallest length numbers.
             */
            /* 2. If a > b then */
            if (_sp_cmp_abs(a, b) == MP_GT) {
                /* 2.1. t1 = a / t0 */
                err = sp_div(a, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    /* 2.2. r = b * t1 */
                    err = sp_mul(b, t[1], r);
                }
            }
            /* 3. Else */
            else {
                /* 3.1. t1 = b / t0 */
                err = sp_div(b, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    /* 3.2. r = a * t1 */
                    err = sp_mul(a, t[1], r);
                }
            }
        }

        RESTORE_VECTOR_REGISTERS();
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  18493. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18494. * Smallest number divisible by both numbers.
  18495. *
  18496. * a and b are positive integers.
  18497. *
  18498. * @param [in] a SP integer of first operand.
  18499. * @param [in] b SP integer of second operand.
  18500. * @param [out] r SP integer to hold result.
  18501. *
  18502. * @return MP_OKAY on success.
  18503. * @return MP_VAL when a, b or r is NULL; or a or b is zero.
  18504. * @return MP_MEM when dynamic memory allocation fails.
  18505. */
  18506. int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
  18507. {
  18508. int err = MP_OKAY;
  18509. /* Validate parameters. */
  18510. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18511. err = MP_VAL;
  18512. }
  18513. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18514. /* Ensure a and b are positive. */
  18515. else if ((a->sign == MP_NEG) || (b->sign >= MP_NEG)) {
  18516. err = MP_VAL;
  18517. }
  18518. #endif
  18519. /* Ensure r has space for maximumal result. */
  18520. else if (r->size < a->used + b->used) {
  18521. err = MP_VAL;
  18522. }
  18523. /* LCM of 0 and any number is undefined as 0 is not in the set of values
  18524. * being used.
  18525. */
  18526. if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
  18527. err = MP_VAL;
  18528. }
  18529. if (err == MP_OKAY) {
  18530. /* Do operation. */
  18531. err = _sp_lcm(a, b, r);
  18532. }
  18533. return err;
  18534. }
  18535. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
  18536. /* Returns the run time settings.
  18537. *
  18538. * @return Settings value.
  18539. */
/* Returns the run time settings.
 *
 * Used to verify the library and application were built with the same
 * configuration.
 *
 * @return  Settings value (CTC_SETTINGS bit mask).
 */
word32 CheckRunTimeSettings(void)
{
    return CTC_SETTINGS;
}
  18544. /* Returns the fast math settings.
  18545. *
  18546. * @return Setting - number of bits in a digit.
  18547. */
/* Returns the fast math settings.
 *
 * @return  Setting - number of bits in a digit (SP_WORD_SIZE).
 */
word32 CheckRunTimeFastMath(void)
{
    return SP_WORD_SIZE;
}
  18552. #ifdef WOLFSSL_CHECK_MEM_ZERO
  18553. /* Add an MP to check.
  18554. *
  18555. * @param [in] name Name of address to check.
  18556. * @param [in] sp sp_int that needs to be checked.
  18557. */
/* Add an MP to check.
 *
 * Registers the sp_int's full digit buffer with the memory-zero checker.
 *
 * @param [in] name  Name of address to check.
 * @param [in] sp    sp_int that needs to be checked.
 */
void sp_memzero_add(const char* name, sp_int* sp)
{
    /* Track the whole allocated digit array, not just the used digits. */
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_int_digit));
}
  18562. /* Check the memory in the data pointer for memory that must be zero.
  18563. *
  18564. * @param [in] sp sp_int that needs to be checked.
  18565. */
/* Check the memory in the data pointer for memory that must be zero.
 *
 * @param [in] sp  sp_int that needs to be checked.
 */
void sp_memzero_check(sp_int* sp)
{
    /* Verify the whole allocated digit array has been zeroized. */
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_int_digit));
}
  18570. #endif /* WOLFSSL_CHECK_MEM_ZERO */
  18571. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  18572. defined(WOLFSSL_SP_NO_MALLOC)
  18573. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  18574. !defined(WOLFSSL_SP_NO_DYN_STACK)
  18575. #pragma GCC diagnostic pop
  18576. #endif
  18577. #endif
  18578. #endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */