sp_int.c 646 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362
  1. /* sp_int.c
  2. *
  3. * Copyright (C) 2006-2023 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Implementation by Sean Parkinson. */
  22. /*
  23. DESCRIPTION
  24. This library provides single precision (SP) integer math functions.
  25. */
  26. #ifdef HAVE_CONFIG_H
  27. #include <config.h>
  28. #endif
  29. #include <wolfssl/wolfcrypt/settings.h>
  30. #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
  31. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  32. defined(WOLFSSL_SP_NO_MALLOC)
  33. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  34. !defined(WOLFSSL_SP_NO_DYN_STACK)
  35. #pragma GCC diagnostic push
  36. /* We are statically declaring a variable smaller than sp_int.
  37. * We track available memory in the 'size' field.
  38. * Disable warnings of sp_int being partly outside array bounds of variable.
  39. */
  40. #pragma GCC diagnostic ignored "-Warray-bounds"
  41. #endif
  42. #endif
  43. #ifdef NO_INLINE
  44. #include <wolfssl/wolfcrypt/misc.h>
  45. #else
  46. #define WOLFSSL_MISC_INCLUDED
  47. #include <wolfcrypt/src/misc.c>
  48. #endif
  49. /* SP Build Options:
  50. * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
  51. * WOLFSSL_HAVE_SP_DH: Enable SP DH support
  52. * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
  53. * WOLFSSL_SP_MATH: Use only single precision math and algorithms
  54. * it supports (no fastmath tfm.c or normal integer.c)
  55. * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
  56. * (replacement for tfm.c and integer.c)
  57. * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
  58. * stack variables
  59. * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
  60. * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
  61. * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
  62. * WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
  63. * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
  64. * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
  65. * WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
  66. * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
  67. * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
  68. * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
  69. * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
  70. * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
  71. * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
  72. * (used with -mthumb)
  73. * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
  74. * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
  75. * WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
  76. * WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
  77. * WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
  78. * WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
  79. * (explicitly uses register 'r7')
  80. * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
  81. * WOLFSSL_SP_PPC Enable PPC assembly speedups
  82. * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
  83. * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
  84. * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
  85. * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
  86. * WOLFSSL_SP_S390X Enable S390X assembly speedups
  87. * SP_WORD_SIZE Force 32 or 64 bit mode
  88. * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
  89. * will return FP_WOULDBLOCK for long operations and function must be
  90. * called again until complete.
  91. * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
  92. * exponentiation implementation.
  93. * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
  94. * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
  95. * pointer is not allowed.
  96. * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
  97. * Dynamic arrays used when not small stack.
  98. * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
  99. * WOLFSSL_SP_LOW_MEM Use algorithms that use less memory.
  100. */
  101. /* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
  102. #if defined(__clang__) && defined(__clang_major__) && \
  103. (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
  104. #undef WOLFSSL_SP_SMALL
  105. #endif
  106. #include <wolfssl/wolfcrypt/sp_int.h>
  107. /* DECL_SP_INT: Declare one variable of type 'sp_int'. */
  108. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  109. !defined(WOLFSSL_SP_NO_MALLOC)
  110. /* Declare a variable that will be assigned a value on XMALLOC. */
  111. #define DECL_SP_INT(n, s) \
  112. sp_int* n = NULL
  113. #else
  114. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  115. !defined(WOLFSSL_SP_NO_DYN_STACK)
  116. /* Declare a variable on the stack with the required data size. */
  117. #define DECL_SP_INT(n, s) \
  118. byte n##d[MP_INT_SIZEOF(s)]; \
  119. sp_int* (n) = (sp_int*)n##d
  120. #else
  121. /* Declare a variable on the stack. */
  122. #define DECL_SP_INT(n, s) \
  123. sp_int n[1]
  124. #endif
  125. #endif
  126. /* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
  127. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  128. !defined(WOLFSSL_SP_NO_MALLOC)
  129. /* Dynamically allocate just enough data to support size. */
  130. #define ALLOC_SP_INT(n, s, err, h) \
  131. do { \
  132. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  133. (err) = MP_VAL; \
  134. } \
  135. if ((err) == MP_OKAY) { \
  136. (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), \
  137. DYNAMIC_TYPE_BIGINT); \
  138. if ((n) == NULL) { \
  139. (err) = MP_MEM; \
  140. } \
  141. } \
  142. } \
  143. while (0)
  144. /* Dynamically allocate just enough data to support size - and set size. */
  145. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  146. do { \
  147. ALLOC_SP_INT(n, s, err, h); \
  148. if ((err) == MP_OKAY) { \
  149. (n)->size = (s); \
  150. } \
  151. } \
  152. while (0)
  153. #else
  154. /* Array declared on stack - check size is valid. */
  155. #define ALLOC_SP_INT(n, s, err, h) \
  156. do { \
  157. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  158. (err) = MP_VAL; \
  159. } \
  160. } \
  161. while (0)
  162. /* Array declared on stack - set the size field. */
  163. #define ALLOC_SP_INT_SIZE(n, s, err, h) \
  164. do { \
  165. ALLOC_SP_INT(n, s, err, h); \
  166. if ((err) == MP_OKAY) { \
  167. (n)->size = (unsigned int)(s); \
  168. } \
  169. } \
  170. while (0)
  171. #endif
  172. /* FREE_SP_INT: Free an 'sp_int' variable. */
  173. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  174. !defined(WOLFSSL_SP_NO_MALLOC)
  175. /* Free dynamically allocated data. */
  176. #define FREE_SP_INT(n, h) \
  177. do { \
  178. if ((n) != NULL) { \
  179. XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
  180. } \
  181. } \
  182. while (0)
  183. #else
  184. /* Nothing to do as declared on stack. */
  185. #define FREE_SP_INT(n, h) WC_DO_NOTHING
  186. #endif
  187. /* Declare a variable that will be assigned a value on XMALLOC. */
  188. #define DECL_DYN_SP_INT_ARRAY(n, s, c) \
  189. sp_int* n##d = NULL; \
  190. sp_int* (n)[c] = { NULL, }
  191. /* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
  192. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  193. !defined(WOLFSSL_SP_NO_MALLOC)
  194. /* Declare a variable that will be assigned a value on XMALLOC. */
  195. #define DECL_SP_INT_ARRAY(n, s, c) \
  196. DECL_DYN_SP_INT_ARRAY(n, s, c)
  197. #else
  198. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  199. !defined(WOLFSSL_SP_NO_DYN_STACK)
  200. /* Declare a variable on the stack with the required data size. */
  201. #define DECL_SP_INT_ARRAY(n, s, c) \
  202. byte n##d[MP_INT_SIZEOF(s) * (c)]; \
  203. sp_int* (n)[c] = { NULL, }
  204. #else
  205. /* Declare a variable on the stack. */
  206. #define DECL_SP_INT_ARRAY(n, s, c) \
  207. sp_int n##d[c]; \
  208. sp_int* (n)[c]
  209. #endif
  210. #endif
  211. /* Dynamically allocate just enough data to support multiple sp_ints of the
  212. * required size. Use pointers into data to make up array and set sizes.
  213. */
  214. #define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h) \
  215. do { \
  216. if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
  217. (err) = MP_VAL; \
  218. } \
  219. if ((err) == MP_OKAY) { \
  220. n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h), \
  221. DYNAMIC_TYPE_BIGINT); \
  222. if (n##d == NULL) { \
  223. (err) = MP_MEM; \
  224. } \
  225. else { \
  226. int n##ii; \
  227. (n)[0] = n##d; \
  228. (n)[0]->size = (s); \
  229. for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
  230. (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
  231. (n)[n##ii]->size = (s); \
  232. } \
  233. } \
  234. } \
  235. } \
  236. while (0)
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size.
 * Resolves to heap allocation, VLA-backed stack data, or a plain stack
 * array depending on build configuration (must match DECL_SP_INT_ARRAY).
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
/* Data declared on stack that supports multiple sp_ints of the
 * required size. Use pointers into data to make up array and set sizes.
 */
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
do { \
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
        (err) = MP_VAL; \
    } \
    if ((err) == MP_OKAY) { \
        int n##ii; \
        /* n##d is a raw byte buffer here; carve sp_ints out of it.
         * sp_int_minimal is used as only the header needs setting. */ \
        (n)[0] = (sp_int*)n##d; \
        ((sp_int_minimal*)(n)[0])->size = (s); \
        for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
            (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
            ((sp_int_minimal*)(n)[n##ii])->size = (s); \
        } \
    } \
} \
while (0)
#else
/* Data declared on stack that supports multiple sp_ints of the
 * required size. Set into array and set sizes.
 */
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
do { \
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
        (err) = MP_VAL; \
    } \
    if ((err) == MP_OKAY) { \
        int n##ii; \
        /* n##d is a full-size sp_int array; just take addresses. */ \
        for (n##ii = 0; n##ii < (int)(c); n##ii++) { \
            (n)[n##ii] = &n##d[n##ii]; \
            (n)[n##ii]->size = (s); \
        } \
    } \
} \
while (0)
#endif
#endif
/* Free data variable that was dynamically allocated.
 * Safe to call when allocation failed: n##d is checked for NULL.
 */
#define FREE_DYN_SP_INT_ARRAY(n, h) \
do { \
    if (n##d != NULL) { \
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
    } \
} \
while (0)
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'.
 * Counterpart of ALLOC_SP_INT_ARRAY; a no-op for stack-backed builds.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
#define FREE_SP_INT_ARRAY(n, h) \
    FREE_DYN_SP_INT_ARRAY(n, h)
#else
/* Nothing to do as data declared on stack. */
#define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
#endif
#ifndef WOLFSSL_NO_ASM
/* Map the GNU-style '__asm__'/'__volatile__' keywords onto the spellings
 * these compilers actually accept, so the macros below compile unchanged.
 */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif
#if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
/*
 * CPU: x86_64
 *
 * Primitive 64x64->128 multiply/accumulate operations used by the generic
 * mul/sqr/div code. Each macro operates on a multi-word accumulator made
 * of the caller's variables (vl = low word, vh = high word, vo = overflow
 * word). rax/rdx are clobbered because mulq writes its product there.
 */
#ifndef _MSC_VER
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq $0 , %[o] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Index of highest bit set.
 * NOTE: BSR's destination is undefined when va == 0 - callers must pass a
 * non-zero value.
 */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
    __asm__ __volatile__ ( \
        "bsr %[a], %[i] \n\t" \
        : [i] "=r" (vi) \
        : [a] "r" (va) \
        : "cc" \
    )
#else
/* MSVC has no GCC-style inline asm on x64; use compiler intrinsics that
 * mirror the semantics of the asm macros above.
 */
#include <intrin.h>
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    vl = _umul128(va, vb, &vh)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
do { \
    vl = _umul128(va, vb, &vh); \
    vo = 0; \
} \
while (0)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    vl = _umul128(va, va, &vh)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add va, variable in a register, into: vh | vl
 * (Same as SP_ASM_ADDC - intrinsics make no register/memory distinction.)
 */
#define SP_ASM_ADDC_REG(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
do { \
    unsigned char c; \
    c = _subborrow_u64(0, vl, va, &vl); \
    _subborrow_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
} \
while (0)
/* Index of highest bit set. */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
do { \
    unsigned long idx; \
    _BitScanReverse64(&idx, va); \
    vi = idx; \
} \
while (0)
#endif
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
    _MSC_VER >= 1920)
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divq instruction on Intel x64.
 *
 * NOTE(review): divq faults (#DE) if the quotient does not fit in 64 bits -
 * callers are presumably required to guarantee hi < d; confirm at call sites.
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Lower digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
#ifndef _MSC_VER
    __asm__ __volatile__ (
        "divq %2"
        : "+a" (lo)           /* rdx:rax / d -> quotient in rax */
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
#elif defined(_MSC_VER) && _MSC_VER >= 1920
    /* _udiv128 introduced in VS2019 (_MSC_VER 1920); remainder discarded. */
    return _udiv128(hi, lo, d, NULL);
#endif
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
/*
 * CPU: x86
 *
 * 32-bit counterparts of the x86_64 macros: 32x32->64 multiplies via mull,
 * with eax/edx clobbered for the product.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "eax", "edx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl $0 , %[o] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 * "+rm" outputs relieve register pressure on 32-bit x86.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
  808. /* Index of highest bit set. */
  809. #define SP_ASM_HI_BIT_SET_IDX(va, vi) \
  810. __asm__ __volatile__ ( \
  811. "bsr %[a], %[i] \n\t" \
  812. : [i] "=r" (vi) \
  813. : [a] "r" (va) \
  814. : "cC" \
  815. )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divl instruction on Intel x64.
 *
 * NOTE(review): divl faults (#DE) if the quotient does not fit in 32 bits -
 * callers are presumably required to guarantee hi < d; confirm at call sites.
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Lower digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        "divl %2"
        : "+a" (lo)           /* edx:eax / d -> quotient in eax */
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
/*
 * CPU: Aarch64
 *
 * 64x64->128 multiplies are built from mul (low half) and umulh (high
 * half); temporaries use x8/x9 which are listed as clobbers.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        "mov %[l], x8 \n\t" \
        "mov %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "umulh %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "clz %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * Normalises the divisor so its top bits are set, then computes the
 * quotient in 32-bit chunks with udiv against an over-estimate of the
 * divisor's top half, correcting the partial remainder after each step.
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Lower digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        /* Shift d (and the dividend) left by 16 when its top 16 bits are
         * clear, so that d's top bit region is populated. */
        "lsr x3, %[d], 48\n\t"
        "mov x5, 16\n\t"
        "cmp x3, 0\n\t"
        "mov x4, 63\n\t"
        "csel x3, x5, xzr, eq\n\t"
        "sub x4, x4, x3\n\t"
        "lsl %[d], %[d], x3\n\t"
        "lsl %[hi], %[hi], x3\n\t"
        "lsr x5, %[lo], x4\n\t"
        "lsl %[lo], %[lo], x3\n\t"
        "orr %[hi], %[hi], x5, lsr 1\n\t"
        /* x5 = (top 32 bits of d) + 1: over-estimate of divisor. */
        "lsr x5, %[d], 32\n\t"
        "add x5, x5, 1\n\t"
        /* First quotient estimate from the high word. */
        "udiv x3, %[hi], x5\n\t"
        "lsl x6, x3, 32\n\t"
        "mul x4, %[d], x6\n\t"
        "umulh x3, %[d], x6\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        /* Second estimate refines the high 32 bits of the quotient. */
        "udiv x3, %[hi], x5\n\t"
        "lsl x3, x3, 32\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        /* Two more steps produce the low 32 bits of the quotient. */
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "sub %[lo], %[lo], x4\n\t"
        /* Final exact correction of the accumulated quotient in x6. */
        "udiv x3, %[lo], %[d]\n\t"
        "add %[hi], x6, x3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "x3", "x4", "x5", "x6", "cc"
    );
    return hi;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
  1057. #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
  1058. SP_WORD_SIZE == 32
  1059. /*
  1060. * CPU: ARM32 or Cortex-M4 and similar
  1061. */
  1062. /* Multiply va by vb and store double size result in: vh | vl */
  1063. #define SP_ASM_MUL(vl, vh, va, vb) \
  1064. __asm__ __volatile__ ( \
  1065. "umull %[l], %[h], %[a], %[b] \n\t" \
  1066. : [h] "+r" (vh), [l] "+r" (vl) \
  1067. : [a] "r" (va), [b] "r" (vb) \
  1068. : "memory" \
  1069. )
  1070. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1071. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1072. __asm__ __volatile__ ( \
  1073. "umull %[l], %[h], %[a], %[b] \n\t" \
  1074. "mov %[o], #0 \n\t" \
  1075. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1076. : [a] "r" (va), [b] "r" (vb) \
  1077. : \
  1078. )
  1079. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1080. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1081. __asm__ __volatile__ ( \
  1082. "umull r8, r9, %[a], %[b] \n\t" \
  1083. "adds %[l], %[l], r8 \n\t" \
  1084. "adcs %[h], %[h], r9 \n\t" \
  1085. "adc %[o], %[o], #0 \n\t" \
  1086. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1087. : [a] "r" (va), [b] "r" (vb) \
  1088. : "r8", "r9", "cc" \
  1089. )
  1090. /* Multiply va by vb and add double size result into: vh | vl */
  1091. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1092. __asm__ __volatile__ ( \
  1093. "umlal %[l], %[h], %[a], %[b] \n\t" \
  1094. : [l] "+r" (vl), [h] "+r" (vh) \
  1095. : [a] "r" (va), [b] "r" (vb) \
  1096. : \
  1097. )
  1098. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1099. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1100. __asm__ __volatile__ ( \
  1101. "umull r8, r9, %[a], %[b] \n\t" \
  1102. "adds %[l], %[l], r8 \n\t" \
  1103. "adcs %[h], %[h], r9 \n\t" \
  1104. "adc %[o], %[o], #0 \n\t" \
  1105. "adds %[l], %[l], r8 \n\t" \
  1106. "adcs %[h], %[h], r9 \n\t" \
  1107. "adc %[o], %[o], #0 \n\t" \
  1108. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1109. : [a] "r" (va), [b] "r" (vb) \
  1110. : "r8", "r9", "cc" \
  1111. )
  1112. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1113. * Assumes first add will not overflow vh | vl
  1114. */
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * NOTE(review): the first accumulation ends with plain 'adc' into vh (no
 * carry propagated to vo), so - like the other *_NO variants in this file -
 * it assumes the first add cannot overflow vh | vl.  Clobbers r8, r9, flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"umull r8, r9, %[a], %[b] \n\t" \
"adds %[l], %[l], r8 \n\t" \
"adc %[h], %[h], r9 \n\t" \
"adds %[l], %[l], r8 \n\t" \
"adcs %[h], %[h], r9 \n\t" \
"adc %[o], %[o], #0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb) \
: "r8", "r9", "cc" \
)
/* Square va and store double size result in: vh | vl */
/* Single umull: vl = low word, vh = high word of va * va. */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"umull %[l], %[h], %[a], %[a] \n\t" \
: [h] "+r" (vh), [l] "+r" (vl) \
: [a] "r" (va) \
: "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
/* umull into scratch r8:r9, then 96-bit accumulate with carry chain. */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"umull r8, r9, %[a], %[a] \n\t" \
"adds %[l], %[l], r8 \n\t" \
"adcs %[h], %[h], r9 \n\t" \
"adc %[o], %[o], #0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va) \
: "r8", "r9", "cc" \
)
/* Square va and add double size result into: vh | vl */
/* umlal multiplies and accumulates in one instruction; any carry out of
 * vh is discarded (caller guarantees no overflow). */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
"umlal %[l], %[h], %[a], %[a] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"adds %[l], %[l], %[a] \n\t" \
"adc %[h], %[h], #0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subs %[l], %[l], %[a] \n\t" \
"sbc %[h], %[h], #0 \n\t" \
: [l] "+r" (vl), [h] "+r" (vh) \
: [a] "r" (va) \
: "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl */
/* Two full 96-bit additions; carry out of the top word is discarded. */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"adds %[l], %[l], %[a] \n\t" \
"adcs %[h], %[h], %[b] \n\t" \
"adc %[o], %[o], %[c] \n\t" \
"adds %[l], %[l], %[a] \n\t" \
"adcs %[h], %[h], %[b] \n\t" \
"adc %[o], %[o], %[c] \n\t" \
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH >= 7)
/* Count leading zeros - instruction only available on ARMv7 and newer. */
/* vn is a write-only ("=r") output: clz fully overwrites it. */
#define SP_ASM_LZCNT(va, vn) \
__asm__ __volatile__ ( \
"clz %[n], %[a] \n\t" \
: [n] "=r" (vn) \
: [a] "r" (va) \
: \
)
#endif
  1195. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  1196. #ifndef WOLFSSL_SP_ARM32_UDIV
/* Divide a two digit number by a digit number and return. (hi | lo) / d
*
* No division instruction used - does operation bit by bit.
* Constant time.
*
* @param [in] hi SP integer digit. High digit of the dividend.
* @param [in] lo SP integer digit. Lower digit of the dividend.
* @param [in] d SP integer digit. Number to divide by.
* @return The division result.
*/
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d)
{
sp_int_digit r = 0;
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
/* De Bruijn sequence table used to emulate clz on pre-ARMv7 cores. */
static const char debruijn32[32] = {
0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
};
static const sp_uint32 debruijn32_mul = 0x076be629;
#endif
__asm__ __volatile__ (
/* Shift d so that top bit is set. */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
/* Smear the top set bit down, +1 to isolate it, then de Bruijn
 * multiply + table lookup gives the leading-zero count in r5. */
"ldr r4, %[m]\n\t"
"mov r5, %[d]\n\t"
"orr r5, r5, r5, lsr #1\n\t"
"orr r5, r5, r5, lsr #2\n\t"
"orr r5, r5, r5, lsr #4\n\t"
"orr r5, r5, r5, lsr #8\n\t"
"orr r5, r5, r5, lsr #16\n\t"
"add r5, r5, #1\n\t"
"mul r5, r5, r4\n\t"
"lsr r5, r5, #27\n\t"
"ldrb r5, [%[t], r5]\n\t"
#else
"clz r5, %[d]\n\t"
#endif
/* Normalise: shift d, and (hi|lo), left by the leading-zero count. */
"rsb r6, r5, #31\n\t"
"lsl %[d], %[d], r5\n\t"
"lsl %[hi], %[hi], r5\n\t"
"lsr r9, %[lo], r6\n\t"
"lsl %[lo], %[lo], r5\n\t"
"orr %[hi], %[hi], r9, lsr #1\n\t"
/* r5 = (d >> 1) + 1: threshold for the branch-free compare below. */
"lsr r5, %[d], #1\n\t"
"add r5, r5, #1\n\t"
"mov r6, %[lo]\n\t"
"mov r9, %[hi]\n\t"
/* Do top 32 */
/* Branch-free conditional subtract: r8 = all-ones iff r9 >= r5. */
"subs r8, r5, r9\n\t"
"sbc r8, r8, r8\n\t"
"add %[r], %[r], %[r]\n\t"
"sub %[r], %[r], r8\n\t"
"and r8, r8, r5\n\t"
"subs r9, r9, r8\n\t"
/* Next 30 bits */
"mov r4, #29\n\t"
"\n1:\n\t"
"movs r6, r6, lsl #1\n\t"
"adc r9, r9, r9\n\t"
"subs r8, r5, r9\n\t"
"sbc r8, r8, r8\n\t"
"add %[r], %[r], %[r]\n\t"
"sub %[r], %[r], r8\n\t"
"and r8, r8, r5\n\t"
"subs r9, r9, r8\n\t"
"subs r4, r4, #1\n\t"
"bpl 1b\n\t"
"add %[r], %[r], %[r]\n\t"
"add %[r], %[r], #1\n\t"
/* Handle difference has hi word > 0. */
"umull r4, r5, %[r], %[d]\n\t"
"subs r4, %[lo], r4\n\t"
"sbc r5, %[hi], r5\n\t"
"add %[r], %[r], r5\n\t"
"umull r4, r5, %[r], %[d]\n\t"
"subs r4, %[lo], r4\n\t"
"sbc r5, %[hi], r5\n\t"
"add %[r], %[r], r5\n\t"
/* Add 1 to result if bottom half of difference is >= d. */
"mul r4, %[r], %[d]\n\t"
"subs r4, %[lo], r4\n\t"
"subs r9, %[d], r4\n\t"
"sbc r8, r8, r8\n\t"
"sub %[r], %[r], r8\n\t"
"subs r9, r9, #1\n\t"
"sbc r8, r8, r8\n\t"
"sub %[r], %[r], r8\n\t"
: [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
: [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else
:
#endif
: "r4", "r5", "r6", "r8", "r9", "cc"
);
return r;
}
  1295. #else
/* Divide a two digit number by a digit number and return. (hi | lo) / d
*
* Using udiv instruction on arm32
* Constant time.
* NOTE(review): udiv latency is operand-dependent on some cores - confirm
* the constant-time claim holds for the target CPU.
*
* @param [in] hi SP integer digit. High digit of the dividend.
* @param [in] lo SP integer digit. Lower digit of the dividend.
* @param [in] d SP integer digit. Number to divide by.
* @return The division result.
*/
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d)
{
__asm__ __volatile__ (
/* Normalise by 8 bits when d < 2^24 (top byte clear). */
"lsrs r3, %[d], #24\n\t"
"it eq\n\t"
"moveq r3, #8\n\t"
"it ne\n\t"
"movne r3, #0\n\t"
"rsb r4, r3, #31\n\t"
"lsl %[d], %[d], r3\n\t"
"lsl %[hi], %[hi], r3\n\t"
"lsr r5, %[lo], r4\n\t"
"lsl %[lo], %[lo], r3\n\t"
"orr %[hi], %[hi], r5, lsr #1\n\t"
/* r5 = (d >> 16) + 1: divisor estimate; quotient built 16 bits at a
 * time in r6, remainder reduced via umull/subs/sbc after each step. */
"lsr r5, %[d], 16\n\t"
"add r5, r5, 1\n\t"
"udiv r3, %[hi], r5\n\t"
"lsl r6, r3, 16\n\t"
"umull r4, r3, %[d], r6\n\t"
"subs %[lo], %[lo], r4\n\t"
"sbc %[hi], %[hi], r3\n\t"
"udiv r3, %[hi], r5\n\t"
"lsl r3, r3, 16\n\t"
"add r6, r6, r3\n\t"
"umull r4, r3, %[d], r3\n\t"
"subs %[lo], %[lo], r4\n\t"
"sbc %[hi], %[hi], r3\n\t"
"lsr r3, %[lo], 16\n\t"
"orr r3, r3, %[hi], lsl 16\n\t"
"udiv r3, r3, r5\n\t"
"add r6, r6, r3\n\t"
"umull r4, r3, %[d], r3\n\t"
"subs %[lo], %[lo], r4\n\t"
"sbc %[hi], %[hi], r3\n\t"
"lsr r3, %[lo], 16\n\t"
"orr r3, r3, %[hi], lsl 16\n\t"
"udiv r3, r3, r5\n\t"
"add r6, r6, r3\n\t"
"mul r4, %[d], r3\n\t"
"sub %[lo], %[lo], r4\n\t"
/* Final exact correction, then return quotient via %[hi]. */
"udiv r3, %[lo], %[d]\n\t"
"add %[hi], r6, r3\n\t"
: [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
:
: "r3", "r4", "r5", "r6", "cc"
);
return hi;
}
  1355. #endif
  1356. #define SP_ASM_DIV_WORD
  1357. #endif
  1358. #define SP_INT_ASM_AVAILABLE
  1359. #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
  1360. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  1361. /*
  1362. * CPU: ARM Thumb (like Cortex-M0)
  1363. */
  1364. /* Compile with -fomit-frame-pointer, or similar, if compiler complains about
  1365. * usage of register 'r7'.
  1366. */
  1367. #if defined(__clang__)
/* Multiply va by vb and store double size result in: vh | vl */
/* Thumb-1 muls is 32x32->32, so the 64-bit product is built from four
 * 16x16 partial products extracted with uxth/lsrs. */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth %[l], %[b] \n\t" \
"muls %[l], r6 \n\t" \
/* al * bh */ \
"lsrs r4, %[b], #16 \n\t" \
"muls r6, r4 \n\t" \
"lsrs %[h], r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"muls r4, r6 \n\t" \
"adds %[h], %[h], r4 \n\t" \
/* ah * bl */ \
"uxth r4, %[b] \n\t" \
"muls r6, r4 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r4 \n\t" \
: [h] "+l" (vh), [l] "+l" (vl) \
: [a] "l" (va), [b] "l" (vb) \
: "r4", "r5", "r6", "cc" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl */
/* Same 16x16 decomposition; vo is zeroed and doubles as the zero
 * register for the carry adds. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth %[l], %[b] \n\t" \
"muls %[l], r6 \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5 \n\t" \
"lsrs %[h], r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"movs %[o], #0 \n\t" \
"adcs %[h], %[o] \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"muls r5, r6 \n\t" \
"adds %[h], %[h], r5 \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "cc" \
)
  1428. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl */
/* Fast variant: uses r7 as extra scratch so r5 can stay zero for the
 * whole sequence (see file note about -fomit-frame-pointer and r7). */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r7, %[b] \n\t" \
"muls r7, r6 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r7, %[b], #16 \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r7, %[b], #16 \n\t" \
"muls r7, r6 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r7, %[b] \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r7", "cc" \
)
  1466. #else
/* Multiply va by vb and add double size result into: vo | vh | vl */
/* Small variant: avoids r7, so r5 is both product scratch and the zero
 * register - hence it is re-zeroed ("movs r5, #0") before each carry add. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r5, %[b] \n\t" \
"muls r5, r6 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r5, %[b], #16 \n\t" \
"muls r5, r6 \n\t" \
"adds %[h], %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "cc" \
)
  1507. #endif
/* Multiply va by vb and add double size result into: vh | vl */
/* Two-word accumulate only: carry out of vh is discarded (caller
 * guarantees the total fits in vh | vl). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r4, %[b] \n\t" \
"muls r4, r6 \n\t" \
"adds %[l], %[l], r4 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
/* al * bh */ \
"lsrs r4, %[b], #16 \n\t" \
"muls r6, r4 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r4 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r4, %[b], #16 \n\t" \
"muls r4, r6 \n\t" \
"adds %[h], %[h], r4 \n\t" \
/* ah * bl */ \
"uxth r4, %[b] \n\t" \
"muls r6, r4 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r4 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh) \
: [a] "l" (va), [b] "l" (vb) \
: "r4", "r5", "r6", "cc" \
)
  1541. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
/* Each partial product is accumulated twice (product added 2x). */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r7, %[b] \n\t" \
"muls r7, r6 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r7, %[b], #16 \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r7, %[b], #16 \n\t" \
"muls r7, r6 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r7, %[b] \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r7", "cc" \
)
  1590. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
/* Small variant: %[a] is saved in (high) r8 so the %[a] register can be
 * reused as the zero register for the carry adds; it is restored before
 * the asm ends so the "+l"-style input stays valid. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movs r8, %[a] \n\t" \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r5, %[b] \n\t" \
"muls r5, r6 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"movs %[a], #0 \n\t" \
"adcs %[h], %[a] \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], %[a] \n\t" \
"adcs %[o], %[a] \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], %[a] \n\t" \
/* ah * bh */ \
"movs %[a], r8 \n\t" \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r5, %[b], #16 \n\t" \
"muls r5, r6 \n\t" \
"adds %[h], %[h], r5 \n\t" \
"movs %[a], #0 \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[h], %[h], r5 \n\t" \
"adcs %[o], %[a] \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
"adcs %[o], %[a] \n\t" \
"movs %[a], r8 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r8", "cc" \
)
  1643. #endif
  1644. #ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
/* The al*bl and first al*bh additions deliberately omit the carry into
 * vo, relying on the caller's no-overflow guarantee. */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r7, %[b] \n\t" \
"muls r7, r6 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"adcs %[h], r5 \n\t" \
/* al * bh */ \
"lsrs r7, %[b], #16 \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r7, %[b], #16 \n\t" \
"muls r7, r6 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r7, %[b] \n\t" \
"muls r6, r7 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r7 \n\t" \
"adcs %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r7", "cc" \
)
  1692. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
/* DEBUG variant: avoids r7 (frame pointer) by parking %[a] in r8 and
 * reusing the %[a] register as the zero register; restored at the end. */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movs r8, %[a] \n\t" \
/* al * bl */ \
"uxth r5, %[a] \n\t" \
"uxth r6, %[b] \n\t" \
"muls r6, r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"movs %[a], #0 \n\t" \
"adcs %[h], %[a] \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[a] \n\t" \
/* al * bh */ \
"lsrs r6, %[b], #16 \n\t" \
"muls r5, r6 \n\t" \
"lsrs r6, r5, #16 \n\t" \
"lsls r5, r5, #16 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], r6 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], r6 \n\t" \
"adcs %[o], %[a] \n\t" \
/* ah * bh */ \
"movs %[a], r8 \n\t" \
"lsrs r5, %[a], #16 \n\t" \
"lsrs r6, %[b], #16 \n\t" \
"muls r6, r5 \n\t" \
"movs %[a], #0 \n\t" \
"adds %[h], %[h], r6 \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[h], %[h], r6 \n\t" \
"adcs %[o], %[a] \n\t" \
/* ah * bl */ \
"uxth r6, %[b] \n\t" \
"muls r5, r6 \n\t" \
"lsrs r6, r5, #16 \n\t" \
"lsls r5, r5, #16 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], r6 \n\t" \
"adcs %[o], %[a] \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], r6 \n\t" \
"adcs %[o], %[a] \n\t" \
"movs %[a], r8 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r8", "cc" \
)
  1744. #endif
/* Square va and store double size result in: vh | vl */
/* a^2 = al^2 + 2*al*ah*2^16 + ah^2*2^32; the cross term is doubled by
 * shifting the 32-bit product left 17 / right 15 instead of adding twice. */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
"lsrs r5, %[a], #16 \n\t" \
"uxth r6, %[a] \n\t" \
"mov %[l], r6 \n\t" \
"mov %[h], r5 \n\t" \
/* al * al */ \
"muls %[l], %[l] \n\t" \
/* ah * ah */ \
"muls %[h], %[h] \n\t" \
/* 2 * al * ah */ \
"muls r6, r5 \n\t" \
"lsrs r5, r6, #15 \n\t" \
"lsls r6, r6, #17 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r5 \n\t" \
: [h] "+l" (vh), [l] "+l" (vl) \
: [a] "l" (va) \
: "r5", "r6", "cc" \
)
/* Square va and add double size result into: vo | vh | vl */
/* r4 holds the high half (ah), r6 the low half (al); both are reloaded
 * before the doubled cross term because muls destroys them. */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
"lsrs r4, %[a], #16 \n\t" \
"uxth r6, %[a] \n\t" \
/* al * al */ \
"muls r6, r6 \n\t" \
/* ah * ah */ \
"muls r4, r4 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r4 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], r5 \n\t" \
"lsrs r4, %[a], #16 \n\t" \
"uxth r6, %[a] \n\t" \
/* 2 * al * ah */ \
"muls r6, r4 \n\t" \
"lsrs r4, r6, #15 \n\t" \
"lsls r6, r6, #17 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], r4 \n\t" \
"adcs %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va) \
: "r4", "r5", "r6", "cc" \
)
  1792. /* Square va and add double size result into: vh | vl */
  1793. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1794. __asm__ __volatile__ ( \
  1795. "lsrs r6, %[a], #16 \n\t" \
  1796. "uxth r6, %[a] \n\t" \
  1797. /* al * al */ \
  1798. "muls r6, r6 \n\t" \
  1799. /* ah * ah */ \
  1800. "muls r6, r6 \n\t" \
  1801. "adds %[l], %[l], r6 \n\t" \
  1802. "adcs %[h], r6 \n\t" \
  1803. "lsrs r6, %[a], #16 \n\t" \
  1804. "uxth r6, %[a] \n\t" \
  1805. /* 2 * al * ah */ \
  1806. "muls r6, r6 \n\t" \
  1807. "lsrs r6, r6, #15 \n\t" \
  1808. "lsls r6, r6, #17 \n\t" \
  1809. "adds %[l], %[l], r6 \n\t" \
  1810. "adcs %[h], r6 \n\t" \
  1811. : [l] "+l" (vl), [h] "+l" (vh) \
  1812. : [a] "l" (va) \
  1813. : "r5", "r6", "cc" \
  1814. )
/* Add va into: vh | vl */
/* Thumb-1 has no adc-immediate, so a zeroed r5 carries the flag into vh. */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
"adds %[l], %[l], %[a] \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh) \
: [a] "l" (va) \
: "r5", "cc" \
)
/* Sub va from: vh | vl */
/* Borrow is propagated into vh via sbcs with a zero register. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
"subs %[l], %[l], %[a] \n\t" \
"movs r5, #0 \n\t" \
"sbcs %[h], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh) \
: [a] "l" (va) \
: "r5", "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl */
/* Two full 96-bit additions; carry out of the top word is discarded. */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
"adds %[l], %[l], %[a] \n\t" \
"adcs %[h], %[b] \n\t" \
"adcs %[o], %[c] \n\t" \
"adds %[l], %[l], %[a] \n\t" \
"adcs %[h], %[b] \n\t" \
"adcs %[o], %[c] \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
: "cc" \
)
  1848. #elif defined(WOLFSSL_KEIL)
/* Multiply va by vb and store double size result in: vh | vl */
/* Keil syntax: muls takes three operands (Rd, Rn, Rm) unlike the
 * two-operand clang/gas form above; the algorithm is identical. */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth %[l], %[b] \n\t" \
"muls %[l], r6, %[l] \n\t" \
/* al * bh */ \
"lsrs r4, %[b], #16 \n\t" \
"muls r6, r4, r6 \n\t" \
"lsrs %[h], r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"muls r4, r6, r4 \n\t" \
"adds %[h], %[h], r4 \n\t" \
/* ah * bl */ \
"uxth r4, %[b] \n\t" \
"muls r6, r4, r6 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r4 \n\t" \
: [h] "+l" (vh), [l] "+l" (vl) \
: [a] "l" (va), [b] "l" (vb) \
: "r4", "r5", "r6", "cc" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl */
/* vo is zeroed and doubles as the zero register for the carry add. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth %[l], %[b] \n\t" \
"muls %[l], r6, %[l] \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs %[h], r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"movs %[o], #0 \n\t" \
"adcs %[h], %[h], %[o] \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"muls r5, r6, r5 \n\t" \
"adds %[h], %[h], r5 \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "cc" \
)
  1909. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl */
/* Fast (non-small) variant: extra scratch r7 lets r5 stay zero throughout. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r7, %[b] \n\t" \
"muls r7, r6, r7 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r7, %[b], #16 \n\t" \
"muls r6, r7, r6 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r7, %[b], #16 \n\t" \
"muls r7, r6, r7 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r7, %[b] \n\t" \
"muls r6, r7, r6 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r7", "cc" \
)
  1947. #else
/* Multiply va by vb and add double size result into: vo | vh | vl */
/* Small variant: r5 serves as both product scratch and zero register,
 * so it is re-zeroed before each carry add into vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r5, %[b] \n\t" \
"muls r5, r6, r5 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r5, %[b], #16 \n\t" \
"muls r5, r6, r5 \n\t" \
"adds %[h], %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "cc" \
)
  1987. #endif
/* Multiply va by vb and add double size result into: vh | vl */
/* Two-word accumulate only: carry out of vh is discarded (caller
 * guarantees the total fits in vh | vl). */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r4, %[b] \n\t" \
"muls r4, r6, r4 \n\t" \
"adds %[l], %[l], r4 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
/* al * bh */ \
"lsrs r4, %[b], #16 \n\t" \
"muls r6, r4, r6 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r4 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r4, %[b], #16 \n\t" \
"muls r4, r6, r4 \n\t" \
"adds %[h], %[h], r4 \n\t" \
/* ah * bl */ \
"uxth r4, %[b] \n\t" \
"muls r6, r4, r6 \n\t" \
"lsrs r4, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r4 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh) \
: [a] "l" (va), [b] "l" (vb) \
: "r4", "r5", "r6", "cc" \
)
  2021. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
/* Each partial product is accumulated twice (product added 2x). */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r7, %[b] \n\t" \
"muls r7, r6, r7 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"movs r5, #0 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
"adds %[l], %[l], r7 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* al * bh */ \
"lsrs r7, %[b], #16 \n\t" \
"muls r6, r7, r6 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bh */ \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r7, %[b], #16 \n\t" \
"muls r7, r6, r7 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
"adds %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
/* ah * bl */ \
"uxth r7, %[b] \n\t" \
"muls r6, r7, r6 \n\t" \
"lsrs r7, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r7 \n\t" \
"adcs %[o], %[o], r5 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r7", "cc" \
)
  2070. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
/* Small variant: %[a] is parked in (high) r8 so its register can serve
 * as the zero register for the carry adds; restored before the asm ends. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
"movs r8, %[a] \n\t" \
/* al * bl */ \
"uxth r6, %[a] \n\t" \
"uxth r5, %[b] \n\t" \
"muls r5, r6, r5 \n\t" \
"adds %[l], %[l], r5 \n\t" \
"movs %[a], #0 \n\t" \
"adcs %[h], %[h], %[a] \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
"adds %[l], %[l], r5 \n\t" \
"adcs %[h], %[h], %[a] \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
/* al * bh */ \
"lsrs r5, %[b], #16 \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
/* ah * bh */ \
"movs %[a], r8 \n\t" \
"lsrs r6, %[a], #16 \n\t" \
"lsrs r5, %[b], #16 \n\t" \
"muls r5, r6, r5 \n\t" \
"adds %[h], %[h], r5 \n\t" \
"movs %[a], #0 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
"adds %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
/* ah * bl */ \
"uxth r5, %[b] \n\t" \
"muls r6, r5, r6 \n\t" \
"lsrs r5, r6, #16 \n\t" \
"lsls r6, r6, #16 \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
"adds %[l], %[l], r6 \n\t" \
"adcs %[h], %[h], r5 \n\t" \
"adcs %[o], %[o], %[a] \n\t" \
"movs %[a], r8 \n\t" \
: [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
: [a] "l" (va), [b] "l" (vb) \
: "r5", "r6", "r8", "cc" \
)
  2123. #endif
  2124. #ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Same schoolbook 16x16 partial-product scheme as SP_ASM_MUL_ADD2, but
 * the al*bl accumulation omits the carry into vo (caller guarantees the
 * first double add cannot overflow the 64-bit vh|vl pair), saving two
 * ADCS instructions.  Uses r5 (zero), r6 and r7 as scratch.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)         \
    __asm__ __volatile__ (                             \
        /* al * bl - no carry into vo by assumption. */ \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r7, %[b]	\n\t"                      \
        "muls	r7, r6, r7	\n\t"                      \
        "adds	%[l], %[l], r7	\n\t"                  \
        "movs	r5, #0	\n\t"                          \
        "adcs	%[h], %[h], r5	\n\t"                  \
        "adds	%[l], %[l], r7	\n\t"                  \
        "adcs	%[h], %[h], r5	\n\t"                  \
        /* al * bh */                                  \
        "lsrs	r7, %[b], #16	\n\t"                  \
        "muls	r6, r7, r6	\n\t"                      \
        "lsrs	r7, r6, #16	\n\t"                      \
        "lsls	r6, r6, #16	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r7	\n\t"                  \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r7	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        /* ah * bh */                                  \
        "lsrs	r6, %[a], #16	\n\t"                  \
        "lsrs	r7, %[b], #16	\n\t"                  \
        "muls	r7, r6, r7	\n\t"                      \
        "adds	%[h], %[h], r7	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        "adds	%[h], %[h], r7	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        /* ah * bl */                                  \
        "uxth	r7, %[b]	\n\t"                      \
        "muls	r6, r7, r6	\n\t"                      \
        "lsrs	r7, r6, #16	\n\t"                      \
        "lsls	r6, r6, #16	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r7	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r7	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r7", "cc"                       \
    )
  2172. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * DEBUG-build variant of SP_ASM_MUL_ADD2_NO: avoids r7 by saving va in
 * high register r8 and reusing va's register as the zero constant for
 * carry propagation; va is restored before exit.  Same partial-product
 * and carry structure as the r7-based version above.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (va preserved via r8).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)         \
    __asm__ __volatile__ (                             \
        /* Save va; its register doubles as zero scratch below. */ \
        "movs	r8, %[a]	\n\t"                      \
        /* al * bl - no carry into vo by assumption. */ \
        "uxth	r5, %[a]	\n\t"                      \
        "uxth	r6, %[b]	\n\t"                      \
        "muls	r6, r5, r6	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "movs	%[a], #0	\n\t"                      \
        "adcs	%[h], %[h], %[a]	\n\t"              \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], %[a]	\n\t"              \
        /* al * bh */                                  \
        "lsrs	r6, %[b], #16	\n\t"                  \
        "muls	r5, r6, r5	\n\t"                      \
        "lsrs	r6, r5, #16	\n\t"                      \
        "lsls	r5, r5, #16	\n\t"                      \
        "adds	%[l], %[l], r5	\n\t"                  \
        "adcs	%[h], %[h], r6	\n\t"                  \
        "adds	%[l], %[l], r5	\n\t"                  \
        "adcs	%[h], %[h], r6	\n\t"                  \
        "adcs	%[o], %[o], %[a]	\n\t"              \
        /* ah * bh */                                  \
        /* Reload va from r8 to extract its high half. */ \
        "movs	%[a], r8	\n\t"                      \
        "lsrs	r5, %[a], #16	\n\t"                  \
        "lsrs	r6, %[b], #16	\n\t"                  \
        "muls	r6, r5, r6	\n\t"                      \
        "movs	%[a], #0	\n\t"                      \
        "adds	%[h], %[h], r6	\n\t"                  \
        "adcs	%[o], %[o], %[a]	\n\t"              \
        "adds	%[h], %[h], r6	\n\t"                  \
        "adcs	%[o], %[o], %[a]	\n\t"              \
        /* ah * bl */                                  \
        "uxth	r6, %[b]	\n\t"                      \
        "muls	r5, r6, r5	\n\t"                      \
        "lsrs	r6, r5, #16	\n\t"                      \
        "lsls	r5, r5, #16	\n\t"                      \
        "adds	%[l], %[l], r5	\n\t"                  \
        "adcs	%[h], %[h], r6	\n\t"                  \
        "adcs	%[o], %[o], %[a]	\n\t"              \
        "adds	%[l], %[l], r5	\n\t"                  \
        "adcs	%[h], %[h], r6	\n\t"                  \
        "adcs	%[o], %[o], %[a]	\n\t"              \
        /* Restore va so the input operand is unchanged on exit. */ \
        "movs	%[a], r8	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r8", "cc"                       \
    )
  2224. #endif
/* Square va and store double size result in: vh | vl
 *
 * Uses (ah*2^16 + al)^2 = ah^2*2^32 + 2*al*ah*2^16 + al^2.  The doubled
 * cross product is folded in by shifting al*ah left 17 (low word) and
 * right 15 (high word) - the extra bit of shift is the *2.
 *
 * vl, vh  [out]  low / high words of the 64-bit square.
 * va      [in]   value to square (not modified).
 */
#define SP_ASM_SQR(vl, vh, va)                         \
    __asm__ __volatile__ (                             \
        "lsrs	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        "mov	%[l], r6	\n\t"                      \
        "mov	%[h], r5	\n\t"                      \
        /* al * al */                                  \
        "muls	%[l], %[l], %[l]	\n\t"              \
        /* ah * ah */                                  \
        "muls	%[h], %[h], %[h]	\n\t"              \
        /* 2 * al * ah */                              \
        "muls	r6, r5, r6	\n\t"                      \
        "lsrs	r5, r6, #15	\n\t"                      \
        "lsls	r6, r6, #17	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r5	\n\t"                  \
        : [h] "+l" (vh), [l] "+l" (vl)                 \
        : [a] "l" (va)                                 \
        : "r5", "r6", "cc"                             \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * Adds al^2 and ah^2 into the accumulator first (with carry into vo),
 * then folds in the doubled cross product 2*al*ah via the 17/15-bit
 * shift split.  Note r4 is used as scratch in addition to r5/r6.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va          [in]      value to square (not modified).
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                 \
    __asm__ __volatile__ (                             \
        "lsrs	r4, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* al * al */                                  \
        "muls	r6, r6, r6	\n\t"                      \
        /* ah * ah */                                  \
        "muls	r4, r4, r4	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r4	\n\t"                  \
        "movs	r5, #0	\n\t"                          \
        "adcs	%[o], %[o], r5	\n\t"                  \
        /* Re-split va: r4/r6 were consumed by the squares above. */ \
        "lsrs	r4, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* 2 * al * ah */                              \
        "muls	r6, r4, r6	\n\t"                      \
        "lsrs	r4, r6, #15	\n\t"                      \
        "lsls	r6, r6, #17	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r4	\n\t"                  \
        "adcs	%[o], %[o], r5	\n\t"                  \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va)                                 \
        : "r4", "r5", "r6", "cc"                       \
    )
/* Square va and add double size result into: vh | vl
 *
 * As SP_ASM_SQR_ADD but without the third (overflow) word - the caller
 * guarantees the accumulation fits in 64 bits, so no carries propagate
 * beyond vh.
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      value to square (not modified).
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                  \
    __asm__ __volatile__ (                             \
        "lsrs	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* al * al */                                  \
        "muls	r6, r6, r6	\n\t"                      \
        /* ah * ah */                                  \
        "muls	r5, r5, r5	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r5	\n\t"                  \
        /* Re-split va: r5/r6 were consumed by the squares above. */ \
        "lsrs	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* 2 * al * ah */                              \
        "muls	r6, r5, r6	\n\t"                      \
        "lsrs	r5, r6, #15	\n\t"                      \
        "lsls	r6, r6, #17	\n\t"                      \
        "adds	%[l], %[l], r6	\n\t"                  \
        "adcs	%[h], %[h], r5	\n\t"                  \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "r6", "cc"                             \
    )
/* Add va into: vh | vl
 *
 * 64-bit accumulate of a single word: vl += va with the carry
 * propagated into vh (r5 holds the zero needed for the ADCS form).
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      word to add (not modified).
 */
#define SP_ASM_ADDC(vl, vh, va)                        \
    __asm__ __volatile__ (                             \
        "adds	%[l], %[l], %[a]	\n\t"              \
        "movs	r5, #0	\n\t"                          \
        "adcs	%[h], %[h], r5	\n\t"                  \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "cc"                                   \
    )
/* Sub va from: vh | vl
 *
 * 64-bit subtract of a single word: vl -= va with the borrow
 * propagated out of vh via SBCS (r5 holds the zero operand).
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      word to subtract (not modified).
 */
#define SP_ASM_SUBB(vl, vh, va)                        \
    __asm__ __volatile__ (                             \
        "subs	%[l], %[l], %[a]	\n\t"              \
        "movs	r5, #0	\n\t"                          \
        "sbcs	%[h], %[h], r5	\n\t"                  \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "cc"                                   \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Triple-word accumulate performed twice (the doubling used when
 * folding symmetric partial products in squaring): each pass is a
 * 96-bit add with carries rippling l -> h -> o.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb, vc  [in]      low / high / overflow words of the addend.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)       \
    __asm__ __volatile__ (                             \
        "adds	%[l], %[l], %[a]	\n\t"              \
        "adcs	%[h], %[h], %[b]	\n\t"              \
        "adcs	%[o], %[o], %[c]	\n\t"              \
        "adds	%[l], %[l], %[a]	\n\t"              \
        "adcs	%[h], %[h], %[b]	\n\t"              \
        "adcs	%[o], %[o], %[c]	\n\t"              \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)     \
        : "cc"                                         \
    )
  2328. #elif defined(__GNUC__)
/* Multiply va by vb and store double size result in: vh | vl
 *
 * __GNUC__ path: two-operand divided (pre-UAL) Thumb-1 syntax - NOTE:
 * these unsuffixed forms are assumed to set flags like their UAL
 * 's'-suffixed counterparts on Thumb-1, which the ADC carry chain
 * relies on.  Four 16x16 partial products: al*bl into vl, al*bh and
 * ah*bl split across vl/vh, ah*bh into vh (no carry out is possible
 * since the full product fits in 64 bits).
 *
 * vl, vh  [out]  low / high words of the 64-bit product.
 * va, vb  [in]   multiplicands (not modified).
 */
#define SP_ASM_MUL(vl, vh, va, vb)                     \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	%[l], %[b]	\n\t"                      \
        "mul	%[l], r6	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r4, %[b], #16	\n\t"                  \
        "mul	r6, r4	\n\t"                          \
        "lsr	%[h], r6, #16	\n\t"                  \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "mul	r4, r6	\n\t"                          \
        "add	%[h], %[h], r4	\n\t"                  \
        /* ah * bl */                                  \
        "uxth	r4, %[b]	\n\t"                      \
        "mul	r6, r4	\n\t"                          \
        "lsr	r4, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r4	\n\t"                      \
        : [h] "+l" (vh), [l] "+l" (vl)                 \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r4", "r5", "r6", "cc"                       \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * As SP_ASM_MUL but also zeroes the third word: vo is set to 0 (and
 * reused as the zero operand for the carry add), since a 32x32 product
 * can never reach the third word.
 *
 * vl, vh  [out]  low / high words of the 64-bit product.
 * vo      [out]  set to zero.
 * va, vb  [in]   multiplicands (not modified).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	%[l], %[b]	\n\t"                      \
        "mul	%[l], r6	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r5, %[b], #16	\n\t"                  \
        "mul	r6, r5	\n\t"                          \
        "lsr	%[h], r6, #16	\n\t"                  \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        /* vo becomes the zero constant and the final vo value. */ \
        "mov	%[o], #0	\n\t"                      \
        "adc	%[h], %[o]	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "mul	r5, r6	\n\t"                          \
        "add	%[h], %[h], r5	\n\t"                  \
        /* ah * bl */                                  \
        "uxth	r5, %[b]	\n\t"                      \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "cc"                             \
    )
  2389. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Fast (!WOLFSSL_SP_SMALL && !DEBUG) variant: keeps zero in r5 for the
 * whole sequence and uses r7 as the partial-product scratch.  Each of
 * the four 16x16 partial products is accumulated once, with carries
 * rippling into the third word vo.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r7, r6	\n\t"                          \
        "add	%[l], %[l], r7	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r7, r6	\n\t"                          \
        "add	%[h], %[h], r7	\n\t"                  \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r7", "cc"                       \
    )
  2427. #else
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Small/debug variant: avoids r7 by reusing r5 for both the partial
 * product and the zero constant, re-zeroing it ("mov r5, #0") before
 * every carry add.  Functionally identical to the fast variant above.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r5, %[b]	\n\t"                      \
        "mul	r5, r6	\n\t"                          \
        "add	%[l], %[l], r5	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r5, %[b], #16	\n\t"                  \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        /* r5 held the high half; re-zero it for the vo carry. */ \
        "mov	r5, #0	\n\t"                          \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r5, %[b], #16	\n\t"                  \
        "mul	r5, r6	\n\t"                          \
        "add	%[h], %[h], r5	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r5, %[b]	\n\t"                      \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "mov	r5, #0	\n\t"                          \
        "adc	%[o], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "cc"                             \
    )
  2468. #endif
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulate: the caller guarantees the result fits in
 * vh | vl, so no overflow word is tracked.  r4 holds each partial
 * product, r5 the zero constant, r6 the current half of va.
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va, vb  [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)              \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r4, %[b]	\n\t"                      \
        "mul	r4, r6	\n\t"                          \
        "add	%[l], %[l], r4	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r4, %[b], #16	\n\t"                  \
        "mul	r6, r4	\n\t"                          \
        "lsr	r4, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r4	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r4, %[b], #16	\n\t"                  \
        "mul	r4, r6	\n\t"                          \
        "add	%[h], %[h], r4	\n\t"                  \
        /* ah * bl */                                  \
        "uxth	r4, %[b]	\n\t"                      \
        "mul	r6, r4	\n\t"                          \
        "lsr	r4, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r4	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r4", "r5", "r6", "cc"                       \
    )
  2502. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Fast (!WOLFSSL_SP_SMALL && !DEBUG) variant: zero stays in r5, r7
 * holds each partial product.  Every one of the four 16x16 partial
 * products is accumulated twice into the 96-bit total vo | vh | vl.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                             \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r7, r6	\n\t"                          \
        "add	%[l], %[l], r7	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        "add	%[l], %[l], r7	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r7, r6	\n\t"                          \
        "add	%[h], %[h], r7	\n\t"                  \
        "adc	%[o], r5	\n\t"                      \
        "add	%[h], %[h], r7	\n\t"                  \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r7", "cc"                       \
    )
  2551. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Small/debug variant: avoids r7 by saving va in high register r8 and
 * reusing va's register as the zero constant for carry propagation.
 * va is restored before exit so all input operands are unchanged.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (va preserved via r8).
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                             \
        /* Save va; its register doubles as zero scratch below. */ \
        "mov	r8, %[a]	\n\t"                      \
        /* al * bl */                                  \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r5, %[b]	\n\t"                      \
        "mul	r5, r6	\n\t"                          \
        "add	%[l], %[l], r5	\n\t"                  \
        "mov	%[a], #0	\n\t"                      \
        "adc	%[h], %[a]	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[l], %[l], r5	\n\t"                  \
        "adc	%[h], %[a]	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r5, %[b], #16	\n\t"                  \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        /* ah * bh */                                  \
        /* Reload va from r8 to extract its high half. */ \
        "mov	%[a], r8	\n\t"                      \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r5, %[b], #16	\n\t"                  \
        "mul	r5, r6	\n\t"                          \
        "add	%[h], %[h], r5	\n\t"                  \
        "mov	%[a], #0	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[h], %[h], r5	\n\t"                  \
        "adc	%[o], %[a]	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r5, %[b]	\n\t"                      \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        /* Restore va so the input operand is unchanged on exit. */ \
        "mov	%[a], r8	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r8", "cc"                       \
    )
  2604. #endif
  2605. #ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * As SP_ASM_MUL_ADD2 (fast variant) but the al*bl accumulation omits
 * the carry into vo - the caller guarantees the first double add fits
 * in the 64-bit vh | vl pair, saving two ADC instructions.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (not modified).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)         \
    __asm__ __volatile__ (                             \
        /* al * bl - no carry into vo by assumption. */ \
        "uxth	r6, %[a]	\n\t"                      \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r7, r6	\n\t"                          \
        "add	%[l], %[l], r7	\n\t"                  \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        "add	%[l], %[l], r7	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bh */                                  \
        "lsr	r6, %[a], #16	\n\t"                  \
        "lsr	r7, %[b], #16	\n\t"                  \
        "mul	r7, r6	\n\t"                          \
        "add	%[h], %[h], r7	\n\t"                  \
        "adc	%[o], r5	\n\t"                      \
        "add	%[h], %[h], r7	\n\t"                  \
        "adc	%[o], r5	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r7, %[b]	\n\t"                      \
        "mul	r6, r7	\n\t"                          \
        "lsr	r7, r6, #16	\n\t"                      \
        "lsl	r6, r6, #16	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r7	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r7", "cc"                       \
    )
  2653. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * DEBUG-build variant: avoids r7 by saving va in high register r8 and
 * reusing va's register as the zero constant; va is restored on exit.
 * The al*bl accumulation omits the carry into vo per the no-overflow
 * assumption above.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb      [in]      multiplicands (va preserved via r8).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)         \
    __asm__ __volatile__ (                             \
        /* Save va; its register doubles as zero scratch below. */ \
        "mov	r8, %[a]	\n\t"                      \
        /* al * bl - no carry into vo by assumption. */ \
        "uxth	r5, %[a]	\n\t"                      \
        "uxth	r6, %[b]	\n\t"                      \
        "mul	r6, r5	\n\t"                          \
        "add	%[l], %[l], r6	\n\t"                  \
        "mov	%[a], #0	\n\t"                      \
        "adc	%[h], %[a]	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], %[a]	\n\t"                      \
        /* al * bh */                                  \
        "lsr	r6, %[b], #16	\n\t"                  \
        "mul	r5, r6	\n\t"                          \
        "lsr	r6, r5, #16	\n\t"                      \
        "lsl	r5, r5, #16	\n\t"                      \
        "add	%[l], %[l], r5	\n\t"                  \
        "adc	%[h], r6	\n\t"                      \
        "add	%[l], %[l], r5	\n\t"                  \
        "adc	%[h], r6	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        /* ah * bh */                                  \
        /* Reload va from r8 to extract its high half. */ \
        "mov	%[a], r8	\n\t"                      \
        "lsr	r5, %[a], #16	\n\t"                  \
        "lsr	r6, %[b], #16	\n\t"                  \
        "mul	r6, r5	\n\t"                          \
        "mov	%[a], #0	\n\t"                      \
        "add	%[h], %[h], r6	\n\t"                  \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[h], %[h], r6	\n\t"                  \
        "adc	%[o], %[a]	\n\t"                      \
        /* ah * bl */                                  \
        "uxth	r6, %[b]	\n\t"                      \
        "mul	r5, r6	\n\t"                          \
        "lsr	r6, r5, #16	\n\t"                      \
        "lsl	r5, r5, #16	\n\t"                      \
        "add	%[l], %[l], r5	\n\t"                  \
        "adc	%[h], r6	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        "add	%[l], %[l], r5	\n\t"                  \
        "adc	%[h], r6	\n\t"                      \
        "adc	%[o], %[a]	\n\t"                      \
        /* Restore va so the input operand is unchanged on exit. */ \
        "mov	%[a], r8	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb)                   \
        : "r5", "r6", "r8", "cc"                       \
    )
  2705. #endif
/* Square va and store double size result in: vh | vl
 *
 * __GNUC__ path (divided Thumb syntax).  Uses
 * (ah*2^16 + al)^2 = ah^2*2^32 + 2*al*ah*2^16 + al^2; the doubled
 * cross product is folded in by shifting al*ah left 17 / right 15
 * (the extra shift bit is the *2).
 *
 * vl, vh  [out]  low / high words of the 64-bit square.
 * va      [in]   value to square (not modified).
 */
#define SP_ASM_SQR(vl, vh, va)                         \
    __asm__ __volatile__ (                             \
        "lsr	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        "mov	%[l], r6	\n\t"                      \
        "mov	%[h], r5	\n\t"                      \
        /* al * al */                                  \
        "mul	%[l], %[l]	\n\t"                      \
        /* ah * ah */                                  \
        "mul	%[h], %[h]	\n\t"                      \
        /* 2 * al * ah */                              \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #15	\n\t"                      \
        "lsl	r6, r6, #17	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        : [h] "+l" (vh), [l] "+l" (vl)                 \
        : [a] "l" (va)                                 \
        : "r5", "r6", "cc"                             \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * Adds al^2 and ah^2 (with carry into vo), then the doubled cross
 * product 2*al*ah via the 17/15-bit shift split.  r4 is used as
 * scratch in addition to r5 (zero) and r6.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va          [in]      value to square (not modified).
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                 \
    __asm__ __volatile__ (                             \
        "lsr	r4, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* al * al */                                  \
        "mul	r6, r6	\n\t"                          \
        /* ah * ah */                                  \
        "mul	r4, r4	\n\t"                          \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r4	\n\t"                      \
        "mov	r5, #0	\n\t"                          \
        "adc	%[o], r5	\n\t"                      \
        /* Re-split va: r4/r6 were consumed by the squares above. */ \
        "lsr	r4, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* 2 * al * ah */                              \
        "mul	r6, r4	\n\t"                          \
        "lsr	r4, r6, #15	\n\t"                      \
        "lsl	r6, r6, #17	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r4	\n\t"                      \
        "adc	%[o], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va)                                 \
        : "r4", "r5", "r6", "cc"                       \
    )
/* Square va and add double size result into: vh | vl
 *
 * As SP_ASM_SQR_ADD but without the overflow word - the caller
 * guarantees the accumulation fits in the 64-bit vh | vl pair.
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      value to square (not modified).
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                  \
    __asm__ __volatile__ (                             \
        "lsr	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* al * al */                                  \
        "mul	r6, r6	\n\t"                          \
        /* ah * ah */                                  \
        "mul	r5, r5	\n\t"                          \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        /* Re-split va: r5/r6 were consumed by the squares above. */ \
        "lsr	r5, %[a], #16	\n\t"                  \
        "uxth	r6, %[a]	\n\t"                      \
        /* 2 * al * ah */                              \
        "mul	r6, r5	\n\t"                          \
        "lsr	r5, r6, #15	\n\t"                      \
        "lsl	r6, r6, #17	\n\t"                      \
        "add	%[l], %[l], r6	\n\t"                  \
        "adc	%[h], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "r6", "cc"                             \
    )
/* Add va into: vh | vl
 *
 * 64-bit accumulate of a single word: vl += va, carry into vh
 * (r5 supplies the zero operand for the ADC).
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      word to add (not modified).
 */
#define SP_ASM_ADDC(vl, vh, va)                        \
    __asm__ __volatile__ (                             \
        "add	%[l], %[l], %[a]	\n\t"              \
        "mov	r5, #0	\n\t"                          \
        "adc	%[h], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "cc"                                   \
    )
/* Sub va from: vh | vl
 *
 * 64-bit subtract of a single word: vl -= va, borrow propagated out
 * of vh via SBC (r5 supplies the zero operand).
 *
 * vl, vh  [in,out]  low / high words of the accumulator.
 * va      [in]      word to subtract (not modified).
 */
#define SP_ASM_SUBB(vl, vh, va)                        \
    __asm__ __volatile__ (                             \
        "sub	%[l], %[l], %[a]	\n\t"              \
        "mov	r5, #0	\n\t"                          \
        "sbc	%[h], r5	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                 \
        : [a] "l" (va)                                 \
        : "r5", "cc"                                   \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Triple-word add performed twice (used to double symmetric partial
 * products in squaring); carries ripple l -> h -> o on each pass.
 *
 * vl, vh, vo  [in,out]  low / high / overflow words of the accumulator.
 * va, vb, vc  [in]      low / high / overflow words of the addend.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)       \
    __asm__ __volatile__ (                             \
        "add	%[l], %[l], %[a]	\n\t"              \
        "adc	%[h], %[b]	\n\t"                      \
        "adc	%[o], %[c]	\n\t"                      \
        "add	%[l], %[l], %[a]	\n\t"              \
        "adc	%[h], %[b]	\n\t"                      \
        "adc	%[o], %[c]	\n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)  \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)     \
        : "cc"                                         \
    )
  2809. #endif
  2810. #ifdef WOLFSSL_SP_DIV_WORD_HALF
  2811. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  2812. *
  2813. * No division instruction used - does operation bit by bit.
  2814. * Constant time.
  2815. *
  2816. * @param [in] hi SP integer digit. High digit of the dividend.
  2817. * @param [in] lo SP integer digit. Lower digit of the dividend.
  2818. * @param [in] d SP integer digit. Number to divide by.
  2819. * @return The division result.
  2820. */
  2821. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  2822. sp_int_digit d)
  2823. {
  2824. __asm__ __volatile__ (
  2825. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2826. "lsrs r3, %[d], #24\n\t"
  2827. #else
  2828. "lsr r3, %[d], #24\n\t"
  2829. #endif
  2830. "beq 2%=f\n\t"
  2831. "\n1%=:\n\t"
  2832. "movs r3, #0\n\t"
  2833. "b 3%=f\n\t"
  2834. "\n2%=:\n\t"
  2835. "mov r3, #8\n\t"
  2836. "\n3%=:\n\t"
  2837. "movs r4, #31\n\t"
  2838. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2839. "subs r4, r4, r3\n\t"
  2840. #else
  2841. "sub r4, r4, r3\n\t"
  2842. #endif
  2843. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2844. "lsls %[d], %[d], r3\n\t"
  2845. #else
  2846. "lsl %[d], %[d], r3\n\t"
  2847. #endif
  2848. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2849. "lsls %[hi], %[hi], r3\n\t"
  2850. #else
  2851. "lsl %[hi], %[hi], r3\n\t"
  2852. #endif
  2853. "mov r5, %[lo]\n\t"
  2854. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2855. "lsrs r5, r5, r4\n\t"
  2856. #else
  2857. "lsr r5, r5, r4\n\t"
  2858. #endif
  2859. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2860. "lsls %[lo], %[lo], r3\n\t"
  2861. #else
  2862. "lsl %[lo], %[lo], r3\n\t"
  2863. #endif
  2864. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2865. "lsrs r5, r5, #1\n\t"
  2866. #else
  2867. "lsr r5, r5, #1\n\t"
  2868. #endif
  2869. #if defined(WOLFSSL_KEIL)
  2870. "orrs %[hi], %[hi], r5\n\t"
  2871. #elif defined(__clang__)
  2872. "orrs %[hi], r5\n\t"
  2873. #else
  2874. "orr %[hi], r5\n\t"
  2875. #endif
  2876. "movs r3, #0\n\t"
  2877. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2878. "lsrs r5, %[d], #1\n\t"
  2879. #else
  2880. "lsr r5, %[d], #1\n\t"
  2881. #endif
  2882. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2883. "adds r5, r5, #1\n\t"
  2884. #else
  2885. "add r5, r5, #1\n\t"
  2886. #endif
  2887. "mov r8, %[lo]\n\t"
  2888. "mov r9, %[hi]\n\t"
  2889. /* Do top 32 */
  2890. "movs r6, r5\n\t"
  2891. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2892. "subs r6, r6, %[hi]\n\t"
  2893. #else
  2894. "sub r6, r6, %[hi]\n\t"
  2895. #endif
  2896. #ifdef WOLFSSL_KEIL
  2897. "sbcs r6, r6, r6\n\t"
  2898. #elif defined(__clang__)
  2899. "sbcs r6, r6\n\t"
  2900. #else
  2901. "sbc r6, r6\n\t"
  2902. #endif
  2903. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2904. "adds r3, r3, r3\n\t"
  2905. #else
  2906. "add r3, r3, r3\n\t"
  2907. #endif
  2908. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2909. "subs r3, r3, r6\n\t"
  2910. #else
  2911. "sub r3, r3, r6\n\t"
  2912. #endif
  2913. #ifdef WOLFSSL_KEIL
  2914. "ands r6, r6, r5\n\t"
  2915. #elif defined(__clang__)
  2916. "ands r6, r5\n\t"
  2917. #else
  2918. "and r6, r5\n\t"
  2919. #endif
  2920. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2921. "subs %[hi], %[hi], r6\n\t"
  2922. #else
  2923. "sub %[hi], %[hi], r6\n\t"
  2924. #endif
  2925. "movs r4, #29\n\t"
  2926. "\n"
  2927. "L_sp_div_word_loop%=:\n\t"
  2928. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2929. "lsls %[lo], %[lo], #1\n\t"
  2930. #else
  2931. "lsl %[lo], %[lo], #1\n\t"
  2932. #endif
  2933. #ifdef WOLFSSL_KEIL
  2934. "adcs %[hi], %[hi], %[hi]\n\t"
  2935. #elif defined(__clang__)
  2936. "adcs %[hi], %[hi]\n\t"
  2937. #else
  2938. "adc %[hi], %[hi]\n\t"
  2939. #endif
  2940. "movs r6, r5\n\t"
  2941. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2942. "subs r6, r6, %[hi]\n\t"
  2943. #else
  2944. "sub r6, r6, %[hi]\n\t"
  2945. #endif
  2946. #ifdef WOLFSSL_KEIL
  2947. "sbcs r6, r6, r6\n\t"
  2948. #elif defined(__clang__)
  2949. "sbcs r6, r6\n\t"
  2950. #else
  2951. "sbc r6, r6\n\t"
  2952. #endif
  2953. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2954. "adds r3, r3, r3\n\t"
  2955. #else
  2956. "add r3, r3, r3\n\t"
  2957. #endif
  2958. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2959. "subs r3, r3, r6\n\t"
  2960. #else
  2961. "sub r3, r3, r6\n\t"
  2962. #endif
  2963. #ifdef WOLFSSL_KEIL
  2964. "ands r6, r6, r5\n\t"
  2965. #elif defined(__clang__)
  2966. "ands r6, r5\n\t"
  2967. #else
  2968. "and r6, r5\n\t"
  2969. #endif
  2970. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2971. "subs %[hi], %[hi], r6\n\t"
  2972. #else
  2973. "sub %[hi], %[hi], r6\n\t"
  2974. #endif
  2975. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2976. "subs r4, r4, #1\n\t"
  2977. #else
  2978. "sub r4, r4, #1\n\t"
  2979. #endif
  2980. "bpl L_sp_div_word_loop%=\n\t"
  2981. "movs r7, #0\n\t"
  2982. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2983. "adds r3, r3, r3\n\t"
  2984. #else
  2985. "add r3, r3, r3\n\t"
  2986. #endif
  2987. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2988. "adds r3, r3, #1\n\t"
  2989. #else
  2990. "add r3, r3, #1\n\t"
  2991. #endif
  2992. /* r * d - Start */
  2993. "uxth %[hi], r3\n\t"
  2994. "uxth r4, %[d]\n\t"
  2995. #ifdef WOLFSSL_KEIL
  2996. "muls r4, %[hi], r4\n\t"
  2997. #elif defined(__clang__)
  2998. "muls r4, %[hi]\n\t"
  2999. #else
  3000. "mul r4, %[hi]\n\t"
  3001. #endif
  3002. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3003. "lsrs r6, %[d], #16\n\t"
  3004. #else
  3005. "lsr r6, %[d], #16\n\t"
  3006. #endif
  3007. #ifdef WOLFSSL_KEIL
  3008. "muls %[hi], r6, %[hi]\n\t"
  3009. #elif defined(__clang__)
  3010. "muls %[hi], r6\n\t"
  3011. #else
  3012. "mul %[hi], r6\n\t"
  3013. #endif
  3014. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3015. "lsrs r5, %[hi], #16\n\t"
  3016. #else
  3017. "lsr r5, %[hi], #16\n\t"
  3018. #endif
  3019. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3020. "lsls %[hi], %[hi], #16\n\t"
  3021. #else
  3022. "lsl %[hi], %[hi], #16\n\t"
  3023. #endif
  3024. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3025. "adds r4, r4, %[hi]\n\t"
  3026. #else
  3027. "add r4, r4, %[hi]\n\t"
  3028. #endif
  3029. #ifdef WOLFSSL_KEIL
  3030. "adcs r5, r5, r7\n\t"
  3031. #elif defined(__clang__)
  3032. "adcs r5, r7\n\t"
  3033. #else
  3034. "adc r5, r7\n\t"
  3035. #endif
  3036. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3037. "lsrs %[hi], r3, #16\n\t"
  3038. #else
  3039. "lsr %[hi], r3, #16\n\t"
  3040. #endif
  3041. #ifdef WOLFSSL_KEIL
  3042. "muls r6, %[hi], r6\n\t"
  3043. #elif defined(__clang__)
  3044. "muls r6, %[hi]\n\t"
  3045. #else
  3046. "mul r6, %[hi]\n\t"
  3047. #endif
  3048. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3049. "adds r5, r5, r6\n\t"
  3050. #else
  3051. "add r5, r5, r6\n\t"
  3052. #endif
  3053. "uxth r6, %[d]\n\t"
  3054. #ifdef WOLFSSL_KEIL
  3055. "muls %[hi], r6, %[hi]\n\t"
  3056. #elif defined(__clang__)
  3057. "muls %[hi], r6\n\t"
  3058. #else
  3059. "mul %[hi], r6\n\t"
  3060. #endif
  3061. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3062. "lsrs r6, %[hi], #16\n\t"
  3063. #else
  3064. "lsr r6, %[hi], #16\n\t"
  3065. #endif
  3066. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3067. "lsls %[hi], %[hi], #16\n\t"
  3068. #else
  3069. "lsl %[hi], %[hi], #16\n\t"
  3070. #endif
  3071. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3072. "adds r4, r4, %[hi]\n\t"
  3073. #else
  3074. "add r4, r4, %[hi]\n\t"
  3075. #endif
  3076. #ifdef WOLFSSL_KEIL
  3077. "adcs r5, r5, r6\n\t"
  3078. #elif defined(__clang__)
  3079. "adcs r5, r6\n\t"
  3080. #else
  3081. "adc r5, r6\n\t"
  3082. #endif
  3083. /* r * d - Done */
  3084. "mov %[hi], r8\n\t"
  3085. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3086. "subs %[hi], %[hi], r4\n\t"
  3087. #else
  3088. "sub %[hi], %[hi], r4\n\t"
  3089. #endif
  3090. "movs r4, %[hi]\n\t"
  3091. "mov %[hi], r9\n\t"
  3092. #ifdef WOLFSSL_KEIL
  3093. "sbcs %[hi], %[hi], r5\n\t"
  3094. #elif defined(__clang__)
  3095. "sbcs %[hi], r5\n\t"
  3096. #else
  3097. "sbc %[hi], r5\n\t"
  3098. #endif
  3099. "movs r5, %[hi]\n\t"
  3100. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3101. "adds r3, r3, r5\n\t"
  3102. #else
  3103. "add r3, r3, r5\n\t"
  3104. #endif
  3105. /* r * d - Start */
  3106. "uxth %[hi], r3\n\t"
  3107. "uxth r4, %[d]\n\t"
  3108. #ifdef WOLFSSL_KEIL
  3109. "muls r4, %[hi], r4\n\t"
  3110. #elif defined(__clang__)
  3111. "muls r4, %[hi]\n\t"
  3112. #else
  3113. "mul r4, %[hi]\n\t"
  3114. #endif
  3115. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3116. "lsrs r6, %[d], #16\n\t"
  3117. #else
  3118. "lsr r6, %[d], #16\n\t"
  3119. #endif
  3120. #ifdef WOLFSSL_KEIL
  3121. "muls %[hi], r6, %[hi]\n\t"
  3122. #elif defined(__clang__)
  3123. "muls %[hi], r6\n\t"
  3124. #else
  3125. "mul %[hi], r6\n\t"
  3126. #endif
  3127. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3128. "lsrs r5, %[hi], #16\n\t"
  3129. #else
  3130. "lsr r5, %[hi], #16\n\t"
  3131. #endif
  3132. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3133. "lsls %[hi], %[hi], #16\n\t"
  3134. #else
  3135. "lsl %[hi], %[hi], #16\n\t"
  3136. #endif
  3137. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3138. "adds r4, r4, %[hi]\n\t"
  3139. #else
  3140. "add r4, r4, %[hi]\n\t"
  3141. #endif
  3142. #ifdef WOLFSSL_KEIL
  3143. "adcs r5, r5, r7\n\t"
  3144. #elif defined(__clang__)
  3145. "adcs r5, r7\n\t"
  3146. #else
  3147. "adc r5, r7\n\t"
  3148. #endif
  3149. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3150. "lsrs %[hi], r3, #16\n\t"
  3151. #else
  3152. "lsr %[hi], r3, #16\n\t"
  3153. #endif
  3154. #ifdef WOLFSSL_KEIL
  3155. "muls r6, %[hi], r6\n\t"
  3156. #elif defined(__clang__)
  3157. "muls r6, %[hi]\n\t"
  3158. #else
  3159. "mul r6, %[hi]\n\t"
  3160. #endif
  3161. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3162. "adds r5, r5, r6\n\t"
  3163. #else
  3164. "add r5, r5, r6\n\t"
  3165. #endif
  3166. "uxth r6, %[d]\n\t"
  3167. #ifdef WOLFSSL_KEIL
  3168. "muls %[hi], r6, %[hi]\n\t"
  3169. #elif defined(__clang__)
  3170. "muls %[hi], r6\n\t"
  3171. #else
  3172. "mul %[hi], r6\n\t"
  3173. #endif
  3174. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3175. "lsrs r6, %[hi], #16\n\t"
  3176. #else
  3177. "lsr r6, %[hi], #16\n\t"
  3178. #endif
  3179. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3180. "lsls %[hi], %[hi], #16\n\t"
  3181. #else
  3182. "lsl %[hi], %[hi], #16\n\t"
  3183. #endif
  3184. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3185. "adds r4, r4, %[hi]\n\t"
  3186. #else
  3187. "add r4, r4, %[hi]\n\t"
  3188. #endif
  3189. #ifdef WOLFSSL_KEIL
  3190. "adcs r5, r5, r6\n\t"
  3191. #elif defined(__clang__)
  3192. "adcs r5, r6\n\t"
  3193. #else
  3194. "adc r5, r6\n\t"
  3195. #endif
  3196. /* r * d - Done */
  3197. "mov %[hi], r8\n\t"
  3198. "mov r6, r9\n\t"
  3199. #ifdef WOLFSSL_KEIL
  3200. "subs r4, %[hi], r4\n\t"
  3201. #else
  3202. #ifdef __clang__
  3203. "subs r4, %[hi], r4\n\t"
  3204. #else
  3205. "sub r4, %[hi], r4\n\t"
  3206. #endif
  3207. #endif
  3208. #ifdef WOLFSSL_KEIL
  3209. "sbcs r6, r6, r5\n\t"
  3210. #elif defined(__clang__)
  3211. "sbcs r6, r5\n\t"
  3212. #else
  3213. "sbc r6, r5\n\t"
  3214. #endif
  3215. "movs r5, r6\n\t"
  3216. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3217. "adds r3, r3, r5\n\t"
  3218. #else
  3219. "add r3, r3, r5\n\t"
  3220. #endif
  3221. /* r * d - Start */
  3222. "uxth %[hi], r3\n\t"
  3223. "uxth r4, %[d]\n\t"
  3224. #ifdef WOLFSSL_KEIL
  3225. "muls r4, %[hi], r4\n\t"
  3226. #elif defined(__clang__)
  3227. "muls r4, %[hi]\n\t"
  3228. #else
  3229. "mul r4, %[hi]\n\t"
  3230. #endif
  3231. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3232. "lsrs r6, %[d], #16\n\t"
  3233. #else
  3234. "lsr r6, %[d], #16\n\t"
  3235. #endif
  3236. #ifdef WOLFSSL_KEIL
  3237. "muls %[hi], r6, %[hi]\n\t"
  3238. #elif defined(__clang__)
  3239. "muls %[hi], r6\n\t"
  3240. #else
  3241. "mul %[hi], r6\n\t"
  3242. #endif
  3243. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3244. "lsrs r5, %[hi], #16\n\t"
  3245. #else
  3246. "lsr r5, %[hi], #16\n\t"
  3247. #endif
  3248. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3249. "lsls %[hi], %[hi], #16\n\t"
  3250. #else
  3251. "lsl %[hi], %[hi], #16\n\t"
  3252. #endif
  3253. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3254. "adds r4, r4, %[hi]\n\t"
  3255. #else
  3256. "add r4, r4, %[hi]\n\t"
  3257. #endif
  3258. #ifdef WOLFSSL_KEIL
  3259. "adcs r5, r5, r7\n\t"
  3260. #elif defined(__clang__)
  3261. "adcs r5, r7\n\t"
  3262. #else
  3263. "adc r5, r7\n\t"
  3264. #endif
  3265. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3266. "lsrs %[hi], r3, #16\n\t"
  3267. #else
  3268. "lsr %[hi], r3, #16\n\t"
  3269. #endif
  3270. #ifdef WOLFSSL_KEIL
  3271. "muls r6, %[hi], r6\n\t"
  3272. #elif defined(__clang__)
  3273. "muls r6, %[hi]\n\t"
  3274. #else
  3275. "mul r6, %[hi]\n\t"
  3276. #endif
  3277. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3278. "adds r5, r5, r6\n\t"
  3279. #else
  3280. "add r5, r5, r6\n\t"
  3281. #endif
  3282. "uxth r6, %[d]\n\t"
  3283. #ifdef WOLFSSL_KEIL
  3284. "muls %[hi], r6, %[hi]\n\t"
  3285. #elif defined(__clang__)
  3286. "muls %[hi], r6\n\t"
  3287. #else
  3288. "mul %[hi], r6\n\t"
  3289. #endif
  3290. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3291. "lsrs r6, %[hi], #16\n\t"
  3292. #else
  3293. "lsr r6, %[hi], #16\n\t"
  3294. #endif
  3295. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3296. "lsls %[hi], %[hi], #16\n\t"
  3297. #else
  3298. "lsl %[hi], %[hi], #16\n\t"
  3299. #endif
  3300. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3301. "adds r4, r4, %[hi]\n\t"
  3302. #else
  3303. "add r4, r4, %[hi]\n\t"
  3304. #endif
  3305. #ifdef WOLFSSL_KEIL
  3306. "adcs r5, r5, r6\n\t"
  3307. #elif defined(__clang__)
  3308. "adcs r5, r6\n\t"
  3309. #else
  3310. "adc r5, r6\n\t"
  3311. #endif
  3312. /* r * d - Done */
  3313. "mov %[hi], r8\n\t"
  3314. "mov r6, r9\n\t"
  3315. #ifdef WOLFSSL_KEIL
  3316. "subs r4, %[hi], r4\n\t"
  3317. #else
  3318. #ifdef __clang__
  3319. "subs r4, %[hi], r4\n\t"
  3320. #else
  3321. "sub r4, %[hi], r4\n\t"
  3322. #endif
  3323. #endif
  3324. #ifdef WOLFSSL_KEIL
  3325. "sbcs r6, r6, r5\n\t"
  3326. #elif defined(__clang__)
  3327. "sbcs r6, r5\n\t"
  3328. #else
  3329. "sbc r6, r5\n\t"
  3330. #endif
  3331. "movs r5, r6\n\t"
  3332. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3333. "adds r3, r3, r5\n\t"
  3334. #else
  3335. "add r3, r3, r5\n\t"
  3336. #endif
  3337. "movs r6, %[d]\n\t"
  3338. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3339. "subs r6, r6, r4\n\t"
  3340. #else
  3341. "sub r6, r6, r4\n\t"
  3342. #endif
  3343. #ifdef WOLFSSL_KEIL
  3344. "sbcs r6, r6, r6\n\t"
  3345. #elif defined(__clang__)
  3346. "sbcs r6, r6\n\t"
  3347. #else
  3348. "sbc r6, r6\n\t"
  3349. #endif
  3350. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3351. "subs r3, r3, r6\n\t"
  3352. #else
  3353. "sub r3, r3, r6\n\t"
  3354. #endif
  3355. "movs %[hi], r3\n\t"
  3356. : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
  3357. :
  3358. : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
  3359. );
  3360. return (uint32_t)(size_t)hi;
  3361. }
  3362. #define SP_ASM_DIV_WORD
  3363. #endif /* !WOLFSSL_SP_DIV_WORD_HALF */
  3364. #define SP_INT_ASM_AVAILABLE
  3365. #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
/*
 * CPU: PPC64
 *
 * NOTE(review): the accumulate macros below use general purpose registers
 * 16 and 17 as fixed scratch registers for the product; they are declared
 * in the clobber lists. Macros that use the carry chain (addc/adde/addze,
 * subfc/subfe) also clobber "cc".
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[b] \n\t" \
        "mulhdu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is zeroed. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhdu %[h], %[a], %[b] \n\t" \
        "mulld %[l], %[a], %[b] \n\t" \
        "li %[o], 0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * addze propagates any carry out of vh into the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * No overflow word: caller guarantees vh | vl cannot overflow. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once and accumulated twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so addze is only needed after the second accumulate).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[a] \n\t" \
        "mulhdu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" \
        "mulhdu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" \
        "mulhdu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "addze %[h], %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl.
 * subfc computes l - a and sets CA; subfe then subtracts the borrow
 * (via zeroed scratch register 16) from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" \
        "li 16, 0 \n\t" \
        "subfe %[h], 16, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzd %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
/*
 * CPU: PPC 32-bit
 *
 * NOTE(review): mirrors the PPC64 section with 32-bit instructions
 * (mullw/mulhwu/cntlzw). GPRs 16 and 17 are fixed scratch registers for
 * the accumulate macros; carry-chain macros clobber "cc".
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[b] \n\t" \
        "mulhwu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is zeroed. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhwu %[h], %[a], %[b] \n\t" \
        "mullw %[l], %[a], %[b] \n\t" \
        "li %[o], 0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * addze propagates any carry out of vh into the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * No overflow word: caller guarantees vh | vl cannot overflow. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once and accumulated twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so addze is only needed after the second accumulate).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[a] \n\t" \
        "mulhwu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" \
        "mulhwu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" \
        "mulhwu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "addze %[h], %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl.
 * subfc computes l - a and sets CA; subfe then subtracts the borrow
 * (via zeroed scratch register 16) from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" \
        "li 16, 0 \n\t" \
        "subfe %[h], 16, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzw %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
/*
 * CPU: MIPS 64-bit
 *
 * NOTE(review): MIPS has no flags register, so carries/borrows are
 * recomputed with sltu (set-on-less-than-unsigned): after x += y,
 * (x < y) is 1 exactly when the add wrapped. Registers $10-$12 (t2-t4)
 * are fixed scratch registers and the HI/LO multiply result registers
 * are clobbered by the dmultu-based macros.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "$lo", "$hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is zeroed. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        "move %[o], $0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * $10 = low product, $11 = high product, $12 = computed carry. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * No overflow word: caller guarantees vh | vl cannot overflow. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once and accumulated twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so carries into vo are only tracked on the second accumulate).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "$lo", "$hi" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Sub va from: vh | vl.
 * Borrow detection: after l' = l - a, (old l < l') is 1 exactly when
 * the subtraction wrapped; that borrow is subtracted from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[l] \n\t" \
        "dsubu %[l], $12, %[a] \n\t" \
        "sltu $12, $12, %[l] \n\t" \
        "dsubu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "$12" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
/*
 * CPU: MIPS 32-bit
 *
 * NOTE(review): mirrors the MIPS64 section with 32-bit instructions
 * (multu/addu/subu). Carries/borrows are recomputed with sltu since
 * MIPS has no flags register; $10-$12 (t2-t4) are fixed scratch
 * registers and HI/LO are clobbered by the multu-based macros.
 * The HI/LO clobbers here are spelled "%lo"/"%hi" while the MIPS64
 * section uses "$lo"/"$hi" -- TODO(review): confirm both spellings are
 * accepted by the targeted toolchains.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "%lo", "%hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * The overflow word vo is zeroed. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        "move %[o], $0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * $10 = low product, $11 = high product, $12 = computed carry. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * No overflow word: caller guarantees vh | vl cannot overflow. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * The product is computed once and accumulated twice. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so carries into vo are only tracked on the second accumulate).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "%lo", "%hi" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Sub va from: vh | vl.
 * Borrow detection: after l' = l - a, (old l < l') is 1 exactly when
 * the subtraction wrapped; that borrow is subtracted from vh. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[l] \n\t" \
        "subu %[l], $12, %[a] \n\t" \
        "sltu $12, $12, %[l] \n\t" \
        "subu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "addu %[o], %[o], %[c] \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "addu %[o], %[o], %[c] \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "$12" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
/*
 * CPU: RISCV 64-bit
 *
 * mul produces the low 64 bits of a product and mulhu the high 64 bits.
 * sltu recovers the carry out of an add (sum < addend implies a wrap).
 * Macros that need scratch registers clobber a5 (low), a6 (high) and
 * a7 (carry).
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "mul %[l], %[a], %[b] \n\t" \
    "mulhu %[h], %[a], %[b] \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va), [b] "r" (vb) \
    : "memory" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is zeroed). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mulhu %[h], %[a], %[b] \n\t" \
    "mul %[l], %[a], %[b] \n\t" \
    "add %[o], zero, zero \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : \
)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
    "mul %[l], %[a], %[a] \n\t" \
    "mulhu %[h], %[a], %[a] \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va) \
    : "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[a] \n\t" \
    "mulhu a6, %[a], %[a] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va) \
    : "a5", "a6", "a7" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[a] \n\t" \
    "mulhu a6, %[a], %[a] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a5", "a6", "a7" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a7" \
)
/* Sub va from: vh | vl.
 * Old low word is saved in a7 so the borrow can be computed from it. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
    "add a7, %[l], zero \n\t" \
    "sub %[l], a7, %[a] \n\t" \
    "sltu a7, a7, %[l] \n\t" \
    "sub %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a7" \
)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], %[b] \n\t" \
    "sltu a7, %[h], %[b] \n\t" \
    "add %[o], %[o], %[c] \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], %[b] \n\t" \
    "sltu a7, %[h], %[b] \n\t" \
    "add %[o], %[o], %[c] \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
    : "a7" \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
/*
 * CPU: RISCV 32-bit
 *
 * Same macro bodies as the 64-bit variant: mul/mulhu give the low/high
 * halves of the product and sltu recovers carries; only the word size
 * differs. Scratch registers a5-a7 are clobbered where used.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "mul %[l], %[a], %[b] \n\t" \
    "mulhu %[h], %[a], %[b] \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va), [b] "r" (vb) \
    : "memory" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is zeroed). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mulhu %[h], %[a], %[b] \n\t" \
    "mul %[l], %[a], %[b] \n\t" \
    "add %[o], zero, zero \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : \
)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result into: vh | vl.
 * Caller guarantees no carry out of vh. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[b] \n\t" \
    "mulhu a6, %[a], %[b] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "a5", "a6", "a7" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
    "mul %[l], %[a], %[a] \n\t" \
    "mulhu %[h], %[a], %[a] \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va) \
    : "memory" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[a] \n\t" \
    "mulhu a6, %[a], %[a] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "sltu a7, %[h], a6 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va) \
    : "a5", "a6", "a7" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
    "mul a5, %[a], %[a] \n\t" \
    "mulhu a6, %[a], %[a] \n\t" \
    "add %[l], %[l], a5 \n\t" \
    "sltu a7, %[l], a5 \n\t" \
    "add %[h], %[h], a6 \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a5", "a6", "a7" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a7" \
)
/* Sub va from: vh | vl.
 * Old low word is saved in a7 so the borrow can be computed from it. */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
    "add a7, %[l], zero \n\t" \
    "sub %[l], a7, %[a] \n\t" \
    "sltu a7, a7, %[l] \n\t" \
    "sub %[h], %[h], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "a7" \
)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], %[b] \n\t" \
    "sltu a7, %[h], %[b] \n\t" \
    "add %[o], %[o], %[c] \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[l], %[l], %[a] \n\t" \
    "sltu a7, %[l], %[a] \n\t" \
    "add %[h], %[h], a7 \n\t" \
    "sltu a7, %[h], a7 \n\t" \
    "add %[o], %[o], a7 \n\t" \
    "add %[h], %[h], %[b] \n\t" \
    "sltu a7, %[h], %[b] \n\t" \
    "add %[o], %[o], %[c] \n\t" \
    "add %[o], %[o], a7 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
    : "a7" \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
/*
 * CPU: IBM s390x (z/Architecture)
 *
 * mlgr multiplies r1 by its operand leaving the 128-bit product in the
 * even/odd pair r0 (high) : r1 (low). algr/alcgr and slgr/slbgr use the
 * condition code for carry/borrow chaining, so "cc" is clobbered.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "lgr %[l], %%r1 \n\t" \
    "lgr %[h], %%r0 \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va), [b] "r" (vb) \
    : "memory", "r0", "r1" \
)
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is zeroed). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "lghi %[o], 0 \n\t" \
    "lgr %[l], %%r1 \n\t" \
    "lgr %[h], %%r0 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "r0", "r1" \
)
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * r10 holds zero so alcgr can propagate the carry into vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "alcgr %[o], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "r0", "r1", "r10", "cc" \
)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
__asm__ __volatile__ ( \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va), [b] "r" (vb) \
    : "r0", "r1", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "alcgr %[o], %%r10 \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "alcgr %[o], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "r0", "r1", "r10", "cc" \
)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %[b] \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "alcgr %[o], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb) \
    : "r0", "r1", "r10", "cc" \
)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
__asm__ __volatile__ ( \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %%r1 \n\t" \
    "lgr %[l], %%r1 \n\t" \
    "lgr %[h], %%r0 \n\t" \
    : [h] "+r" (vh), [l] "+r" (vl) \
    : [a] "r" (va) \
    : "memory", "r0", "r1" \
)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %%r1 \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    "alcgr %[o], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va) \
    : "r0", "r1", "r10", "cc" \
)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
__asm__ __volatile__ ( \
    "lgr %%r1, %[a] \n\t" \
    "mlgr %%r0, %%r1 \n\t" \
    "algr %[l], %%r1 \n\t" \
    "alcgr %[h], %%r0 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "r0", "r1", "cc" \
)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "algr %[l], %[a] \n\t" \
    "alcgr %[h], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "r10", "cc" \
)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
__asm__ __volatile__ ( \
    "lghi %%r10, 0 \n\t" \
    "slgr %[l], %[a] \n\t" \
    "slbgr %[h], %%r10 \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh) \
    : [a] "r" (va) \
    : "r10", "cc" \
)
/* Add two times vc | vb | va into vo | vh | vl.
 * Carry chains naturally through the condition code. */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
__asm__ __volatile__ ( \
    "algr %[l], %[a] \n\t" \
    "alcgr %[h], %[b] \n\t" \
    "alcgr %[o], %[c] \n\t" \
    "algr %[l], %[a] \n\t" \
    "alcgr %[h], %[b] \n\t" \
    "alcgr %[o], %[c] \n\t" \
    : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
    : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
    : "cc" \
)
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
#ifdef SP_INT_ASM_AVAILABLE
/* Assembly implementations of the word primitives exist for this CPU;
 * enable them unless explicitly disabled. */
#ifndef SP_INT_NO_ASM
#define SQR_MUL_ASM
#endif
/* Register-operand variants default to the generic macros when a CPU
 * does not provide a specialised version. */
#ifndef SP_ASM_ADDC_REG
#define SP_ASM_ADDC_REG SP_ASM_ADDC
#endif /* SP_ASM_ADDC_REG */
#ifndef SP_ASM_SUBB_REG
#define SP_ASM_SUBB_REG SP_ASM_SUBB
#endif /* SP_ASM_SUBB_REG */
#endif /* SP_INT_ASM_AVAILABLE */
#endif /* !WOLFSSL_NO_ASM */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
!defined(NO_DSA) || !defined(NO_DH) || \
(defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
(defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
#ifndef WC_NO_CACHE_RESISTANT
/* Mask of address for constant time operations. */
const size_t sp_off_on_addr[2] =
{
(size_t) 0,
(size_t)-1
};
#endif
#endif
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
#ifdef __cplusplus
extern "C" {
#endif
/* Modular exponentiation implementations using Single Precision. */
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
sp_int* res);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
/* Forward declarations of file-local Montgomery helpers. */
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
defined(OPENSSL_ALL)
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp);
#endif
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
defined(OPENSSL_ALL)
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
#endif
/* Determine when mp_add_d is required. */
#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
!defined(NO_DSA) || (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || \
(!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
defined(OPENSSL_EXTRA)
#define WOLFSSL_SP_ADD_D
#endif
/* Determine when mp_sub_d is required. */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
!defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
#define WOLFSSL_SP_SUB_D
#endif
/* Determine when mp_read_radix with a radix of 16 is required. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
!defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || \
!defined(NO_DSA) || defined(OPENSSL_EXTRA)
#define WOLFSSL_SP_READ_RADIX_16
#endif
/* Determine when mp_read_radix with a radix of 10 is required. */
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
!defined(WOLFSSL_RSA_VERIFY_ONLY)
#define WOLFSSL_SP_READ_RADIX_10
#endif
/* Determine when mp_invmod is required. */
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
(!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
!defined(WOLFSSL_RSA_PUBLIC_ONLY))
#define WOLFSSL_SP_INVMOD
#endif
/* Determine when mp_invmod_mont_ct is required. */
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
#define WOLFSSL_SP_INVMOD_MONT_CT
#endif
/* Determine when mp_prime_gen is required. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
!defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
(!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
#define WOLFSSL_SP_PRIME_GEN
#endif
  4682. /* Set the multi-precision number to zero.
  4683. *
  4684. * Assumes a is not NULL.
  4685. *
  4686. * @param [out] a SP integer to set to zero.
  4687. */
  4688. static void _sp_zero(sp_int* a)
  4689. {
  4690. sp_int_minimal* am = (sp_int_minimal *)a;
  4691. am->used = 0;
  4692. am->dp[0] = 0;
  4693. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4694. am->sign = MP_ZPOS;
  4695. #endif
  4696. }
/* Initialize the multi-precision number to be zero with a given max size.
 *
 * Assumes a is not NULL and size has already been validated by the caller.
 *
 * @param [out] a     SP integer.
 * @param [in]  size  Number of words to say are available.
 */
static void _sp_init_size(sp_int* a, unsigned int size)
{
    /* NOTE(review): the volatile qualifier presumably defends against a
     * compiler reordering/eliding these stores - confirm before removing. */
    volatile sp_int_minimal* am = (sp_int_minimal *)a;
#ifdef HAVE_WOLF_BIGINT
    /* Initialize the raw (big-endian byte array) representation too. */
    wc_bigint_init((struct WC_BIGINT*)&am->raw);
#endif
    /* Value starts out as zero. */
    _sp_zero((sp_int*)am);
    am->size = size;
}
  4711. /* Initialize the multi-precision number to be zero with a given max size.
  4712. *
  4713. * @param [out] a SP integer.
  4714. * @param [in] size Number of words to say are available.
  4715. *
  4716. * @return MP_OKAY on success.
  4717. * @return MP_VAL when a is NULL.
  4718. */
  4719. int sp_init_size(sp_int* a, unsigned int size)
  4720. {
  4721. int err = MP_OKAY;
  4722. /* Validate parameters. Don't use size more than max compiled. */
  4723. if ((a == NULL) || ((size <= 0) || (size > SP_INT_DIGITS))) {
  4724. err = MP_VAL;
  4725. }
  4726. if (err == MP_OKAY) {
  4727. _sp_init_size(a, size);
  4728. }
  4729. return err;
  4730. }
  4731. /* Initialize the multi-precision number to be zero.
  4732. *
  4733. * @param [out] a SP integer.
  4734. *
  4735. * @return MP_OKAY on success.
  4736. * @return MP_VAL when a is NULL.
  4737. */
  4738. int sp_init(sp_int* a)
  4739. {
  4740. int err = MP_OKAY;
  4741. /* Validate parameter. */
  4742. if (a == NULL) {
  4743. err = MP_VAL;
  4744. }
  4745. else {
  4746. /* Assume complete sp_int with SP_INT_DIGITS digits. */
  4747. _sp_init_size(a, SP_INT_DIGITS);
  4748. }
  4749. return err;
  4750. }
  4751. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  4752. /* Initialize up to six multi-precision numbers to be zero.
  4753. *
  4754. * @param [out] n1 SP integer.
  4755. * @param [out] n2 SP integer.
  4756. * @param [out] n3 SP integer.
  4757. * @param [out] n4 SP integer.
  4758. * @param [out] n5 SP integer.
  4759. * @param [out] n6 SP integer.
  4760. *
  4761. * @return MP_OKAY on success.
  4762. */
  4763. int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
  4764. sp_int* n6)
  4765. {
  4766. /* Initialize only those pointers that are valid. */
  4767. if (n1 != NULL) {
  4768. _sp_init_size(n1, SP_INT_DIGITS);
  4769. }
  4770. if (n2 != NULL) {
  4771. _sp_init_size(n2, SP_INT_DIGITS);
  4772. }
  4773. if (n3 != NULL) {
  4774. _sp_init_size(n3, SP_INT_DIGITS);
  4775. }
  4776. if (n4 != NULL) {
  4777. _sp_init_size(n4, SP_INT_DIGITS);
  4778. }
  4779. if (n5 != NULL) {
  4780. _sp_init_size(n5, SP_INT_DIGITS);
  4781. }
  4782. if (n6 != NULL) {
  4783. _sp_init_size(n6, SP_INT_DIGITS);
  4784. }
  4785. return MP_OKAY;
  4786. }
  4787. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
  4788. /* Free the memory allocated in the multi-precision number.
  4789. *
  4790. * @param [in] a SP integer.
  4791. */
  4792. void sp_free(sp_int* a)
  4793. {
  4794. if (a != NULL) {
  4795. #ifdef HAVE_WOLF_BIGINT
  4796. wc_bigint_free(&a->raw);
  4797. #endif
  4798. }
  4799. }
  4800. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4801. !defined(NO_DH) || defined(HAVE_ECC)
  4802. /* Grow multi-precision number to be able to hold l digits.
  4803. * This function does nothing as the number of digits is fixed.
  4804. *
  4805. * @param [in,out] a SP integer.
  4806. * @param [in] l Number of digits to grow to.
  4807. *
  4808. * @return MP_OKAY on success
  4809. * @return MP_MEM if the number of digits requested is more than available.
  4810. */
  4811. int sp_grow(sp_int* a, int l)
  4812. {
  4813. int err = MP_OKAY;
  4814. /* Validate parameter. */
  4815. if ((a == NULL) || (l < 0)) {
  4816. err = MP_VAL;
  4817. }
  4818. /* Ensure enough words allocated for grow. */
  4819. if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
  4820. err = MP_MEM;
  4821. }
  4822. if (err == MP_OKAY) {
  4823. unsigned int i;
  4824. /* Put in zeros up to the new length. */
  4825. for (i = a->used; i < (unsigned int)l; i++) {
  4826. a->dp[i] = 0;
  4827. }
  4828. }
  4829. return err;
  4830. }
  4831. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
  4832. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4833. defined(HAVE_ECC)
  4834. /* Set the multi-precision number to zero.
  4835. *
  4836. * @param [out] a SP integer to set to zero.
  4837. */
  4838. void sp_zero(sp_int* a)
  4839. {
  4840. /* Make an sp_int with valid pointer zero. */
  4841. if (a != NULL) {
  4842. _sp_zero(a);
  4843. }
  4844. }
  4845. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
  4846. /* Clear the data from the multi-precision number, set to zero and free.
  4847. *
  4848. * @param [out] a SP integer.
  4849. */
  4850. void sp_clear(sp_int* a)
  4851. {
  4852. /* Clear when valid pointer passed in. */
  4853. if (a != NULL) {
  4854. unsigned int i;
  4855. /* Only clear the digits being used. */
  4856. for (i = 0; i < a->used; i++) {
  4857. a->dp[i] = 0;
  4858. }
  4859. /* Set back to zero and free. */
  4860. _sp_zero(a);
  4861. sp_free(a);
  4862. }
  4863. }
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
!defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
/* Ensure the data in the multi-precision number is zeroed.
 *
 * Use when security sensitive data needs to be wiped.
 *
 * @param [in] a SP integer.
 */
void sp_forcezero(sp_int* a)
{
    /* Zeroize when a valid pointer passed in. */
    if (a != NULL) {
        /* Ensure all data zeroized - data not zeroed when used decreases.
         * Wipe the full allocation (size), not just the used digits. */
        ForceZero(a->dp, a->size * SP_WORD_SIZEOF);
        /* Set back to zero. */
#ifdef HAVE_WOLF_BIGINT
        /* Zeroize the raw data as well. */
        wc_bigint_zero(&a->raw);
#endif
        /* Make value zero and free. */
        _sp_zero(a);
        sp_free(a);
    }
}
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA || WOLFSSL_SP_PRIME_GEN */
  4889. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4890. !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
  4891. /* Copy value of multi-precision number a into r.
  4892. *
  4893. * @param [in] a SP integer - source.
  4894. * @param [out] r SP integer - destination.
  4895. */
  4896. static void _sp_copy(const sp_int* a, sp_int* r)
  4897. {
  4898. /* Copy words across. */
  4899. if (a->used == 0) {
  4900. r->dp[0] = 0;
  4901. }
  4902. else {
  4903. XMEMCPY(r->dp, a->dp, a->used * SP_WORD_SIZEOF);
  4904. }
  4905. /* Set number of used words in result. */
  4906. r->used = a->used;
  4907. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4908. /* Set sign of result. */
  4909. r->sign = a->sign;
  4910. #endif
  4911. }
  4912. /* Copy value of multi-precision number a into r.
  4913. *
  4914. * @param [in] a SP integer - source.
  4915. * @param [out] r SP integer - destination.
  4916. *
  4917. * @return MP_OKAY on success.
  4918. */
  4919. int sp_copy(const sp_int* a, sp_int* r)
  4920. {
  4921. int err = MP_OKAY;
  4922. /* Validate parameters. */
  4923. if ((a == NULL) || (r == NULL)) {
  4924. err = MP_VAL;
  4925. }
  4926. /* Only copy if different pointers. */
  4927. if (a != r) {
  4928. /* Validated space in result. */
  4929. if ((err == MP_OKAY) && (a->used > r->size)) {
  4930. err = MP_VAL;
  4931. }
  4932. if (err == MP_OKAY) {
  4933. _sp_copy(a, r);
  4934. }
  4935. }
  4936. return err;
  4937. }
  4938. #endif
  4939. #if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4940. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  4941. defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
  4942. /* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
  4943. *
  4944. * Constant time implementation.
  4945. * When y is 0, r1 = a2 and r2 = a1.
  4946. * When y is 1, r1 = a1 and r2 = a2.
  4947. *
  4948. * @param [in] a1 First number to copy.
  4949. * @param [in] a2 Second number to copy.
  4950. * @param [out] r1 First result number to copy into.
  4951. * @param [out] r2 Second result number to copy into.
  4952. * @param [in] y Indicates which number goes into which result number.
  4953. * @param [in] used Number of digits to copy.
  4954. */
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
    sp_int* r2, int y, unsigned int used)
{
    unsigned int i;

    /* Copy data - constant time. Selection is done with masks rather than
     * branches so the memory access pattern is independent of y.
     * NOTE(review): assumes wc_off_on_addr[0]/[1] are the all-zeros and
     * all-ones digit masks - confirm against its definition. */
    for (i = 0; i < used; i++) {
        r1->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y  ])) +
                    (a2->dp[i] & ((sp_digit)wc_off_on_addr[y^1]));
        r2->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y^1])) +
                    (a2->dp[i] & ((sp_digit)wc_off_on_addr[y  ]));
    }
    /* Copy used - same mask selection as the digit loop. */
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
               (a2->used & ((int)wc_off_on_addr[y^1]));
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
               (a2->used & ((int)wc_off_on_addr[y  ]));
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Copy sign - same mask selection as the digit loop. */
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
               (a2->sign & ((int)wc_off_on_addr[y^1]));
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
               (a2->sign & ((int)wc_off_on_addr[y  ]));
#endif
}
  4979. #endif
  4980. #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
  4981. /* Initializes r and copies in value from a.
  4982. *
  4983. * @param [out] r SP integer - destination.
  4984. * @param [in] a SP integer - source.
  4985. *
  4986. * @return MP_OKAY on success.
  4987. * @return MP_VAL when a or r is NULL.
  4988. */
  4989. int sp_init_copy(sp_int* r, const sp_int* a)
  4990. {
  4991. int err;
  4992. /* Initialize r and copy value in a into it. */
  4993. err = sp_init(r);
  4994. if (err == MP_OKAY) {
  4995. err = sp_copy(a, r);
  4996. }
  4997. return err;
  4998. }
  4999. #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
  5000. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5001. !defined(NO_DH) || !defined(NO_DSA)
  5002. /* Exchange the values in a and b.
  5003. *
  5004. * Avoid using this API as three copy operations are performed.
  5005. *
  5006. * @param [in,out] a SP integer to swap.
  5007. * @param [in,out] b SP integer to swap.
  5008. *
  5009. * @return MP_OKAY on success.
  5010. * @return MP_VAL when a or b is NULL.
  5011. * @return MP_MEM when dynamic memory allocation fails.
  5012. */
int sp_exch(sp_int* a, sp_int* b)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL)) {
        err = MP_VAL;
    }
    /* Check space for a in b and b in a. (Guarded by err so NULL pointers
     * are never dereferenced.) */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Declare temporary for swapping. */
        DECL_SP_INT(t, a->used);

        /* Create temporary for swapping. */
        ALLOC_SP_INT(t, a->used, err, NULL);
        if (err == MP_OKAY) {
            /* Cache allocated size of a and b - the struct copies below
             * would otherwise overwrite them. */
            unsigned int asize = a->size;
            unsigned int bsize = b->size;

            /* Copy all of SP int (header and digits): t <- a, a <- b,
             * b <- t. MP_INT_SIZEOF gives the byte size for that many
             * digits. */
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));

            /* Put back size of a and b. */
            a->size = asize;
            b->size = bsize;
        }

        FREE_SP_INT(t, NULL);
    }

    return err;
}
  5045. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  5046. * !NO_DSA */
  5047. #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
  5048. !defined(WC_NO_CACHE_RESISTANT)
  5049. /* Conditional swap of SP int values in constant time.
  5050. *
  5051. * @param [in] a First SP int to conditionally swap.
  5052. * @param [in] b Second SP int to conditionally swap.
  5053. * @param [in] cnt Count of words to copy.
  5054. * @param [in] swap When value is 1 then swap.
  5055. * @param [in] t Temporary SP int to use in swap.
  5056. * @return MP_OKAY on success.
  5057. * @return MP_MEM when dynamic memory allocation fails.
  5058. */
  5059. int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
  5060. {
  5061. unsigned int i;
  5062. sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
  5063. /* XOR other fields in sp_int into temp - mask set when swapping. */
  5064. t->used = (a->used ^ b->used) & (unsigned int)mask;
  5065. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5066. t->sign = (a->sign ^ b->sign) & (unsigned int)mask;
  5067. #endif
  5068. /* XOR requested words into temp - mask set when swapping. */
  5069. for (i = 0; i < (unsigned int)cnt; i++) {
  5070. t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
  5071. }
  5072. /* XOR temporary - when mask set then result will be b. */
  5073. a->used ^= t->used;
  5074. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5075. a->sign ^= t->sign;
  5076. #endif
  5077. for (i = 0; i < (unsigned int)cnt; i++) {
  5078. a->dp[i] ^= t->dp[i];
  5079. }
  5080. /* XOR temporary - when mask set then result will be a. */
  5081. b->used ^= t->used;
  5082. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5083. b->sign ^= b->sign;
  5084. #endif
  5085. for (i = 0; i < (unsigned int)cnt; i++) {
  5086. b->dp[i] ^= t->dp[i];
  5087. }
  5088. return MP_OKAY;
  5089. }
  5090. /* Conditional swap of SP int values in constant time.
  5091. *
  5092. * @param [in] a First SP int to conditionally swap.
  5093. * @param [in] b Second SP int to conditionally swap.
  5094. * @param [in] cnt Count of words to copy.
  5095. * @param [in] swap When value is 1 then swap.
  5096. * @return MP_OKAY on success.
  5097. * @return MP_MEM when dynamic memory allocation fails.
  5098. */
int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
{
    int err = MP_OKAY;
    /* Declare temporary sized to hold cnt words. */
    DECL_SP_INT(t, (size_t)cnt);

    /* Allocate temporary to hold masked xor of a and b. */
    ALLOC_SP_INT(t, cnt, err, NULL);
    if (err == MP_OKAY) {
        /* Delegate the constant-time masked-XOR swap. */
        err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
        /* Free only when allocation succeeded. */
        FREE_SP_INT(t, NULL);
    }

    return err;
}
  5111. #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
  5112. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5113. /* Calculate the absolute value of the multi-precision number.
  5114. *
  5115. * @param [in] a SP integer to calculate absolute value of.
  5116. * @param [out] r SP integer to hold result.
  5117. *
  5118. * @return MP_OKAY on success.
  5119. * @return MP_VAL when a or r is NULL.
  5120. */
  5121. int sp_abs(const sp_int* a, sp_int* r)
  5122. {
  5123. int err;
  5124. /* Copy a into r - copy fails when r is NULL. */
  5125. err = sp_copy(a, r);
  5126. if (err == MP_OKAY) {
  5127. r->sign = MP_ZPOS;
  5128. }
  5129. return err;
  5130. }
  5131. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  5132. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  5133. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  5134. /* Compare absolute value of two multi-precision numbers.
  5135. *
  5136. * @param [in] a SP integer.
  5137. * @param [in] b SP integer.
  5138. *
  5139. * @return MP_GT when a is greater than b.
  5140. * @return MP_LT when a is less than b.
  5141. * @return MP_EQ when a is equals b.
  5142. */
  5143. static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
  5144. {
  5145. int ret = MP_EQ;
  5146. /* Check number of words first. */
  5147. if (a->used > b->used) {
  5148. ret = MP_GT;
  5149. }
  5150. else if (a->used < b->used) {
  5151. ret = MP_LT;
  5152. }
  5153. else {
  5154. int i;
  5155. /* Starting from most significant word, compare words.
  5156. * Stop when different and set comparison return.
  5157. */
  5158. for (i = (int)(a->used - 1); i >= 0; i--) {
  5159. if (a->dp[i] > b->dp[i]) {
  5160. ret = MP_GT;
  5161. break;
  5162. }
  5163. else if (a->dp[i] < b->dp[i]) {
  5164. ret = MP_LT;
  5165. break;
  5166. }
  5167. }
  5168. /* If we made to the end then ret is MP_EQ from initialization. */
  5169. }
  5170. return ret;
  5171. }
  5172. #endif
  5173. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  5174. /* Compare absolute value of two multi-precision numbers.
  5175. *
  5176. * Pointers are compared such that NULL is less than not NULL.
  5177. *
  5178. * @param [in] a SP integer.
  5179. * @param [in] b SP integer.
  5180. *
  5181. * @return MP_GT when a is greater than b.
  5182. * @return MP_LT when a is less than b.
  5183. * @return MP_EQ when a equals b.
  5184. */
  5185. int sp_cmp_mag(const sp_int* a, const sp_int* b)
  5186. {
  5187. int ret;
  5188. /* Do pointer checks first. Both NULL returns equal. */
  5189. if (a == b) {
  5190. ret = MP_EQ;
  5191. }
  5192. /* Nothing is smaller than something. */
  5193. else if (a == NULL) {
  5194. ret = MP_LT;
  5195. }
  5196. /* Something is larger than nothing. */
  5197. else if (b == NULL) {
  5198. ret = MP_GT;
  5199. }
  5200. else
  5201. {
  5202. /* Compare values - a and b are not NULL. */
  5203. ret = _sp_cmp_abs(a, b);
  5204. }
  5205. return ret;
  5206. }
  5207. #endif
  5208. #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
  5209. defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
  5210. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  5211. /* Compare two multi-precision numbers.
  5212. *
  5213. * Assumes a and b are not NULL.
  5214. *
  5215. * @param [in] a SP integer.
* @param [in] b SP integer.
  5217. *
  5218. * @return MP_GT when a is greater than b.
  5219. * @return MP_LT when a is less than b.
  5220. * @return MP_EQ when a is equals b.
  5221. */
static int _sp_cmp(const sp_int* a, const sp_int* b)
{
    int ret;

    /* Note: the else-brace opened inside the first #ifdef block is closed
     * inside the second one - the pairing only works when the ifdefs are
     * kept together. */
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first: negative < positive. */
    if (a->sign > b->sign) {
        ret = MP_LT;
    }
    else if (a->sign < b->sign) {
        ret = MP_GT;
    }
    else /* (a->sign == b->sign) */ {
#endif
        /* Compare magnitudes - signs are equal (or not compiled in). */
        ret = _sp_cmp_abs(a, b);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (a->sign == MP_NEG) {
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
             * Both negative: larger magnitude means smaller value, so
             * swap MP_GT and MP_LT by negating.
             */
            ret = -ret;
        }
    }
#endif

    return ret;
}
  5248. #endif
  5249. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5250. !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
  5251. defined(WOLFSSL_SP_MATH_ALL)
  5252. /* Compare two multi-precision numbers.
  5253. *
  5254. * Pointers are compared such that NULL is less than not NULL.
  5255. *
  5256. * @param [in] a SP integer.
* @param [in] b SP integer.
  5258. *
  5259. * @return MP_GT when a is greater than b.
  5260. * @return MP_LT when a is less than b.
  5261. * @return MP_EQ when a is equals b.
  5262. */
  5263. int sp_cmp(const sp_int* a, const sp_int* b)
  5264. {
  5265. int ret;
  5266. /* Check pointers first. Both NULL returns equal. */
  5267. if (a == b) {
  5268. ret = MP_EQ;
  5269. }
  5270. /* Nothing is smaller than something. */
  5271. else if (a == NULL) {
  5272. ret = MP_LT;
  5273. }
  5274. /* Something is larger than nothing. */
  5275. else if (b == NULL) {
  5276. ret = MP_GT;
  5277. }
  5278. else
  5279. {
  5280. /* Compare values - a and b are not NULL. */
  5281. ret = _sp_cmp(a, b);
  5282. }
  5283. return ret;
  5284. }
  5285. #endif
  5286. /*************************
  5287. * Bit check/set functions
  5288. *************************/
  5289. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5290. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
  5291. defined(OPENSSL_EXTRA)
  5292. /* Check if a bit is set
  5293. *
  5294. * When a is NULL, result is 0.
  5295. *
  5296. * @param [in] a SP integer.
  5297. * @param [in] b Bit position to check.
  5298. *
  5299. * @return 0 when bit is not set.
  5300. * @return 1 when bit is set.
  5301. */
  5302. int sp_is_bit_set(const sp_int* a, unsigned int b)
  5303. {
  5304. int ret = 0;
  5305. /* Index of word. */
  5306. unsigned int i = b >> SP_WORD_SHIFT;
  5307. /* Check parameters. */
  5308. if ((a != NULL) && (i < a->used)) {
  5309. /* Shift amount to get bit down to index 0. */
  5310. unsigned int s = b & SP_WORD_MASK;
  5311. /* Get and mask bit. */
  5312. ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
  5313. }
  5314. return ret;
  5315. }
  5316. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5317. * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
  5318. /* Count the number of bits in the multi-precision number.
  5319. *
  5320. * When a is NULL, result is 0.
  5321. *
  5322. * @param [in] a SP integer.
  5323. *
  5324. * @return Number of bits in the SP integer value.
  5325. */
int sp_count_bits(const sp_int* a)
{
    int n = -1;

    /* Check parameter - NULL or zero-length value leaves n at -1. */
    if ((a != NULL) && (a->used > 0)) {
        /* Get index of last word. */
        n = (int)(a->used - 1);
        /* Don't count leading zero words (used may overstate). */
        while ((n >= 0) && (a->dp[n] == 0)) {
            n--;
        }
    }

    /* -1 indicates SP integer value was zero - report zero bits. */
    if (n < 0) {
        n = 0;
    }
    else {
        /* Get the most significant non-zero word. */
        sp_int_digit d = a->dp[n];
        /* Count of bits in all the words below it. */
        n *= SP_WORD_SIZE;

#ifdef SP_ASM_HI_BIT_SET_IDX
        {
            sp_int_digit hi;
            /* Get index of highest set bit (assembly helper). */
            SP_ASM_HI_BIT_SET_IDX(d, hi);
            /* Add bits up to and including index. */
            n += (int)hi + 1;
        }
#elif defined(SP_ASM_LZCNT)
        {
            sp_int_digit lz;
            /* Count number of leading zeros in highest non-zero digit. */
            SP_ASM_LZCNT(d, lz);
            /* Add non-leading zero bits count. */
            n += SP_WORD_SIZE - (int)lz;
        }
#else
        /* Portable fallback: check if top word has more than half the bits
         * set, and scan from whichever end is cheaper. */
        if (d > SP_HALF_MAX) {
            /* Set count to a full last word. */
            n += SP_WORD_SIZE;
            /* Don't count leading zero bits. */
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
                n--;
                d <<= 1;
            }
        }
        else {
            /* Add to count until highest set bit is shifted out. */
            while (d != 0) {
                n++;
                d >>= 1;
            }
        }
#endif
    }

    return n;
}
  5385. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5386. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  5387. (defined(HAVE_ECC) && defined(FP_ECC)) || \
  5388. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  5389. /* Number of entries in array of number of least significant zero bits. */
  5390. #define SP_LNZ_CNT 16
  5391. /* Number of bits the array checks. */
  5392. #define SP_LNZ_BITS 4
  5393. /* Mask to apply to check with array. */
  5394. #define SP_LNZ_MASK 0xf
  5395. /* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
  5396. static const int sp_lnz[SP_LNZ_CNT] = {
  5397. 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  5398. };
  5399. /* Count the number of least significant zero bits.
  5400. *
* When a is NULL, result is 0.
  5402. *
  5403. * @param [in] a SP integer to use.
  5404. *
  5405. * @return Number of least significant zero bits.
  5406. */
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
static
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
int sp_cnt_lsb(const sp_int* a)
{
    unsigned int bc = 0;

    /* Check for number with a value - zero (or NULL) returns 0. */
    if ((a != NULL) && (!sp_iszero(a))) {
        unsigned int i;
        unsigned int j;

        /* Count least significant words that are zero. Since the value is
         * non-zero, the loop stops at a non-zero word with i < a->used. */
        for (i = 0; i < a->used && a->dp[i] == 0; i++, bc += SP_WORD_SIZE) {
        }

        /* Use 4-bit table to get count in the first non-zero word. */
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
            /* Get number of least significant 0 bits in nibble. */
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
            /* Done if not all 4 bits are zero. */
            if (cnt != 4) {
                /* Add checked bits and count in last 4 bits checked. */
                bc += j + (unsigned int)cnt;
                break;
            }
        }
    }

    return (int)bc;
}
  5434. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
  5435. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
  5436. (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
  5437. /* Determine if the most significant byte of the encoded multi-precision number
  5438. * has the top bit set.
  5439. *
  5440. * When a is NULL, result is 0.
  5441. *
  5442. * @param [in] a SP integer.
  5443. *
  5444. * @return 1 when the top bit of top byte is set.
  5445. * @return 0 when the top bit of top byte is not set.
  5446. */
  5447. int sp_leading_bit(const sp_int* a)
  5448. {
  5449. int bit = 0;
  5450. /* Check if we have a number and value to use. */
  5451. if ((a != NULL) && (a->used > 0)) {
  5452. /* Get top word. */
  5453. sp_int_digit d = a->dp[a->used - 1];
  5454. #if SP_WORD_SIZE > 8
  5455. /* Remove bottom 8 bits until highest 8 bits left. */
  5456. while (d > (sp_int_digit)0xff) {
  5457. d >>= 8;
  5458. }
  5459. #endif
  5460. /* Get the highest bit of the 8-bit value. */
  5461. bit = (int)(d >> 7);
  5462. }
  5463. return bit;
  5464. }
  5465. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  5466. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  5467. defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
  5468. !defined(NO_RSA)
  5469. /* Set one bit of a: a |= 1 << i
  5470. * The field 'used' is updated in a.
  5471. *
  5472. * @param [in,out] a SP integer to set bit into.
  5473. * @param [in] i Index of bit to set.
  5474. *
  5475. * @return MP_OKAY on success.
  5476. * @return MP_VAL when a is NULL, index is negative or index is too large.
  5477. */
  5478. int sp_set_bit(sp_int* a, int i)
  5479. {
  5480. int err = MP_OKAY;
  5481. /* Get index of word to set. */
  5482. unsigned int w = (unsigned int)(i >> SP_WORD_SHIFT);
  5483. /* Check for valid number and and space for bit. */
  5484. if ((a == NULL) || (i < 0) || (w >= a->size)) {
  5485. err = MP_VAL;
  5486. }
  5487. if (err == MP_OKAY) {
  5488. /* Amount to shift up to set bit in word. */
  5489. unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
  5490. unsigned int j;
  5491. /* Set to zero all unused words up to and including word to have bit
  5492. * set.
  5493. */
  5494. for (j = a->used; j <= w; j++) {
  5495. a->dp[j] = 0;
  5496. }
  5497. /* Set bit in word. */
  5498. a->dp[w] |= (sp_int_digit)1 << s;
  5499. /* Update used if necessary */
  5500. if (a->used <= w) {
  5501. a->used = w + 1;
  5502. }
  5503. }
  5504. return err;
  5505. }
  5506. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  5507. * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
  5508. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5509. defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
  5510. /* Exponentiate 2 to the power of e: a = 2^e
  5511. * This is done by setting the 'e'th bit.
  5512. *
  5513. * @param [out] a SP integer to hold result.
  5514. * @param [in] e Exponent.
  5515. *
  5516. * @return MP_OKAY on success.
  5517. * @return MP_VAL when a is NULL, e is negative or 2^exponent is too large.
  5518. */
  5519. int sp_2expt(sp_int* a, int e)
  5520. {
  5521. int err = MP_OKAY;
  5522. /* Validate parameters. */
  5523. if ((a == NULL) || (e < 0)) {
  5524. err = MP_VAL;
  5525. }
  5526. if (err == MP_OKAY) {
  5527. /* Set number to zero and then set bit. */
  5528. _sp_zero(a);
  5529. err = sp_set_bit(a, e);
  5530. }
  5531. return err;
  5532. }
  5533. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5534. * WOLFSSL_KEY_GEN || !NO_DH */
  5535. /**********************
  5536. * Digit/Long functions
  5537. **********************/
  5538. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
  5539. defined(HAVE_ECC)
  5540. /* Set the multi-precision number to be the value of the digit.
  5541. *
  5542. * @param [out] a SP integer to become number.
  5543. * @param [in] d Digit to be set.
  5544. */
  5545. static void _sp_set(sp_int* a, sp_int_digit d)
  5546. {
  5547. /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
  5548. sp_int_minimal* am = (sp_int_minimal*)a;
  5549. am->dp[0] = d;
  5550. /* d == 0 => used = 0, d > 0 => used = 1 */
  5551. am->used = (d > 0);
  5552. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5553. am->sign = MP_ZPOS;
  5554. #endif
  5555. }
  5556. /* Set the multi-precision number to be the value of the digit.
  5557. *
  5558. * @param [out] a SP integer to become number.
  5559. * @param [in] d Digit to be set.
  5560. *
  5561. * @return MP_OKAY on success.
  5562. * @return MP_VAL when a is NULL.
  5563. */
  5564. int sp_set(sp_int* a, sp_int_digit d)
  5565. {
  5566. int err = MP_OKAY;
  5567. /* Validate parameters. */
  5568. if (a == NULL) {
  5569. err = MP_VAL;
  5570. }
  5571. if (err == MP_OKAY) {
  5572. _sp_set(a, d);
  5573. }
  5574. return err;
  5575. }
  5576. #endif
  5577. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
  5578. /* Set a number into the multi-precision number.
  5579. *
  5580. * Number may be larger than the size of a digit.
  5581. *
  5582. * @param [out] a SP integer to set.
  5583. * @param [in] n Long value to set.
  5584. *
  5585. * @return MP_OKAY on success.
  5586. * @return MP_VAL when a is NULL.
  5587. */
int sp_set_int(sp_int* a, unsigned long n)
{
    int err = MP_OKAY;

    if (a == NULL) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Note: the if-brace opened inside the first #if block is closed
         * inside the matching #if below - keep them paired. */
    #if SP_WORD_SIZE < SP_ULONG_BITS
        /* Assign directly if value fits in one word. */
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
    #endif
            a->dp[0] = (sp_int_digit)n;
            a->used = (n != 0);
    #if SP_WORD_SIZE < SP_ULONG_BITS
        }
        else {
            unsigned int i;

            /* Assign value word by word, least significant first. */
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
                a->dp[i] = (sp_int_digit)n;
            }
            /* Update number of words used. */
            a->used = i;
            /* Check for overflow - bits of n left but no space in a. */
            if ((i == a->size) && (n != 0)) {
                err = MP_VAL;
            }
        }
    #endif
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Unsigned input - result is always positive. */
        a->sign = MP_ZPOS;
    #endif
    }

    return err;
}
  5623. #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
  5624. #if defined(WOLFSSL_SP_MATH_ALL) || \
  5625. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5626. !defined(NO_DH) || defined(HAVE_ECC)
  5627. /* Compare a one digit number with a multi-precision number.
  5628. *
  5629. * When a is NULL, MP_LT is returned.
  5630. *
  5631. * @param [in] a SP integer to compare.
  5632. * @param [in] d Digit to compare with.
  5633. *
  5634. * @return MP_GT when a is greater than d.
  5635. * @return MP_LT when a is less than d.
  5636. * @return MP_EQ when a is equals d.
  5637. */
int sp_cmp_d(const sp_int* a, sp_int_digit d)
{
    int ret = MP_EQ;

    /* No SP integer is always less - even when d is zero. */
    if (a == NULL) {
        ret = MP_LT;
    }
    else
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first - a negative value is less than any digit. */
    if (a->sign == MP_NEG) {
        ret = MP_LT;
    }
    else
#endif
    {
        /* Check if SP integer has more than one word - must exceed any
         * single digit. */
        if (a->used > 1) {
            ret = MP_GT;
        }
        /* Special case for zero (used == 0). */
        else if (a->used == 0) {
            if (d != 0) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
        else {
            /* The single word in the SP integer can now be compared with d. */
            if (a->dp[0] > d) {
                ret = MP_GT;
            }
            else if (a->dp[0] < d) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
    }

    return ret;
}
  5678. #endif
  5679. #if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
  5680. defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
  5681. /* Add a one digit number to the multi-precision number.
  5682. *
  5683. * @param [in] a SP integer be added to.
  5684. * @param [in] d Digit to add.
  5685. * @param [out] r SP integer to store result in.
  5686. *
  5687. * @return MP_OKAY on success.
  5688. * @return MP_VAL when result is too large for fixed size dp array.
  5689. */
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Special case of zero means we want result to have a digit when not
     * adding zero. */
    if (a->used == 0) {
        r->dp[0] = d;
        r->used = (d > 0);
    }
    else {
        /* i tracks the last word written; the copy loop below resumes
         * after it. */
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        /* Set used of result - updated if overflow seen. */
        r->used = a->used;

        r->dp[0] = a0 + d;
        /* Check for carry - unsigned wrap means sum is below a0. */
        if (r->dp[0] < a0) {
            /* Do carry through all words - stop at first word that does
             * not wrap to zero. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] + 1;
                if (r->dp[i] != 0) {
                    break;
                }
            }
            /* Add another word if carry rippled past the top. */
            if (i == a->used) {
                /* Check result has enough space for another word. */
                if (i < r->size) {
                    r->used++;
                    r->dp[i] = 1;
                }
                else {
                    err = MP_VAL;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits. */
        if ((err == MP_OKAY) && (r != a)) {
            /* Copy any words that didn't update with carry. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
    }

    return err;
}
  5736. #endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
  5737. * defined(WOLFSSL_SP_READ_RADIX_10) */
  5738. #if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
  5739. defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
  5740. defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
  5741. !defined(WC_NO_RNG))
  5742. /* Sub a one digit number from the multi-precision number.
  5743. *
  5744. * @param [in] a SP integer be subtracted from.
  5745. * @param [in] d Digit to subtract.
  5746. * @param [out] r SP integer to store result in.
  5747. */
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    /* Set result used to be same as input. Updated with clamp. */
    r->used = a->used;
    /* Only possible when not handling negatives. */
    if (a->used == 0) {
        /* Set result to zero as no negative support. */
        r->dp[0] = 0;
    }
    else {
        /* i tracks the last word written; the copy loop below resumes
         * after it. */
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        r->dp[0] = a0 - d;
        /* Check for borrow - unsigned wrap means difference is above a0. */
        if (r->dp[0] > a0) {
            /* Do borrow through all words - stop at first word that does
             * not wrap to SP_DIGIT_MAX. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] - 1;
                if (r->dp[i] != SP_DIGIT_MAX) {
                    break;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits. */
        if (r != a) {
            /* Copy any words that didn't update with borrow. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
        /* Remove leading zero words. */
        sp_clamp(r);
    }
}
  5782. #endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
  5783. * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
  5784. * WOLFSSL_SP_PRIME_GEN */
  5785. #ifdef WOLFSSL_SP_ADD_D
  5786. /* Add a one digit number to the multi-precision number.
  5787. *
  5788. * @param [in] a SP integer be added to.
  5789. * @param [in] d Digit to add.
  5790. * @param [out] r SP integer to store result in.
  5791. *
  5792. * @return MP_OKAY on success.
  5793. * @return MP_VAL when result is too large for fixed size dp array.
  5794. */
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        err = _sp_add_d(a, d, r);
    }
#else
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        if (a->sign == MP_ZPOS) {
            /* Positive, so use internal function. */
            r->sign = MP_ZPOS;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] > d)) {
            /* Negative value with magnitude bigger than digit: result stays
             * negative, subtract the digit from the magnitude. */
            r->sign = MP_NEG;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Negative value with magnitude smaller than or equal to digit:
             * result flips to non-negative. */
            r->sign = MP_ZPOS;
            /* Subtract magnitude from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a single digit; used is 0 when it is zero. */
            r->used = (r->dp[0] > 0);
        }
    }
#endif

    return err;
}
  5843. #endif /* WOLFSSL_SP_ADD_D */
  5844. #ifdef WOLFSSL_SP_SUB_D
  5845. /* Sub a one digit number from the multi-precision number.
  5846. *
  5847. * @param [in] a SP integer be subtracted from.
  5848. * @param [in] d Digit to subtract.
  5849. * @param [out] r SP integer to store result in.
  5850. *
  5851. * @return MP_OKAY on success.
  5852. * @return MP_VAL when a or r is NULL.
  5853. */
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result. */
    if ((err == MP_OKAY) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        _sp_sub_d(a, d, r);
    }
#else
    /* Check for space in result especially when borrow adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        if (a->sign == MP_NEG) {
            /* Subtracting from negative: magnitude grows, use internal
             * add. */
            r->sign = MP_NEG;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] >= d)) {
            /* Positive number greater than or equal to digit - subtract
             * digit. */
            r->sign = MP_ZPOS;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Positive value smaller than digit - result is negative. */
            r->sign = MP_NEG;
            /* Subtract positive value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a single strictly positive digit (a < d here). */
            r->used = 1;
        }
    }
#endif

    return err;
}
  5903. #endif /* WOLFSSL_SP_SUB_D */
  5904. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5905. defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
  5906. !defined(NO_DH) || defined(HAVE_ECC) || \
  5907. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5908. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  5909. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
/* Multiply a by digit d and put result into r shifting up o digits.
 * r = (a * d) << (o * SP_WORD_SIZE)
 *
 * @param  [in]   a  SP integer to be multiplied.
 * @param  [in]   d  SP digit to multiply by.
 * @param  [out]  r  SP integer result.
 * @param  [in]   o  Number of digits to move result up by. Only used when
 *                   WOLFSSL_SP_SMALL is defined (small code size division).
 * @return  MP_OKAY on success.
 * @return  MP_VAL when result is too large for sp_int.
 */
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
{
    int err = MP_OKAY;
    unsigned int i;
#ifndef SQR_MUL_ASM
    sp_int_word t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

#ifdef WOLFSSL_SP_SMALL
    /* Zero out offset words. */
    for (i = 0; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    /* Don't use the offset. Only when doing small code size div. */
    (void)o;
#endif

    /* Multiply each word of a by d. */
    for (i = 0; i < a->used; i++, o++) {
#ifndef SQR_MUL_ASM
        /* Add product to top word of previous result. */
        t += (sp_int_word)a->dp[i] * d;
        /* Store low word. */
        r->dp[o] = (sp_int_digit)t;
        /* Move top word down. */
        t >>= SP_WORD_SIZE;
#else
        /* Multiply and add into low and high from previous result.
         * No overflow possible with add. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
        /* Store low word. */
        r->dp[o] = l;
        /* Move high word into low word and set high word to 0. */
        l = h;
        h = 0;
#endif
    }

    /* Check whether new word to be appended to result. */
#ifndef SQR_MUL_ASM
    if (t > 0)
#else
    if (l > 0)
#endif
    {
        /* Validate space available in result. */
        if (o == r->size) {
            err = MP_VAL;
        }
        else {
            /* Store new top word. */
#ifndef SQR_MUL_ASM
            r->dp[o++] = (sp_int_digit)t;
#else
            r->dp[o++] = l;
#endif
        }
    }
    /* Update number of words in result. */
    r->used = o;
    /* In case d is zero. */
    sp_clamp(r);

    return err;
}
  5985. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5986. * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
  5987. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5988. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
  5989. /* Multiply a by digit n and put result into r. r = a * n
  5990. *
  5991. * @param [in] a SP integer to multiply.
  5992. * @param [in] n Digit to multiply by.
  5993. * @param [out] r SP integer to hold result.
  5994. *
  5995. * @return MP_OKAY on success.
  5996. * @return MP_VAL when a or b is NULL, or a has maximum number of digits used.
  5997. */
  5998. int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
  5999. {
  6000. int err = MP_OKAY;
  6001. /* Validate parameters. */
  6002. if ((a == NULL) || (r == NULL)) {
  6003. err = MP_VAL;
  6004. }
  6005. /* Check space for product result - _sp_mul_d checks when new word added. */
  6006. if ((err == MP_OKAY) && (a->used > r->size)) {
  6007. err = MP_VAL;
  6008. }
  6009. if (err == MP_OKAY) {
  6010. err = _sp_mul_d(a, d, r, 0);
  6011. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6012. /* Update sign. */
  6013. if (d == 0) {
  6014. r->sign = MP_ZPOS;
  6015. }
  6016. else {
  6017. r->sign = a->sign;
  6018. }
  6019. #endif
  6020. }
  6021. return err;
  6022. }
  6023. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  6024. * (WOLFSSL_KEY_GEN && !NO_RSA) */
  6025. /* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
  6026. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6027. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  6028. defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
  6029. #define WOLFSSL_SP_DIV_D
  6030. #endif
  6031. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6032. !defined(NO_DH) || \
  6033. (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
  6034. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  6035. #define WOLFSSL_SP_MOD_D
  6036. #endif
  6037. #if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6038. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6039. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  6040. defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
  6041. #ifndef SP_ASM_DIV_WORD
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Callers pass the remainder of a previous division as hi, so hi < d and the
 * quotient fits in a single digit.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
#ifdef WOLFSSL_SP_DIV_WORD_HALF
    sp_int_digit r;

    /* Trial division using half of the bits in d. */

    /* Check for shortcut when no high word set. */
    if (hi == 0) {
        r = lo / d;
    }
    else {
        /* Half the bits of d. */
        sp_int_digit divh = d >> SP_HALF_SIZE;
        /* Number to divide in one value. */
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
        sp_int_word trial;
        sp_int_digit r2;

        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
        /* Divide high word by top half of divisor. */
        r = hi / divh;
        /* When result too big then assume only max value. */
        if (r > SP_HALF_MAX) {
            r = SP_HALF_MAX;
        }
        /* Shift up result for trial division calculation. */
        r <<= SP_HALF_SIZE;
        /* Calculate trial value. */
        trial = r * (sp_int_word)d;
        /* Decrease r while trial is too big. */
        while (trial > w) {
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
            trial -= (sp_int_word)d << SP_HALF_SIZE;
        }
        /* Subtract trial. */
        w -= trial;

        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
        /* Divide top SP_WORD_SIZE bits of remainder by top half of divisor. */
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
        /* Calculate trial value. */
        trial = r2 * (sp_int_word)d;
        /* Decrease r2 while trial is too big. */
        while (trial > w) {
            r2--;
            trial -= d;
        }
        /* Subtract trial. */
        w -= trial;
        /* Update result. */
        r += r2;

        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
        r2 = ((sp_int_digit)w) / d;
        /* Update result. */
        r += r2;
    }

    return r;
#else
    sp_int_word w;
    sp_int_digit r;

    /* Use built-in divide on the double-width word type. */
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
    w /= d;
    r = (sp_int_digit)w;

    return r;
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
}
  6114. #endif /* !SP_ASM_DIV_WORD */
  6115. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6116. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6117. #if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
  6118. !defined(WOLFSSL_SP_SMALL)
  6119. #if SP_WORD_SIZE == 64
  6120. /* 2^64 / 3 */
  6121. #define SP_DIV_3_CONST 0x5555555555555555L
  6122. /* 2^64 / 10 */
  6123. #define SP_DIV_10_CONST 0x1999999999999999L
  6124. #elif SP_WORD_SIZE == 32
  6125. /* 2^32 / 3 */
  6126. #define SP_DIV_3_CONST 0x55555555
  6127. /* 2^32 / 10 */
  6128. #define SP_DIV_10_CONST 0x19999999
  6129. #elif SP_WORD_SIZE == 16
  6130. /* 2^16 / 3 */
  6131. #define SP_DIV_3_CONST 0x5555
  6132. /* 2^16 / 10 */
  6133. #define SP_DIV_10_CONST 0x1999
  6134. #elif SP_WORD_SIZE == 8
  6135. /* 2^8 / 3 */
  6136. #define SP_DIV_3_CONST 0x55
  6137. /* 2^8 / 10 */
  6138. #define SP_DIV_10_CONST 0x19
  6139. #endif
  6140. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
/* Divide by 3: r = a / 3 and rem = a % 3
 *
 * Used in checking prime: (a % 3) == 0?
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL only when r is
 *                     not NULL (dereferenced unconditionally when r is NULL).
 */
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_3_CONST;
    sp_int_digit lm = 0;
    sp_int_digit hm = 0;
#endif
    sp_int_digit tr = 0;
    /* Quotient fixup. */
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
    /* Remainder fixup. */
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };

    /* Check whether only mod value needed. */
    if (r == NULL) {
        unsigned int i;

        /* 2^2 mod 3 = 4 mod 3 = 1.
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
         *
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
         */
#ifndef SQR_MUL_ASM
        t = 0;
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            t += a->dp[i];
        }
        /* Sum digits of sum. */
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
        /* Subtract trial division. */
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
        }
        /* Sum digits of sum - can get carry. */
        SP_ASM_ADDC_REG(l, tt, tr);
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
        SP_ASM_MUL(lm, hm, l, t);
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
        hm += tt * SP_DIV_3_CONST;
        /* Subtract trial division from digit. */
        tr = l - (hm * 3);
#endif
        /* tr is 0..5 but need 0..2 */
        /* Fix up remainder. */
        tr = sp_rem6[tr];
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        int i;

        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
            tt += tr * SP_DIV_3_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 3);
#endif
            /* tr is 0..5 but need 0..2 */
            /* Fix up result. */
            tt += sp_r6[tr];
            /* Fix up remainder. */
            tr = sp_rem6[tr];
            /* Store result of digit divided by 3. */
            r->dp[i] = tt;
        }
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
#endif /* !WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
/* Divide by 10: r = a / 10 and rem = a % 10
 *
 * Used when writing with a radix of 10 - decimal number.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL only when r is
 *                     not NULL (dereferenced unconditionally when r is NULL).
 */
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_10_CONST;
#endif
    sp_int_digit tr = 0;

    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up remainder. */
            tr = tr % 10;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
             */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up result. */
            tt += tr / 10;
            /* Fix up remainder. */
            tr %= 10;
            /* Store result of digit divided by 10. */
            r->dp[i] = tt;
        }
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6329. #endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
  6330. #if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
/* Divide by small number: r = a / d and rem = a % d
 *
 * Only called with divisors d <= SP_HALF_MAX (see sp_div_d/sp_mod_d callers)
 * so the trial products and fix-ups stay within range.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by. Must not be 0.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL. In non-small
 *                     builds rem must not be NULL when r is NULL.
 */
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
    sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
#endif
    sp_int_digit tr = 0;
    /* Multiplier for trial division: floor(SP_DIGIT_MAX / d). */
    sp_int_digit m = SP_DIGIT_MAX / d;

#ifndef WOLFSSL_SP_SMALL
    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up remainder. */
            tr = tr % d;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else
#endif /* !WOLFSSL_SP_SMALL */
    {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up result. */
            tt += tr / d;
            /* Fix up remainder. */
            tr %= d;
            /* Store result of dividing the digit. */
#ifdef WOLFSSL_SP_SMALL
            if (r != NULL)
#endif
            {
                r->dp[i] = tt;
            }
        }
#ifdef WOLFSSL_SP_SMALL
        if (r != NULL)
#endif
        {
            /* Set the used amount to maximal amount. */
            r->used = a->used;
            /* Remove leading zeros. */
            sp_clamp(r);
        }
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6426. #endif
  6427. #ifdef WOLFSSL_SP_DIV_D
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 * r = a / d; rem = a % d
 *
 * Use trial division algorithm - one word at a time via sp_div_word.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by. Must not be 0.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
 */
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
    sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word w = 0;
#else
    sp_int_digit l;
    sp_int_digit h = 0;
#endif
    sp_int_digit t;

    /* Divide starting at most significant word down to least. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Get current word. */
        l = a->dp[i];
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word(h, l, d);
        /* Subtract to get modulo result. */
        h = l - t * d;
#endif
        /* Store result of dividing the digit. */
        if (r != NULL) {
            r->dp[i] = t;
        }
    }
    if (r != NULL) {
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
    }
    /* Return remainder if required. */
    if (rem != NULL) {
#ifndef SQR_MUL_ASM
        *rem = (sp_int_digit)w;
#else
        *rem = h;
#endif
    }
}
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 * r = a / d; rem = a % d
 *
 * Use trial division algorithm. Fast paths for divisors of 3 and 10, and for
 * divisors fitting in half a word.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, d is 0, or r is too small for the quotient.
 */
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (d == 0)) {
        err = MP_VAL;
    }
    /* Check space for maximal sized result. */
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#if !defined(WOLFSSL_SP_SMALL)
#if SP_WORD_SIZE < 64
        if (d == 3) {
            /* Fast implementation for divisor of 3. */
            _sp_div_3(a, r, rem);
        }
        else
#endif
        if (d == 10) {
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
            _sp_div_10(a, r, rem);
        }
        else
#endif
        if (d <= SP_HALF_MAX) {
            /* For small divisors. */
            _sp_div_small(a, d, r, rem);
        }
        else
        {
            /* General case - word-at-a-time trial division. */
            _sp_div_d(a, d, r, rem);
        }
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Quotient takes the sign of the dividend - d is unsigned. */
        if (r != NULL) {
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  6541. #endif /* WOLFSSL_SP_DIV_D */
  6542. #ifdef WOLFSSL_SP_MOD_D
/* Calculate a modulo the digit d into r: r = a mod d
 *
 * Trial division one word at a time via sp_div_word; only the running
 * remainder is kept - no quotient is stored.
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   d  Digit that is the modulus. Must not be 0.
 * @param  [out]  r  Digit that is the result.
 */
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word w = 0;
#else
    sp_int_digit h = 0;
#endif

    /* Divide starting at most significant word down to least. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
        /* Subtract to get modulo result. */
        h = a->dp[i] - t * d;
#endif
    }

    /* Return remainder. */
#ifndef SQR_MUL_ASM
    *r = (sp_int_digit)w;
#else
    *r = h;
#endif
}
  6580. /* Calculate a modulo the digit d into r: r = a mod d
  6581. *
  6582. * @param [in] a SP integer to reduce.
  6583. * @param [in] d Digit to that is the modulus.
  6584. * @param [out] r Digit that is the result.
  6585. *
  6586. * @return MP_OKAY on success.
  6587. * @return MP_VAL when a is NULL or d is 0.
  6588. */
  6589. #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
  6590. !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
  6591. static
  6592. #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
  6593. int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
  6594. {
  6595. int err = MP_OKAY;
  6596. /* Validate parameters. */
  6597. if ((a == NULL) || (r == NULL) || (d == 0)) {
  6598. err = MP_VAL;
  6599. }
  6600. #if 0
  6601. sp_print(a, "a");
  6602. sp_print_digit(d, "m");
  6603. #endif
  6604. if (err == MP_OKAY) {
  6605. /* Check whether d is a power of 2. */
  6606. if ((d & (d - 1)) == 0) {
  6607. if (a->used == 0) {
  6608. *r = 0;
  6609. }
  6610. else {
  6611. *r = a->dp[0] & (d - 1);
  6612. }
  6613. }
  6614. #if !defined(WOLFSSL_SP_SMALL)
  6615. #if SP_WORD_SIZE < 64
  6616. else if (d == 3) {
  6617. /* Fast implementation for divisor of 3. */
  6618. _sp_div_3(a, NULL, r);
  6619. }
  6620. #endif
  6621. else if (d == 10) {
  6622. /* Fast implementation for divisor of 10. */
  6623. _sp_div_10(a, NULL, r);
  6624. }
  6625. #endif
  6626. else if (d <= SP_HALF_MAX) {
  6627. /* For small divisors. */
  6628. _sp_div_small(a, d, NULL, r);
  6629. }
  6630. else {
  6631. _sp_mod_d(a, d, r);
  6632. }
  6633. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6634. if (a->sign == MP_NEG) {
  6635. *r = d - *r;
  6636. }
  6637. #endif
  6638. }
  6639. #if 0
  6640. sp_print_digit(*r, "rmod");
  6641. #endif
  6642. return err;
  6643. }
  6644. #endif /* WOLFSSL_SP_MOD_D */
  6645. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  6646. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6647. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  6648. /* Divides a by 2 and stores in r: r = a >> 1
  6649. *
  6650. * @param [in] a SP integer to divide.
  6651. * @param [out] r SP integer to hold result.
  6652. */
  6653. static void _sp_div_2(const sp_int* a, sp_int* r)
  6654. {
  6655. int i;
  6656. /* Shift down each word by 1 and include bottom bit of next at top. */
  6657. for (i = 0; i < (int)a->used - 1; i++) {
  6658. r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
  6659. }
  6660. /* Last word only needs to be shifted down. */
  6661. r->dp[i] = a->dp[i] >> 1;
  6662. /* Set used to be all words seen. */
  6663. r->used = (unsigned int)i + 1;
  6664. /* Remove leading zeros. */
  6665. sp_clamp(r);
  6666. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6667. /* Same sign in result. */
  6668. r->sign = a->sign;
  6669. #endif
  6670. }
  6671. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  6672. /* Divides a by 2 and stores in r: r = a >> 1
  6673. *
  6674. * @param [in] a SP integer to divide.
  6675. * @param [out] r SP integer to hold result.
  6676. *
  6677. * @return MP_OKAY on success.
  6678. * @return MP_VAL when a or r is NULL.
  6679. */
  6680. int sp_div_2(const sp_int* a, sp_int* r)
  6681. {
  6682. int err = MP_OKAY;
  6683. /* Only when a public API. */
  6684. if ((a == NULL) || (r == NULL)) {
  6685. err = MP_VAL;
  6686. }
  6687. /* Ensure maximal size is supported by result. */
  6688. if ((err == MP_OKAY) && (a->used > r->size)) {
  6689. err = MP_VAL;
  6690. }
  6691. if (err == MP_OKAY) {
  6692. _sp_div_2(a, r);
  6693. }
  6694. return err;
  6695. }
  6696. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6697. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
  6698. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6699. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
 *
 * r = a / 2 (mod m) - constant time (a < m and positive)
 *
 * Adds m to a when a is odd (via masking, not branching) so the sum is even,
 * then shifts the sum down one bit. Loop bound is m->used regardless of a.
 *
 * @param  [in]   a  SP integer to divide.
 * @param  [in]   m  SP integer that is modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL, or r is too small to hold a + m.
 */
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Check result has enough space for a + m. */
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#ifndef SQR_MUL_ASM
        sp_int_word w = 0;
#else
        sp_int_digit l = 0;
        sp_int_digit h;
        sp_int_digit t;
#endif
        /* Mask to apply to modulus: all ones when a is odd, zero otherwise. */
        sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
        unsigned int i;

#if 0
        sp_print(a, "a");
        sp_print(m, "m");
#endif

        /* Add a to m, if a is odd, into r in constant time. */
        for (i = 0; i < m->used; i++) {
            /* Mask to apply to a - set when used value at index. */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);

#ifndef SQR_MUL_ASM
            /* Conditionally add modulus. */
            w += m->dp[i] & mask;
            /* Conditionally add a. */
            w += a->dp[i] & mask_a;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down. */
            w >>= DIGIT_BIT;
#else
            /* No high digit. */
            h = 0;
            /* Conditionally use modulus. */
            t = m->dp[i] & mask;
            /* Add with carry modulus. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Conditionally use a. */
            t = a->dp[i] & mask_a;
            /* Add with carry a. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
#endif
        }
        /* Store carry. */
#ifndef SQR_MUL_ASM
        r->dp[i] = (sp_int_digit)w;
#else
        r->dp[i] = l;
#endif
        /* Used includes carry - set or not. */
        r->used = i + 1;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif
        /* Divide conditional sum by 2. */
        _sp_div_2(r, r);

#if 0
        sp_print(r, "rd2");
#endif
    }

    return err;
}
  6786. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6787. /************************
  6788. * Add/Subtract Functions
  6789. ************************/
  6790. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
 *
 * Two carry-propagation implementations are provided: a double-width word
 * accumulator (SQR_MUL_ASM undefined) and the platform add-with-carry
 * assembly macros. Behavior is identical.
 *
 * @param  [in]   a  SP integer to add to.
 * @param  [in]   b  SP integer to add.
 * @param  [out]  r  SP integer to store result in.
 * @param  [in]   o  Number of digits to offset b.
 */
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
{
    unsigned int i = 0;
#ifndef SQR_MUL_ASM
    /* Double-width accumulator: low bits become the result digit, high bits
     * carry into the next iteration. */
    sp_int_word t = 0;
#else
    /* Low digit, high (carry) digit and temporary for the ASM carry macros. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit t = 0;
#endif

#ifdef SP_MATH_NEED_ADD_OFF
    unsigned int j;

    /* Copy a into result up to offset. */
    for (; (i < o) && (i < a->used); i++) {
        r->dp[i] = a->dp[i];
    }
    /* Set result to 0 for digits beyond those in a. */
    for (; i < o; i++) {
        r->dp[i] = 0;
    }
    /* Add each digit from a and b where both have values. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t += b->dp[j];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Keep only the carry for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Carry becomes the next low digit. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; j < b->used; i++, j++) {
#ifndef SQR_MUL_ASM
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#else
    /* Offset not needed in this build - callers pass 0. */
    (void)o;

    /* Add each digit from a and b where both have values. */
    for (; (i < a->used) && (i < b->used); i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t += b->dp[i];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Keep only the carry for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Carry becomes the next low digit. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; i < b->used; i++) {
#ifndef SQR_MUL_ASM
        t += b->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#endif

    /* Set used based on last digit put in. */
    r->used = i;
    /* Put in carry - result uses one extra digit when carry is non-zero. */
#ifndef SQR_MUL_ASM
    r->dp[i] = (sp_int_digit)t;
    r->used += (t != 0);
#else
    r->dp[i] = l;
    r->used += (l != 0);
#endif
    /* Remove leading zeros. */
    sp_clamp(r);
}
  6924. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  6925. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
  6926. !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  6927. !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
 * a must be greater than b.
 *
 * When using offset, r == a is faster.
 *
 * @param  [in]   a  SP integer to subtract from.
 * @param  [in]   b  SP integer to subtract.
 * @param  [out]  r  SP integer to store result in.
 * @param  [in]   o  Number of digits to offset b.
 */
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
    unsigned int o)
{
    unsigned int i = 0;
    unsigned int j;
#ifndef SQR_MUL_ASM
    /* Signed double-width accumulator: the arithmetic right shift below
     * carries the borrow (as -1) into the next digit. */
    sp_int_sword t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

    /* Need to copy digits up to offset into result. */
    if (r != a) {
        for (; (i < o) && (i < a->used); i++) {
            r->dp[i] = a->dp[i];
        }
    }
    else {
        /* In-place: digits below the offset are already in place. */
        i = o;
    }
    /* Index to add at is the offset now. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        /* Add a into and subtract b from current value. */
        t += a->dp[i];
        t -= b->dp[j];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down - sign-extends to keep any borrow. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into and subtract b from current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        SP_ASM_SUBB(l, h, b->dp[j]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* b is exhausted: propagate the borrow through remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        /* Add a into current value. */
        t += a->dp[i];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down - sign-extends to keep any borrow. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* Set used based on last digit put in. */
    r->used = i;
    /* Remove leading zeros. */
    sp_clamp(r);
}
  7004. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
  7005. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7006. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
  7007. /* Add b to a into r: r = a + b
  7008. *
  7009. * @param [in] a SP integer to add to.
  7010. * @param [in] b SP integer to add.
  7011. * @param [out] r SP integer to store result in.
  7012. *
  7013. * @return MP_OKAY on success.
  7014. * @return MP_VAL when a, b, or r is NULL.
  7015. */
  7016. int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
  7017. {
  7018. int err = MP_OKAY;
  7019. /* Validate parameters. */
  7020. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7021. err = MP_VAL;
  7022. }
  7023. /* Check that r as big as a and b plus one word. */
  7024. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  7025. err = MP_VAL;
  7026. }
  7027. if (err == MP_OKAY) {
  7028. #ifndef WOLFSSL_SP_INT_NEGATIVE
  7029. /* Add two positive numbers. */
  7030. _sp_add_off(a, b, r, 0);
  7031. #else
  7032. /* Same sign then add absolute values and use sign. */
  7033. if (a->sign == b->sign) {
  7034. _sp_add_off(a, b, r, 0);
  7035. r->sign = a->sign;
  7036. }
  7037. /* Different sign and abs(a) >= abs(b). */
  7038. else if (_sp_cmp_abs(a, b) != MP_LT) {
  7039. /* Subtract absolute values and use sign of a unless result 0. */
  7040. _sp_sub_off(a, b, r, 0);
  7041. if (sp_iszero(r)) {
  7042. r->sign = MP_ZPOS;
  7043. }
  7044. else {
  7045. r->sign = a->sign;
  7046. }
  7047. }
  7048. /* Different sign and abs(a) < abs(b). */
  7049. else {
  7050. /* Reverse subtract absolute values and use sign of b. */
  7051. _sp_sub_off(b, a, r, 0);
  7052. r->sign = b->sign;
  7053. }
  7054. #endif
  7055. }
  7056. return err;
  7057. }
  7058. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  7059. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7060. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7061. /* Subtract b from a into r: r = a - b
  7062. *
  7063. * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
  7064. *
  7065. * @param [in] a SP integer to subtract from.
  7066. * @param [in] b SP integer to subtract.
  7067. * @param [out] r SP integer to store result in.
  7068. *
  7069. * @return MP_OKAY on success.
  7070. * @return MP_VAL when a, b, or r is NULL.
  7071. */
  7072. int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
  7073. {
  7074. int err = MP_OKAY;
  7075. /* Validate parameters. */
  7076. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7077. err = MP_VAL;
  7078. }
  7079. /* Check that r as big as a and b plus one word. */
  7080. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  7081. err = MP_VAL;
  7082. }
  7083. if (err == MP_OKAY) {
  7084. #ifndef WOLFSSL_SP_INT_NEGATIVE
  7085. /* Subtract positive numbers b from a. */
  7086. _sp_sub_off(a, b, r, 0);
  7087. #else
  7088. /* Different sign. */
  7089. if (a->sign != b->sign) {
  7090. /* Add absolute values and use sign of a. */
  7091. _sp_add_off(a, b, r, 0);
  7092. r->sign = a->sign;
  7093. }
  7094. /* Same sign and abs(a) >= abs(b). */
  7095. else if (_sp_cmp_abs(a, b) != MP_LT) {
  7096. /* Subtract absolute values and use sign of a unless result 0. */
  7097. _sp_sub_off(a, b, r, 0);
  7098. if (sp_iszero(r)) {
  7099. r->sign = MP_ZPOS;
  7100. }
  7101. else {
  7102. r->sign = a->sign;
  7103. }
  7104. }
  7105. /* Same sign and abs(a) < abs(b). */
  7106. else {
  7107. /* Reverse subtract absolute values and use opposite sign of a */
  7108. _sp_sub_off(b, a, r, 0);
  7109. r->sign = 1 - a->sign;
  7110. }
  7111. #endif
  7112. }
  7113. return err;
  7114. }
  7115. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7116. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
  7117. /****************************
  7118. * Add/Subtract mod functions
  7119. ****************************/
  7120. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7121. (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
  7122. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
  7123. /* Add two value and reduce: r = (a + b) % m
  7124. *
  7125. * @param [in] a SP integer to add.
  7126. * @param [in] b SP integer to add with.
  7127. * @param [in] m SP integer that is the modulus.
  7128. * @param [out] r SP integer to hold result.
  7129. *
  7130. * @return MP_OKAY on success.
  7131. * @return MP_MEM when dynamic memory allocation fails.
  7132. */
  7133. static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
  7134. sp_int* r)
  7135. {
  7136. int err = MP_OKAY;
  7137. /* Calculate used based on digits used in a and b. */
  7138. unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
  7139. DECL_SP_INT(t, used);
  7140. /* Allocate a temporary SP int to hold sum. */
  7141. ALLOC_SP_INT_SIZE(t, used, err, NULL);
  7142. if (err == MP_OKAY) {
  7143. /* Do sum. */
  7144. err = sp_add(a, b, t);
  7145. }
  7146. if (err == MP_OKAY) {
  7147. /* Mod result. */
  7148. err = sp_mod(t, m, r);
  7149. }
  7150. FREE_SP_INT(t, NULL);
  7151. return err;
  7152. }
  7153. /* Add two value and reduce: r = (a + b) % m
  7154. *
  7155. * @param [in] a SP integer to add.
  7156. * @param [in] b SP integer to add with.
  7157. * @param [in] m SP integer that is the modulus.
  7158. * @param [out] r SP integer to hold result.
  7159. *
  7160. * @return MP_OKAY on success.
  7161. * @return MP_VAL when a, b, m or r is NULL.
  7162. * @return MP_MEM when dynamic memory allocation fails.
  7163. */
  7164. int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7165. {
  7166. int err = MP_OKAY;
  7167. /* Validate parameters. */
  7168. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7169. err = MP_VAL;
  7170. }
  7171. /* Ensure a and b aren't too big a number to operate on. */
  7172. else if (a->used >= SP_INT_DIGITS) {
  7173. err = MP_VAL;
  7174. }
  7175. else if (b->used >= SP_INT_DIGITS) {
  7176. err = MP_VAL;
  7177. }
  7178. #if 0
  7179. if (err == MP_OKAY) {
  7180. sp_print(a, "a");
  7181. sp_print(b, "b");
  7182. sp_print(m, "m");
  7183. }
  7184. #endif
  7185. if (err == MP_OKAY) {
  7186. /* Do add and modular reduction. */
  7187. err = _sp_addmod(a, b, m, r);
  7188. }
  7189. #if 0
  7190. if (err == MP_OKAY) {
  7191. sp_print(r, "rma");
  7192. }
  7193. #endif
  7194. return err;
  7195. }
  7196. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
  7197. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  7198. #if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  7199. defined(HAVE_ECC))
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * @param  [in]   a  SP integer to subtract from.
 * @param  [in]   b  SP integer to subtract.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* No negative representation: keep every intermediate non-negative.
     * Temporaries must hold a reduced operand or a + m, whichever is
     * larger. */
    unsigned int used = ((a->used >= m->used) ?
        ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
        ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
    DECL_SP_INT_ARRAY(t, used, 2);

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        /* Reduce a to less than m. (a is retargeted at the temporary.) */
        if (_sp_cmp(a, m) != MP_LT) {
            err = sp_mod(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Reduce b to less than m. (b is retargeted at the temporary.) */
        if (_sp_cmp(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            b = t[1];
        }
    }
    if (err == MP_OKAY) {
        /* Add m to a if a smaller than b so the subtraction below cannot
         * go negative. */
        if (_sp_cmp(a, b) == MP_LT) {
            err = sp_add(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Subtract b from a - result is already in range. */
        err = sp_sub(a, b, r);
    }
    FREE_SP_INT_ARRAY(t, NULL);
#else /* WOLFSSL_SP_INT_NEGATIVE */
    /* Negative values supported: subtract first, then let sp_mod bring the
     * (possibly negative) difference into range. */
    unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    ALLOC_SP_INT_SIZE(t, used, err, NULL);
    /* Subtract b from a into temporary. */
    if (err == MP_OKAY) {
        err = sp_sub(a, b, t);
    }
    if (err == MP_OKAY) {
        /* Reduce result mod m into result. */
        err = sp_mod(t, m, r);
    }
    FREE_SP_INT(t, NULL);
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    return err;
}
  7263. /* Sub b from a and reduce: r = (a - b) % m
  7264. * Result is always positive.
  7265. *
  7266. * @param [in] a SP integer to subtract from
  7267. * @param [in] b SP integer to subtract.
  7268. * @param [in] m SP integer that is the modulus.
  7269. * @param [out] r SP integer to hold result.
  7270. *
  7271. * @return MP_OKAY on success.
  7272. * @return MP_VAL when a, b, m or r is NULL.
  7273. * @return MP_MEM when dynamic memory allocation fails.
  7274. */
  7275. int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7276. {
  7277. int err = MP_OKAY;
  7278. /* Validate parameters. */
  7279. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7280. err = MP_VAL;
  7281. }
  7282. /* Ensure a, b and m aren't too big a number to operate on. */
  7283. else if (a->used >= SP_INT_DIGITS) {
  7284. err = MP_VAL;
  7285. }
  7286. else if (b->used >= SP_INT_DIGITS) {
  7287. err = MP_VAL;
  7288. }
  7289. else if (m->used >= SP_INT_DIGITS) {
  7290. err = MP_VAL;
  7291. }
  7292. #if 0
  7293. if (err == MP_OKAY) {
  7294. sp_print(a, "a");
  7295. sp_print(b, "b");
  7296. sp_print(m, "m");
  7297. }
  7298. #endif
  7299. if (err == MP_OKAY) {
  7300. /* Do submod. */
  7301. err = _sp_submod(a, b, m, r);
  7302. }
  7303. #if 0
  7304. if (err == MP_OKAY) {
  7305. sp_print(r, "rms");
  7306. }
  7307. #endif
  7308. return err;
  7309. }
  7310. #endif /* WOLFSSL_SP_MATH_ALL */
  7311. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Add two values and reduce: r = (a + b) % m
 *
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * @param  [in]   a  SP integer to add.
 * @param  [in]   b  SP integer to add with.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r is too small or r == m.
 */
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    /* w accumulates the sum a + b; s the trial subtraction (a + b) - m. */
    sp_int_sword w;
    sp_int_sword s;
#else
    /* Low/high digit pairs: w* for the sum, s* for the trial subtraction. */
    sp_int_digit wl;
    sp_int_digit wh;
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit t;
#endif
    /* All-ones when the modulus must be subtracted from the sum. */
    sp_int_digit mask;
    /* Operand masks start all-ones and drop to zero once i reaches the
     * operand's 'used' count, so uninitialized digits are never read while
     * the loop still runs a fixed number of iterations. */
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* Check result is as big as modulus. */
    if (m->used > r->size) {
        err = MP_VAL;
    }
    /* Validate parameters. */
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* Add a to b into r. Do the subtract of modulus but don't store
         * result. When subtract result is negative, the overflow will be
         * negative. Only need to subtract mod when result is positive -
         * overflow is positive.
         */
#ifndef SQR_MUL_ASM
        w = 0;
        s = 0;
#else
        wl = 0;
        sl = 0;
        sh = 0;
#endif
        /* Constant time - add modulus digits worth from a and b. */
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized - zero the mask there. */
            mask_a += (i == a->used);
            mask_b += (i == b->used);
#ifndef SQR_MUL_ASM
            /* Add next digits from a and b to current value. */
            w += a->dp[i] & mask_a;
            w += b->dp[i] & mask_b;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Add result to reducing value. */
            s += (sp_int_digit)w;
            /* Subtract next digit of modulus. */
            s -= m->dp[i];
            /* Move high digit of reduced result down (sign-preserving). */
            s >>= DIGIT_BIT;
            /* Move high digit of sum result down. */
            w >>= DIGIT_BIT;
#else
            wh = 0;
            /* Add next digits from a and b to current value. */
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC_REG(wl, wh, t);
            t = b->dp[i] & mask_b;
            SP_ASM_ADDC_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Add result to reducing value. */
            SP_ASM_ADDC_REG(sl, sh, wl);
            /* Subtract next digit of modulus. */
            SP_ASM_SUBB(sl, sh, m->dp[i]);
            /* Move high digit of reduced result down. */
            sl = sh;
            /* High digit is 0 when positive or -1 on negative. */
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
            /* Move high digit of sum result down. */
            wl = wh;
#endif
        }
#ifndef SQR_MUL_ASM
        /* Add carry into reduced result. */
        s += (sp_int_digit)w;
        /* s will be positive when subtracting modulus is needed. */
        mask = (sp_int_digit)0 - (s >= 0);
#else
        /* Add carry into reduced result. */
        SP_ASM_ADDC_REG(sl, sh, wl);
        /* s will be positive when subtracting modulus is needed. */
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
#endif

        /* Constant time, conditionally, subtract modulus from sum. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        wl = 0;
        wh = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            /* Add result to current value and conditionally subtract
             * modulus. */
            w += r->dp[i];
            w -= m->dp[i] & mask;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit of sum result down. */
            w >>= DIGIT_BIT;
#else
            /* Add result to current value and conditionally subtract
             * modulus. */
            SP_ASM_ADDC(wl, wh, r->dp[i]);
            t = m->dp[i] & mask;
            SP_ASM_SUBB_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Move high digit of sum result down. */
            wl = wh;
            /* High digit is 0 when positive or -1 on negative. */
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Remove leading zeros. */
        sp_clamp(r);

#if 0
        sp_print(r, "rma");
#endif
    }

    return err;
}
  7466. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  7467. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * @param  [in]   a  SP integer to subtract from.
 * @param  [in]   b  SP integer to subtract.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r is too small or r == m.
 */
int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    /* Signed accumulator - its sign after the loop tells whether a - b
     * went negative. */
    sp_int_sword w;
#else
    sp_int_digit l;
    sp_int_digit h;
    sp_int_digit t;
#endif
    /* All-ones when the modulus must be added back to the difference. */
    sp_int_digit mask;
    /* Operand masks start all-ones and drop to zero once i reaches the
     * operand's 'used' count, so uninitialized digits are never read while
     * the loop still runs a fixed number of iterations. */
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* Check result is as big as modulus plus one digit. */
    if (m->used > r->size) {
        err = MP_VAL;
    }
    /* Validate parameters. */
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* In constant time, subtract b from a putting result in r. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
        h = 0;
#endif
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized - zero the mask there. */
            mask_a += (i == a->used);
            mask_b += (i == b->used);
#ifndef SQR_MUL_ASM
            /* Add a to and subtract b from current value. */
            w += a->dp[i] & mask_a;
            w -= b->dp[i] & mask_b;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down - sign-extends to keep any borrow. */
            w >>= DIGIT_BIT;
#else
            /* Add a and subtract b from current value. */
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC_REG(l, h, t);
            t = b->dp[i] & mask_b;
            SP_ASM_SUBB_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
            /* High digit is 0 when positive or -1 on negative. */
            h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
        }

        /* When w is negative then we need to add modulus to make result
         * positive. */
#ifndef SQR_MUL_ASM
        mask = (sp_int_digit)0 - (w < 0);
#else
        /* h is already all-ones on borrow, zero otherwise. */
        mask = h;
#endif

        /* Constant time, conditionally, add modulus to difference. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            /* Add result and conditionally modulus to current value. */
            w += r->dp[i];
            w += m->dp[i] & mask;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down. */
            w >>= DIGIT_BIT;
#else
            h = 0;
            /* Add result and conditionally modulus to current value. */
            SP_ASM_ADDC(l, h, r->dp[i]);
            t = m->dp[i] & mask;
            SP_ASM_ADDC_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Remove leading zeros. */
        sp_clamp(r);

#if 0
        sp_print(r, "rms");
#endif
    }

    return err;
}
  7592. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  7593. /********************
 * Shifting functions
  7595. ********************/
  7596. #if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  7597. defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7598. /* Left shift the multi-precision number by a number of digits.
  7599. *
  7600. * @param [in,out] a SP integer to shift.
  7601. * @param [in] s Number of digits to shift.
  7602. *
  7603. * @return MP_OKAY on success.
  7604. * @return MP_VAL when a is NULL, s is negative or the result is too big.
  7605. */
  7606. int sp_lshd(sp_int* a, int s)
  7607. {
  7608. int err = MP_OKAY;
  7609. /* Validate parameters. */
  7610. if ((a == NULL) || (s < 0)) {
  7611. err = MP_VAL;
  7612. }
  7613. /* Ensure number has enough digits for operation. */
  7614. if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
  7615. err = MP_VAL;
  7616. }
  7617. if (err == MP_OKAY) {
  7618. /* Move up digits. */
  7619. XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
  7620. /* Back fill with zeros. */
  7621. XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
  7622. /* Update used. */
  7623. a->used += (unsigned int)s;
  7624. /* Remove leading zeros. */
  7625. sp_clamp(a);
  7626. }
  7627. return err;
  7628. }
  7629. #endif
  7630. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7631. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7632. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Left shift the multi-precision number by n bits.
 * Bits may be larger than the word size.
 *
 * Used by sp_mul_2d() and other internal functions.
 *
 * @param  [in,out]  a  SP integer to shift.
 * @param  [in]      n  Number of bits to shift left.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when the result is too big.
 */
static int sp_lshb(sp_int* a, int n)
{
    int err = MP_OKAY;

    if (a->used != 0) {
        /* Calculate number of whole digits to shift. */
        unsigned int s = (unsigned int)n >> SP_WORD_SHIFT;

        /* Ensure number has enough digits for result. */
        if (a->used + s >= a->size) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            /* Get count of bits to move within a digit. */
            n &= SP_WORD_MASK;
            /* Check whether this is a complicated case - bits move across
             * digit boundaries. */
            if (n != 0) {
                unsigned int i;

                /* Shift up starting at most significant digit. */
                /* Get new most significant digit - the bits shifted out of
                 * the top of the current one. */
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);

                /* Shift up each digit, pulling low bits from the digit
                 * below. Iterating downwards means each source digit is
                 * read before it is overwritten. */
                for (i = a->used - 1; i >= 1; i--) {
                    a->dp[i + s] = (a->dp[i] << n) |
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
                }
                /* Shift up least significant digit. */
                a->dp[s] = a->dp[0] << n;
                /* Add new high digit unless zero. */
                if (v != 0) {
                    a->dp[a->used + s] = v;
                    a->used++;
                }
            }
            /* Only whole digits to move and ensure not zero. */
            else if (s > 0) {
                /* Move up digits. */
                XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
            }

            /* Update used digit count. */
            a->used += s;
            /* Back fill with zeros. */
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
        }
    }

    return err;
}
  7689. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7690. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7691. #ifdef WOLFSSL_SP_MATH_ALL
  7692. /* Shift a right by c digits: a = a >> (n * SP_WORD_SIZE)
  7693. *
  7694. * @param [in, out] a SP integer to shift.
  7695. * @param [in] c Number of digits to shift.
  7696. */
  7697. void sp_rshd(sp_int* a, int c)
  7698. {
  7699. /* Do shift if we have an SP int. */
  7700. if ((a != NULL) && (c > 0)) {
  7701. /* Make zero if shift removes all digits. */
  7702. if ((unsigned int)c >= a->used) {
  7703. _sp_zero(a);
  7704. }
  7705. else {
  7706. unsigned int i;
  7707. /* Update used digits count. */
  7708. a->used -= (unsigned int)c;
  7709. /* Move digits down. */
  7710. for (i = 0; i < a->used; i++, c++) {
  7711. a->dp[i] = a->dp[c];
  7712. }
  7713. }
  7714. }
  7715. }
  7716. #endif /* WOLFSSL_SP_MATH_ALL */
  7717. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7718. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7719. defined(WOLFSSL_HAVE_SP_DH)
/* Shift a right by n bits into r: r = a >> n
 *
 * @param  [in]   a  SP integer to shift.
 * @param  [in]   n  Number of bits to shift.
 * @param  [out]  r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, n is negative or r is too small.
 */
int sp_rshb(const sp_int* a, int n, sp_int* r)
{
    int err = MP_OKAY;
    /* Number of whole digits to shift down. */
    unsigned int i = (unsigned int)(n >> SP_WORD_SHIFT);

    if ((a == NULL) || (n < 0)) {
        err = MP_VAL;
    }
    /* Handle case where shifting out all digits. */
    if ((err == MP_OKAY) && (i >= a->used)) {
        _sp_zero(r);
    }
    /* Change callers when more error cases returned. */
    else if ((err == MP_OKAY) && (a->used - i > r->size)) {
        err = MP_VAL;
    }
    else if (err == MP_OKAY) {
        unsigned int j;

        /* Number of bits to shift within digits. */
        n &= SP_WORD_SIZE - 1;
        /* Handle simple case - shift by whole digits only. */
        if (n == 0) {
            /* Set the count of used digits. */
            r->used = a->used - i;
            /* Move digits down - memmove for the overlapping in-place case. */
            if (r == a) {
                XMEMMOVE(r->dp, r->dp + i, SP_WORD_SIZEOF * r->used);
            }
            else {
                XMEMCPY(r->dp, a->dp + i, SP_WORD_SIZEOF * r->used);
            }
        }
        else {
            /* Move the bits down starting at least significant digit,
             * pulling high bits from the digit above. */
            for (j = 0; i < a->used-1; i++, j++)
                r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
            /* Most significant digit has no higher digit to pull from. */
            r->dp[j] = a->dp[i] >> n;
            /* Set the count of used digits - drop the top digit if zero. */
            r->used = j + (r->dp[j] > 0);
        }
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (sp_iszero(r)) {
            /* Set zero sign. */
            r->sign = MP_ZPOS;
        }
        else {
            /* Retain sign. */
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  7780. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7781. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
  7782. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7783. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7784. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Handle the portion of division where the top d->used digits of the
 * dividend may be greater than or equal to the divisor: if so, subtract the
 * divisor once from the top of a and add one to the matching quotient digit.
 *
 * @param  [in, out]  a  SP integer dividend; top digits reduced on return.
 * @param  [in]       d  SP integer divisor.
 * @param  [in, out]  r  SP integer quotient being accumulated; digit at
 *                       index a->used - d->used may be incremented.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
{
    unsigned int i;

    /* Compare top digits of dividend with those of divisor up to last. */
    for (i = d->used - 1; i > 0; i--) {
        /* Break if top divisor is not equal to dividend. */
        if (a->dp[a->used - d->used + i] != d->dp[i]) {
            break;
        }
    }
    /* Check if top dividend is greater than or equal to divisor. */
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
        /* Update quotient result. */
        r->dp[a->used - d->used] += 1;
        /* Get 'used' to restore - ensure zeros put into quotient. */
        i = a->used;
        /* Subtract d from top of a. */
        _sp_sub_off(a, d, a, a->used - d->used);
        /* Restore 'used' on remainder. */
        a->used = i;
    }
}
/* Divide a by d and return the quotient in r and the remainder in a.
 * r = a / d; a = a % d
 *
 * Schoolbook long division one digit at a time: a trial quotient digit is
 * estimated from the top words of the dividend and divisor, the trial
 * product is corrected downward until it no longer exceeds the dividend,
 * then subtracted. Requires the divisor to be normalized (top bit of its
 * most significant digit set) - the caller (_sp_div) arranges this.
 *
 * Note: a is constantly having multiples of d subtracted.
 *
 * @param  [in, out]  a      SP integer to be divided and remainder on out.
 * @param  [in]       d      SP integer to divide by (normalized).
 * @param  [out]      r      SP integer that is the quotient.
 * @param  [out]      trial  SP integer that is product in trial division.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when operation fails - only when compiling small code.
 */
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
{
    int err = MP_OKAY;
    unsigned int i;
#ifdef WOLFSSL_SP_SMALL
    int c;
#else
    unsigned int j;
    unsigned int o;
#ifndef SQR_MUL_ASM
    sp_int_sword sw;
#else
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit st;
#endif
#endif /* WOLFSSL_SP_SMALL */
    sp_int_digit t;
    sp_int_digit dt;

    /* Set result size to clear. */
    r->used = a->used - d->used + 1;
    /* Set all potentially used digits to zero. */
    for (i = 0; i < r->used; i++) {
        r->dp[i] = 0;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign = MP_ZPOS;
#endif
    /* Get the most significant digit (will have top bit set). */
    dt = d->dp[d->used-1];

    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
    _sp_div_same_size(a, d, r);

    /* Keep subtracting multiples of d as long as the digit count of a is
     * greater than equal to d.
     */
    for (i = a->used - 1; i >= d->used; i--) {
        /* When top digits equal, guestimate maximum multiplier.
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
         * That is, for w (word size in bits) > 1, n > 1, let:
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
         * Then,
         *   d * t
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
         *   = a - 2^((n-1)*w+1) - 2^w + 2
         *   d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
         */
        if (a->dp[i] == dt) {
            t = SP_DIGIT_MAX;
        }
        else {
            /* Calculate trial quotient by dividing top word of dividend by top
             * digit of divisor.
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
             * Implementations in assembly, using builtins or using
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
             */
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
        }
#ifdef WOLFSSL_SP_SMALL
        do {
            /* Calculate trial from trial quotient. */
            err = _sp_mul_d(d, t, trial, i - d->used);
            if (err != MP_OKAY) {
                break;
            }
            /* Check if trial is bigger. */
            c = _sp_cmp_abs(trial, a);
            if (c == MP_GT) {
                /* Decrement trial quotient and try again. */
                t--;
            }
        }
        while (c == MP_GT);
        if (err != MP_OKAY) {
            break;
        }

        /* Subtract the trial and add quotient to result. */
        _sp_sub_off(a, trial, a, 0);
        r->dp[i - d->used] += t;
        /* Handle overflow of digit. */
        if (r->dp[i - d->used] < t) {
            r->dp[i + 1 - d->used]++;
        }
#else
        /* Index of lowest digit trial is subtracted from. */
        o = i - d->used;
        do {
        #ifndef SQR_MUL_ASM
            sp_int_word tw = 0;
        #else
            sp_int_digit tl = 0;
            sp_int_digit th = 0;
        #endif
            /* Multiply divisor by trial quotient. */
            for (j = 0; j < d->used; j++) {
            #ifndef SQR_MUL_ASM
                tw += (sp_int_word)d->dp[j] * t;
                trial->dp[j] = (sp_int_digit)tw;
                tw >>= SP_WORD_SIZE;
            #else
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
                trial->dp[j] = tl;
                tl = th;
                th = 0;
            #endif
            }
            /* Store the final carry digit of the trial product. */
        #ifndef SQR_MUL_ASM
            trial->dp[j] = (sp_int_digit)tw;
        #else
            trial->dp[j] = tl;
        #endif

            /* Check trial quotient isn't larger than dividend. */
            for (j = d->used; j > 0; j--) {
                if (trial->dp[j] != a->dp[j + o]) {
                    break;
                }
            }
            /* Decrement trial quotient if larger and try again. */
            if (trial->dp[j] > a->dp[j + o]) {
                t--;
            }
        }
        while (trial->dp[j] > a->dp[j + o]);

    #ifndef SQR_MUL_ASM
        sw = 0;
    #else
        sl = 0;
        sh = 0;
    #endif
        /* Subtract trial - don't need to update used. */
        for (j = 0; j <= d->used; j++) {
        #ifndef SQR_MUL_ASM
            sw += a->dp[j + o];
            sw -= trial->dp[j];
            a->dp[j + o] = (sp_int_digit)sw;
            sw >>= SP_WORD_SIZE;
        #else
            st = a->dp[j + o];
            SP_ASM_ADDC(sl, sh, st);
            st = trial->dp[j];
            SP_ASM_SUBB(sl, sh, st);
            a->dp[j + o] = sl;
            sl = sh;
            /* Sign-extend the borrow into the high word. */
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
        #endif
        }
        r->dp[o] = t;
#endif /* WOLFSSL_SP_SMALL */
    }
    /* Update used. */
    a->used = i + 1;
    if (a->used == d->used) {
        /* Finish div now that length of dividend is same as divisor. */
        _sp_div_same_size(a, d, r);
    }

    return err;
}
/* Divide a by d and return the quotient in r and the remainder in rem.
 * r = a / d; rem = a % d
 *
 * Fast paths handle a < d, a == d and same-bit-length a > d without full
 * division. Otherwise the divisor is normalized (shifted so its top bit is
 * set), the dividend shifted to match, _sp_div_impl does the work, and the
 * remainder is shifted back down.
 *
 * @param  [in]   a     SP integer to be divided.
 * @param  [in]   d     SP integer to divide by.
 * @param  [out]  r     SP integer that is the quotient. May be NULL.
 * @param  [out]  rem   SP integer that is the remainder. May be NULL.
 * @param  [in]   used  Number of digits in temporaries to use.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
    unsigned int used)
{
    int err = MP_OKAY;
    int ret;
    int done = 0;
    int s = 0;
    sp_int* sa = NULL;
    sp_int* sd = NULL;
    sp_int* tr = NULL;
    sp_int* trial = NULL;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int signA = MP_ZPOS;
    unsigned int signD = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    /* Intermediates will always be less than or equal to dividend. */
    DECL_SP_INT_ARRAY(td, used, 4);

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Cache sign for results. */
    signA = a->sign;
    signD = d->sign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    /* Handle simple case of: dividend < divisor. */
    ret = _sp_cmp_abs(a, d);
    if (ret == MP_LT) {
        /* a = 0 * d + a */
        if ((rem != NULL) && (a != rem)) {
            _sp_copy(a, rem);
        }
        if (r != NULL) {
            _sp_set(r, 0);
        }
        done = 1;
    }
    /* Handle simple case of: dividend == divisor. */
    else if (ret == MP_EQ) {
        /* a = 1 * d + 0 */
        if (rem != NULL) {
            _sp_set(rem, 0);
        }
        if (r != NULL) {
            _sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }
    else if (sp_count_bits(a) == sp_count_bits(d)) {
        /* a is greater than d but same bit length - subtract.
         * Quotient must be 1 since 2*d would need one more bit. */
        if (rem != NULL) {
            _sp_sub_off(a, d, rem, 0);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = signA;
#endif
        }
        if (r != NULL) {
            _sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }

    /* Allocate temporary 'sp_int's and assign. */
    if ((!done) && (err == MP_OKAY)) {
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        int cnt = 4;
        /* Reuse remainder sp_int where possible. */
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
            sa = rem;
            cnt--;
        }
        /* Reuse result sp_int where possible. */
        if ((r != NULL) && (r != d)) {
            tr = r;
            cnt--;
        }
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
#else
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
#endif
    }
    if ((!done) && (err == MP_OKAY)) {
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        int i = 2;

        /* Set to temporary when not reusing. */
        if (sa == NULL) {
            sa = td[i++];
            _sp_init_size(sa, used);
        }
        if (tr == NULL) {
            tr = td[i];
            _sp_init_size(tr, a->used - d->used + 2);
        }
#else
        sa = td[2];
        tr = td[3];

        _sp_init_size(sa, used);
        _sp_init_size(tr, a->used - d->used + 2);
#endif
        sd    = td[0];
        trial = td[1];

        /* Initialize sizes to minimal values. */
        _sp_init_size(sd, d->used + 1);
        _sp_init_size(trial, used);

        /* Move divisor to top of word. Adjust dividend as well. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
        _sp_copy(a, sa);
        /* Only shift if top bit of divisor not set. */
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                _sp_copy(d, sd);
                /* Use the shifted copy as the divisor from here on. */
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
        /* Do division: tr = sa / d, sa = sa % d. */
        err = _sp_div_impl(sa, d, tr, trial);
        /* Return the remainder if required. */
        if ((err == MP_OKAY) && (rem != NULL)) {
            /* Move result back down if moved up for divisor value. */
            if (s != SP_WORD_SIZE) {
                (void)sp_rshb(sa, s, sa);
            }
            _sp_copy(sa, rem);
            sp_clamp(rem);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
#endif
        }
        /* Return the quotient if required. */
        if ((err == MP_OKAY) && (r != NULL)) {
            _sp_copy(tr, r);
            sp_clamp(r);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if ((r->used == 0) || (signA == signD)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = MP_NEG;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
/* Divide a by d and return the quotient in r and the remainder in rem.
 * r = a / d; rem = a % d
 *
 * Public entry point: validates arguments and result sizes, works out the
 * number of temporary digits needed, then calls _sp_div.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    SP integer to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, d is 0, or a
 *          result does not have enough digits available.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
{
    int err = MP_OKAY;
    unsigned int used = 1;

    /* Validate parameters. */
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
        err = MP_VAL;
    }
    /* a / 0 = infinity. */
    if ((err == MP_OKAY) && sp_iszero(d)) {
        err = MP_VAL;
    }
    /* Ensure quotient result has enough memory.
     * NOTE(review): 'used' fields are unsigned so when a->used + 2 <
     * d->used the expression wraps to a huge value and this rejects -
     * presumably callers never pass such sizes; confirm. */
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
        err = MP_VAL;
    }
    if ((err == MP_OKAY) && (rem != NULL)) {
        /* Ensure remainder has enough memory. */
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
            err = MP_VAL;
        }
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
            err = MP_VAL;
        }
    }
    if (err == MP_OKAY) {
        if (a->used == SP_INT_DIGITS) {
            /* May need to shift number being divided left into a new word. */
            int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
            if ((bits != SP_WORD_SIZE) &&
                    (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
                /* Normalization shift would overflow the digit array. */
                err = MP_VAL;
            }
            else {
                used = SP_INT_DIGITS;
            }
        }
        else {
            /* One extra digit for the normalization shift. */
            used = a->used + 1;
        }
    }

    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(d, "b");
#endif
        /* Do operation. */
        err = _sp_div(a, d, r, rem, used);
#if 0
        if (err == MP_OKAY) {
            if (rem != NULL) {
                sp_print(rem, "rdr");
            }
            if (r != NULL) {
                sp_print(r, "rdw");
            }
        }
#endif
    }

    return err;
}
  8219. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8220. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8221. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  8222. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  8223. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  8224. #ifndef FREESCALE_LTC_TFM
  8225. #ifdef WOLFSSL_SP_INT_NEGATIVE
/* Calculate the remainder of dividing a by m: r = a mod m, for the case
 * where r aliases m. A temporary holds the division remainder so m is not
 * clobbered before the sign fix-up that needs it.
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to store result in (aliases m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Remainder will start as a. */
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);

    /* In case remainder is modulus - allocate temporary. */
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t, a->used + 1);
        /* Use divide to calculate remainder and don't get quotient. */
        err = sp_div(a, m, NULL, t);
    }
    if (err == MP_OKAY) {
        /* Make remainder positive and copy into result.
         * A non-zero remainder whose sign differs from m is shifted into
         * the modulus' sign range by adding m. */
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
            err = sp_add(t, m, r);
        }
        else {
            _sp_copy(t, r);
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
  8259. #endif
  8260. /* Calculate the remainder of dividing a by m: r = a mod m.
  8261. *
  8262. * @param [in] a SP integer to reduce.
  8263. * @param [in] m SP integer that is the modulus.
  8264. * @param [out] r SP integer to store result in.
  8265. *
  8266. * @return MP_OKAY on success.
  8267. * @return MP_VAL when a, m or r is NULL or m is 0.
  8268. * @return MP_MEM when dynamic memory allocation fails.
  8269. */
  8270. int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
  8271. {
  8272. int err = MP_OKAY;
  8273. /* Validate parameters. */
  8274. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  8275. err = MP_VAL;
  8276. }
  8277. /* Ensure a isn't too big a number to operate on. */
  8278. else if (a->used >= SP_INT_DIGITS) {
  8279. err = MP_VAL;
  8280. }
  8281. #ifndef WOLFSSL_SP_INT_NEGATIVE
  8282. if (err == MP_OKAY) {
  8283. /* Use divide to calculate remainder and don't get quotient. */
  8284. err = sp_div(a, m, NULL, r);
  8285. }
  8286. #else
  8287. if ((err == MP_OKAY) && (r != m)) {
  8288. err = sp_div(a, m, NULL, r);
  8289. if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
  8290. err = sp_add(r, m, r);
  8291. }
  8292. }
  8293. else if (err == MP_OKAY) {
  8294. err = _sp_mod(a, m, r);
  8295. }
  8296. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  8297. return err;
  8298. }
  8299. #endif /* !FREESCALE_LTC_TFM */
  8300. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8301. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8302. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  8303. defined(HAVE_ECC) || !defined(NO_RSA)
  8304. /* START SP_MUL implementations. */
  8305. /* This code is generated.
  8306. * To generate:
  8307. * cd scripts/sp/sp_int
  8308. * ./gen.sh
  8309. * File sp_mul.c contains code.
  8310. */
  8311. #ifdef SQR_MUL_ASM
/* Multiply a by b into r where a and b have same no. digits. r = a * b
 *
 * Optimised comba (column-wise) code for when the number of digits in a and
 * b are the same. Low half columns are accumulated into a temporary so r
 * may alias a or b; the temporary is copied into r at the end.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by. Same digit count as a.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used];
#else
    sp_int_digit t[SP_INT_DIGITS / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* l/h/o form a three-digit column accumulator. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        const sp_int_digit* dp;

        h = 0;
        l = 0;
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Low half of the columns - results go into the temporary. */
        for (k = 1; k <= a->used - 1; k++) {
            j = (int)k;
            dp = a->dp;
            for (; j >= 0; dp++, j--) {
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* High half of the columns - results go straight into r. */
        for (; k <= (a->used - 1) * 2; k++) {
            i = k - (b->used - 1);
            dp = &b->dp[b->used - 1];
            for (; i < a->used; i++, dp--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
            }
            r->dp[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        r->dp[k] = l;
        /* Copy the saved low-half digits into the result. */
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
        r->used = k + 1;
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
/* Multiply a by b into r. r = a * b
 *
 * Generic comba (column-wise) multiplication using the assembly-macro
 * accumulator. All columns are accumulated into a temporary so r may alias
 * a or b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* l/h/o form a three-digit column accumulator. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;

        h = 0;
        l = 0;
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns where every digit of b (up to k) contributes. */
        for (k = 1; k <= b->used - 1; k++) {
            i = 0;
            j = (int)k;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining columns start from the top digit of b. */
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
            j = (int)(b->used - 1);
            i = k - (unsigned int)j;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        t[k] = l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8463. #else
/* Multiply a by b into r. r = a * b
 *
 * Generic comba (column-wise) multiplication using double-width word
 * arithmetic instead of assembly macros. All columns are accumulated into
 * a temporary so r may alias a or b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply by.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* w holds one digit product; l/h (and o) accumulate a column. */
        sp_int_word w;
        sp_int_word l;
        sp_int_word h;
#ifdef SP_WORD_OVERFLOW
        sp_int_word o;
#endif

        w = (sp_int_word)a->dp[0] * b->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
            /* i = max(0, k - (b->used - 1)) without branching: the mask is
             * all-ones when the subtraction didn't go negative. */
            i = k - (b->used - 1);
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
            j = (int)(k - i);
            for (; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * b->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                /* Push carries up when the accumulators may overflow. */
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8544. #endif
  8545. #ifndef WOLFSSL_SP_SMALL
  8546. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  8547. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  8548. #ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Long-hand implementation for 4-digit operands: all 16 digit products are
 * computed up front, then summed column by column with w[0] carrying into
 * the next column. r may alias a or b since all products are read first.
 *
 * @param  [in]   a  SP integer to multiply (4 digits).
 * @param  [in]   b  SP integer to multiply (4 digits).
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[16];
#endif
    const sp_int_digit* da = a->dp;
    const sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All digit products, grouped by output column (0..6). */
        w[0] = (sp_int_word)da[0] * db[0];
        w[1] = (sp_int_word)da[0] * db[1];
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];

        /* w[0] doubles as the running column accumulator/carry: each
         * column adds the low digits of its products, emits a result
         * digit, then folds in the products' high digits as carry. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = (sp_int_digit)w[0];
        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8664. #else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 4-digit operands using the assembly-macro
 * three-digit accumulator (l/h/o). The low four result digits are staged
 * in a temporary so r may alias a or b.
 *
 * @param  [in]   a  SP integer to multiply (4 digits).
 * @param  [in]   b  SP integer to multiply (4 digits).
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY always.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[4];

    /* Column 0. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 - high digits go straight into r. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Column 6 - final product cannot overflow into o. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Copy the staged low digits into the result. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);

    return MP_OKAY;
}
  8726. #endif /* SQR_MUL_ASM */
  8727. #endif /* SP_WORD_SIZE == 64 */
  8728. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  8729. #ifdef SQR_MUL_ASM
  8730. /* Multiply a by b and store in r: r = a * b
  8731. *
  8732. * Comba implementation.
  8733. *
  8734. * @param [in] a SP integer to multiply.
  8735. * @param [in] b SP integer to multiply.
  8736. * @param [out] r SP integer result.
  8737. *
  8738. * @return MP_OKAY on success.
 * @return MP_MEM is never returned: this fixed-size variant allocates no memory.
  8740. */
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column accumulator: (o:h:l) is a triple-digit running sum. */
    sp_int_digit l = 0;    /* low word of the current column sum */
    sp_int_digit h = 0;    /* middle word of the current column sum */
    sp_int_digit o = 0;    /* overflow word (carries out of h) */
    sp_int_digit t[6];     /* low 6 digits staged here so r may alias a or b */
    /* Column 0 (i+j == 0): h receives the digit stored below. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1 (i+j == 1). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    /* Shift accumulator down one digit for the next column. */
    l = h;
    h = o;
    o = 0;
    /* Column 2 (i+j == 2). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3 (i+j == 3). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 (i+j == 4). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5 (i+j == 5). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 (i+j == 6). Indices >= 6 lie past the 6 input digits, so
     * from here digits are written directly to r even if r aliases a/b. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7 (i+j == 7). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8 (i+j == 8). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9 (i+j == 9). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Columns 10 and 11: final digit pair from a[5]*b[5]. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Inputs fully consumed: copy the staged low digits into the result. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);    /* Trim leading zero digits of the 12-digit result. */
    return MP_OKAY;
}
  8827. #endif /* SQR_MUL_ASM */
  8828. #endif /* SP_WORD_SIZE == 64 */
  8829. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  8830. #ifdef SQR_MUL_ASM
  8831. /* Multiply a by b and store in r: r = a * b
  8832. *
  8833. * Comba implementation.
  8834. *
  8835. * @param [in] a SP integer to multiply.
  8836. * @param [in] b SP integer to multiply.
  8837. * @param [out] r SP integer result.
  8838. *
  8839. * @return MP_OKAY on success.
 * @return MP_MEM is never returned: this fixed-size variant allocates no memory.
  8841. */
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column accumulator: (o:h:l) is a triple-digit running sum. */
    sp_int_digit l = 0;    /* low word of the current column sum */
    sp_int_digit h = 0;    /* middle word of the current column sum */
    sp_int_digit o = 0;    /* overflow word (carries out of h) */
    sp_int_digit t[8];     /* low 8 digits staged here so r may alias a or b */
    /* Column 0 (i+j == 0): h receives the digit stored below. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1 (i+j == 1). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    /* Shift accumulator down one digit for the next column. */
    l = h;
    h = o;
    o = 0;
    /* Column 2 (i+j == 2). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3 (i+j == 3). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 (i+j == 4). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5 (i+j == 5). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 (i+j == 6). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7 (i+j == 7). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8 (i+j == 8). Indices >= 8 lie past the 8 input digits, so
     * from here digits are written directly to r even if r aliases a/b. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9 (i+j == 9). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10 (i+j == 10). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11 (i+j == 11). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12 (i+j == 12). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13 (i+j == 13). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Columns 14 and 15: final digit pair from a[7]*b[7]. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* Inputs fully consumed: copy the staged low digits into the result. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    sp_clamp(r);    /* Trim leading zero digits of the 16-digit result. */
    return MP_OKAY;
}
  8972. #endif /* SQR_MUL_ASM */
  8973. #endif /* SP_WORD_SIZE == 32 */
  8974. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  8975. #ifdef SQR_MUL_ASM
  8976. /* Multiply a by b and store in r: r = a * b
  8977. *
  8978. * Comba implementation.
  8979. *
  8980. * @param [in] a SP integer to multiply.
  8981. * @param [in] b SP integer to multiply.
  8982. * @param [out] r SP integer result.
  8983. *
  8984. * @return MP_OKAY on success.
 * @return MP_MEM is never returned: this fixed-size variant allocates no memory.
  8986. */
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column accumulator: (o:h:l) is a triple-digit running sum. */
    sp_int_digit l = 0;    /* low word of the current column sum */
    sp_int_digit h = 0;    /* middle word of the current column sum */
    sp_int_digit o = 0;    /* overflow word (carries out of h) */
    sp_int_digit t[12];    /* low 12 digits staged here so r may alias a or b */
    /* Column 0 (i+j == 0): h receives the digit stored below. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1 (i+j == 1). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    /* Shift accumulator down one digit for the next column. */
    l = h;
    h = o;
    o = 0;
    /* Column 2 (i+j == 2). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3 (i+j == 3). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 (i+j == 4). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5 (i+j == 5). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 (i+j == 6). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7 (i+j == 7). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8 (i+j == 8). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9 (i+j == 9). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10 (i+j == 10). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11 (i+j == 11). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12 (i+j == 12). Indices >= 12 lie past the 12 input digits,
     * so from here digits are written directly to r even if r aliases
     * a or b. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13 (i+j == 13). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 14 (i+j == 14). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 15 (i+j == 15). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 16 (i+j == 16). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 17 (i+j == 17). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 18 (i+j == 18). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 19 (i+j == 19). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 20 (i+j == 20). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 21 (i+j == 21). */
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Columns 22 and 23: final digit pair from a[11]*b[11]. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* Inputs fully consumed: copy the staged low digits into the result. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    sp_clamp(r);    /* Trim leading zero digits of the 24-digit result. */
    return MP_OKAY;
}
  9229. #endif /* SQR_MUL_ASM */
  9230. #endif /* SP_WORD_SIZE == 32 */
  9231. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  9232. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  9233. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  9234. (SP_WORD_SIZE == 64)))
  9235. #if SP_INT_DIGITS >= 32
  9236. /* Multiply a by b and store in r: r = a * b
  9237. *
  9238. * Comba implementation.
  9239. *
  9240. * @param [in] a SP integer to multiply.
  9241. * @param [in] b SP integer to multiply.
  9242. * @param [out] r SP integer result.
  9243. *
  9244. * @return MP_OKAY on success.
  9245. * @return MP_MEM when dynamic memory allocation fails.
  9246. */
  9247. static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
  9248. {
  9249. int err = MP_OKAY;
  9250. sp_int_digit l = 0;
  9251. sp_int_digit h = 0;
  9252. sp_int_digit o = 0;
  9253. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9254. sp_int_digit* t = NULL;
  9255. #else
  9256. sp_int_digit t[16];
  9257. #endif
  9258. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9259. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
  9260. DYNAMIC_TYPE_BIGINT);
  9261. if (t == NULL) {
  9262. err = MP_MEM;
  9263. }
  9264. #endif
  9265. if (err == MP_OKAY) {
  9266. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9267. t[0] = h;
  9268. h = 0;
  9269. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9270. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9271. t[1] = l;
  9272. l = h;
  9273. h = o;
  9274. o = 0;
  9275. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9276. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9277. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9278. t[2] = l;
  9279. l = h;
  9280. h = o;
  9281. o = 0;
  9282. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9283. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9284. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9285. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9286. t[3] = l;
  9287. l = h;
  9288. h = o;
  9289. o = 0;
  9290. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9291. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9292. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9293. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9294. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9295. t[4] = l;
  9296. l = h;
  9297. h = o;
  9298. o = 0;
  9299. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9300. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9301. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9302. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9303. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9304. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9305. t[5] = l;
  9306. l = h;
  9307. h = o;
  9308. o = 0;
  9309. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9310. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9311. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9312. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9313. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9314. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9315. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9316. t[6] = l;
  9317. l = h;
  9318. h = o;
  9319. o = 0;
  9320. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9321. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9322. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9323. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9324. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9325. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9326. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9327. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9328. t[7] = l;
  9329. l = h;
  9330. h = o;
  9331. o = 0;
  9332. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9333. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9334. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9335. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9336. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9337. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9338. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9339. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9340. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9341. t[8] = l;
  9342. l = h;
  9343. h = o;
  9344. o = 0;
  9345. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9346. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9347. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9348. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9349. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9350. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9351. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9352. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9353. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9354. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9355. t[9] = l;
  9356. l = h;
  9357. h = o;
  9358. o = 0;
  9359. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9360. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9361. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9362. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9363. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9364. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9365. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9366. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9367. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9368. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9369. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9370. t[10] = l;
  9371. l = h;
  9372. h = o;
  9373. o = 0;
  9374. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9375. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9376. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9377. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9378. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9379. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9380. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9381. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9382. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9383. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9384. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9385. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9386. t[11] = l;
  9387. l = h;
  9388. h = o;
  9389. o = 0;
  9390. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9391. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9392. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9393. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9394. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9395. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9396. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9397. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9398. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9399. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9400. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9401. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9402. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9403. t[12] = l;
  9404. l = h;
  9405. h = o;
  9406. o = 0;
  9407. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9408. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9409. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9410. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9411. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9412. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9413. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9414. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9415. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9416. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9417. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9418. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9419. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9420. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9421. t[13] = l;
  9422. l = h;
  9423. h = o;
  9424. o = 0;
  9425. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9426. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9427. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9428. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9429. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9430. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9431. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9432. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9433. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9434. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9435. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9436. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9437. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9438. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9439. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9440. t[14] = l;
  9441. l = h;
  9442. h = o;
  9443. o = 0;
  9444. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9445. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9446. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9447. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9448. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9449. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9450. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9451. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9452. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9453. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9454. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9455. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9456. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9457. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9458. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9459. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9460. t[15] = l;
  9461. l = h;
  9462. h = o;
  9463. o = 0;
  9464. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9465. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9466. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9467. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9468. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9469. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9470. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9471. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  9472. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  9473. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  9474. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  9475. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  9476. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  9477. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  9478. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  9479. r->dp[16] = l;
  9480. l = h;
  9481. h = o;
  9482. o = 0;
  9483. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  9484. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  9485. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  9486. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  9487. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  9488. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  9489. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  9490. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  9491. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  9492. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  9493. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  9494. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  9495. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  9496. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  9497. r->dp[17] = l;
  9498. l = h;
  9499. h = o;
  9500. o = 0;
  9501. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  9502. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  9503. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  9504. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  9505. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  9506. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  9507. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  9508. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  9509. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  9510. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  9511. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  9512. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  9513. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  9514. r->dp[18] = l;
  9515. l = h;
  9516. h = o;
  9517. o = 0;
  9518. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  9519. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  9520. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  9521. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  9522. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  9523. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  9524. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  9525. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  9526. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  9527. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  9528. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  9529. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  9530. r->dp[19] = l;
  9531. l = h;
  9532. h = o;
  9533. o = 0;
  9534. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  9535. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  9536. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  9537. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  9538. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  9539. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  9540. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  9541. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  9542. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  9543. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  9544. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  9545. r->dp[20] = l;
  9546. l = h;
  9547. h = o;
  9548. o = 0;
  9549. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  9550. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  9551. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  9552. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  9553. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  9554. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  9555. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  9556. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  9557. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  9558. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  9559. r->dp[21] = l;
  9560. l = h;
  9561. h = o;
  9562. o = 0;
  9563. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  9564. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  9565. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  9566. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  9567. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  9568. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  9569. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  9570. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  9571. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  9572. r->dp[22] = l;
  9573. l = h;
  9574. h = o;
  9575. o = 0;
  9576. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  9577. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  9578. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  9579. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  9580. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  9581. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  9582. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  9583. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  9584. r->dp[23] = l;
  9585. l = h;
  9586. h = o;
  9587. o = 0;
  9588. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  9589. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  9590. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  9591. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  9592. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  9593. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  9594. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  9595. r->dp[24] = l;
  9596. l = h;
  9597. h = o;
  9598. o = 0;
  9599. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  9600. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  9601. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  9602. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  9603. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  9604. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  9605. r->dp[25] = l;
  9606. l = h;
  9607. h = o;
  9608. o = 0;
  9609. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  9610. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  9611. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  9612. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  9613. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  9614. r->dp[26] = l;
  9615. l = h;
  9616. h = o;
  9617. o = 0;
  9618. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  9619. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  9620. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  9621. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  9622. r->dp[27] = l;
  9623. l = h;
  9624. h = o;
  9625. o = 0;
  9626. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  9627. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  9628. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  9629. r->dp[28] = l;
  9630. l = h;
  9631. h = o;
  9632. o = 0;
  9633. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  9634. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  9635. r->dp[29] = l;
  9636. l = h;
  9637. h = o;
  9638. SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
  9639. r->dp[30] = l;
  9640. r->dp[31] = h;
  9641. XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
  9642. r->used = 32;
  9643. sp_clamp(r);
  9644. }
  9645. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9646. if (t != NULL) {
  9647. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  9648. }
  9649. #endif
  9650. return err;
  9651. }
  9652. #endif /* SP_INT_DIGITS >= 32 */
9653. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || (!WOLFSSL_SP_MATH &&
9654. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64)) */
  9655. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  9656. #if SP_INT_DIGITS >= 48
  9657. /* Multiply a by b and store in r: r = a * b
  9658. *
  9659. * Comba implementation.
  9660. *
  9661. * @param [in] a SP integer to multiply.
  9662. * @param [in] b SP integer to multiply.
  9663. * @param [out] r SP integer result.
  9664. *
  9665. * @return MP_OKAY on success.
  9666. * @return MP_MEM when dynamic memory allocation fails.
  9667. */
  9668. static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
  9669. {
  9670. int err = MP_OKAY;
  9671. sp_int_digit l = 0;
  9672. sp_int_digit h = 0;
  9673. sp_int_digit o = 0;
  9674. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9675. sp_int_digit* t = NULL;
  9676. #else
  9677. sp_int_digit t[24];
  9678. #endif
  9679. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9680. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  9681. DYNAMIC_TYPE_BIGINT);
  9682. if (t == NULL) {
  9683. err = MP_MEM;
  9684. }
  9685. #endif
  9686. if (err == MP_OKAY) {
  9687. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9688. t[0] = h;
  9689. h = 0;
  9690. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9691. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9692. t[1] = l;
  9693. l = h;
  9694. h = o;
  9695. o = 0;
  9696. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9697. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9698. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9699. t[2] = l;
  9700. l = h;
  9701. h = o;
  9702. o = 0;
  9703. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9704. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9705. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9706. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9707. t[3] = l;
  9708. l = h;
  9709. h = o;
  9710. o = 0;
  9711. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9712. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9713. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9714. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9715. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9716. t[4] = l;
  9717. l = h;
  9718. h = o;
  9719. o = 0;
  9720. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9721. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9722. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9723. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9724. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9725. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9726. t[5] = l;
  9727. l = h;
  9728. h = o;
  9729. o = 0;
  9730. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9731. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9732. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9733. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9734. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9735. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9736. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9737. t[6] = l;
  9738. l = h;
  9739. h = o;
  9740. o = 0;
  9741. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9742. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9743. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9744. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9745. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9746. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9747. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9748. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9749. t[7] = l;
  9750. l = h;
  9751. h = o;
  9752. o = 0;
  9753. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9754. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9755. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9756. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9757. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9758. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9759. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9760. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9761. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9762. t[8] = l;
  9763. l = h;
  9764. h = o;
  9765. o = 0;
  9766. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9767. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9768. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9769. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9770. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9771. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9772. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9773. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9774. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9775. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9776. t[9] = l;
  9777. l = h;
  9778. h = o;
  9779. o = 0;
  9780. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9781. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9782. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9783. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9784. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9785. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9786. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9787. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9788. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9789. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9790. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9791. t[10] = l;
  9792. l = h;
  9793. h = o;
  9794. o = 0;
  9795. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9796. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9797. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9798. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9799. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9800. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9801. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9802. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9803. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9804. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9805. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9806. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9807. t[11] = l;
  9808. l = h;
  9809. h = o;
  9810. o = 0;
  9811. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9812. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9813. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9814. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9815. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9816. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9817. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9818. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9819. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9820. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9821. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9822. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9823. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9824. t[12] = l;
  9825. l = h;
  9826. h = o;
  9827. o = 0;
  9828. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9829. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9830. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9831. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9832. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9833. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9834. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9835. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9836. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9837. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9838. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9839. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9840. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9841. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9842. t[13] = l;
  9843. l = h;
  9844. h = o;
  9845. o = 0;
  9846. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9847. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9848. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9849. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9850. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9851. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9852. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9853. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9854. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9855. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9856. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9857. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9858. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9859. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9860. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9861. t[14] = l;
  9862. l = h;
  9863. h = o;
  9864. o = 0;
  9865. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9866. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9867. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9868. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9869. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9870. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9871. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9872. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9873. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9874. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9875. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9876. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9877. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9878. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9879. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9880. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9881. t[15] = l;
  9882. l = h;
  9883. h = o;
  9884. o = 0;
  9885. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
  9886. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9887. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9888. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9889. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9890. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9891. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9892. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9893. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  9894. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  9895. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  9896. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  9897. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  9898. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  9899. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  9900. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  9901. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
  9902. t[16] = l;
  9903. l = h;
  9904. h = o;
  9905. o = 0;
  9906. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
  9907. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
  9908. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  9909. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  9910. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  9911. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  9912. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  9913. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  9914. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  9915. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  9916. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  9917. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  9918. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  9919. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  9920. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  9921. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  9922. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
  9923. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
  9924. t[17] = l;
  9925. l = h;
  9926. h = o;
  9927. o = 0;
  9928. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
  9929. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
  9930. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
  9931. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  9932. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  9933. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  9934. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  9935. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  9936. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  9937. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  9938. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  9939. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  9940. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  9941. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  9942. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  9943. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  9944. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
  9945. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
  9946. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
  9947. t[18] = l;
  9948. l = h;
  9949. h = o;
  9950. o = 0;
  9951. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
  9952. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
  9953. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
  9954. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
  9955. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  9956. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  9957. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  9958. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  9959. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  9960. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  9961. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  9962. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  9963. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  9964. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  9965. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  9966. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  9967. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
  9968. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
  9969. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
  9970. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
  9971. t[19] = l;
  9972. l = h;
  9973. h = o;
  9974. o = 0;
  9975. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
  9976. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
  9977. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
  9978. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
  9979. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
  9980. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  9981. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  9982. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  9983. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  9984. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  9985. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  9986. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  9987. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  9988. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  9989. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  9990. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  9991. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
  9992. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
  9993. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
  9994. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
  9995. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
  9996. t[20] = l;
  9997. l = h;
  9998. h = o;
  9999. o = 0;
  10000. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
  10001. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
  10002. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
  10003. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
  10004. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
  10005. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
  10006. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  10007. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  10008. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  10009. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  10010. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  10011. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  10012. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  10013. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  10014. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  10015. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  10016. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
  10017. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
  10018. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
  10019. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
  10020. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
  10021. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
  10022. t[21] = l;
  10023. l = h;
  10024. h = o;
  10025. o = 0;
  10026. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
  10027. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
  10028. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
  10029. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
  10030. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
  10031. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
  10032. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
  10033. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  10034. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  10035. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  10036. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  10037. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  10038. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  10039. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  10040. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  10041. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  10042. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
  10043. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
  10044. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
  10045. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
  10046. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
  10047. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
  10048. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
  10049. t[22] = l;
  10050. l = h;
  10051. h = o;
  10052. o = 0;
  10053. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
  10054. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
  10055. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
  10056. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
  10057. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
  10058. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
  10059. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
  10060. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
  10061. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  10062. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  10063. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  10064. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  10065. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  10066. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  10067. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  10068. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  10069. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
  10070. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
  10071. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
  10072. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
  10073. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
  10074. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
  10075. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
  10076. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
  10077. t[23] = l;
  10078. l = h;
  10079. h = o;
  10080. o = 0;
  10081. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
  10082. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
  10083. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
  10084. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
  10085. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
  10086. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
  10087. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
  10088. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
  10089. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  10090. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  10091. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  10092. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  10093. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  10094. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  10095. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  10096. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
  10097. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
  10098. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
  10099. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
  10100. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
  10101. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
  10102. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
  10103. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
  10104. r->dp[24] = l;
  10105. l = h;
  10106. h = o;
  10107. o = 0;
  10108. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
  10109. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
  10110. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
  10111. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
  10112. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
  10113. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
  10114. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
  10115. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
  10116. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  10117. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  10118. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  10119. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  10120. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  10121. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  10122. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
  10123. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
  10124. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
  10125. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
  10126. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
  10127. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
  10128. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
  10129. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
  10130. r->dp[25] = l;
  10131. l = h;
  10132. h = o;
  10133. o = 0;
  10134. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
  10135. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
  10136. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
  10137. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
  10138. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
  10139. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
  10140. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
  10141. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
  10142. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  10143. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  10144. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  10145. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  10146. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  10147. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
  10148. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
  10149. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
  10150. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
  10151. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
  10152. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
  10153. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
  10154. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
  10155. r->dp[26] = l;
  10156. l = h;
  10157. h = o;
  10158. o = 0;
  10159. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
  10160. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
  10161. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
  10162. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
  10163. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
  10164. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
  10165. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
  10166. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
  10167. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  10168. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  10169. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  10170. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  10171. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
  10172. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
  10173. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
  10174. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
  10175. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
  10176. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
  10177. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
  10178. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
  10179. r->dp[27] = l;
  10180. l = h;
  10181. h = o;
  10182. o = 0;
  10183. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
  10184. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
  10185. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
  10186. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
  10187. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
  10188. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
  10189. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
  10190. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
  10191. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  10192. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  10193. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  10194. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
  10195. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
  10196. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
  10197. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
  10198. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
  10199. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
  10200. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
  10201. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
  10202. r->dp[28] = l;
  10203. l = h;
  10204. h = o;
  10205. o = 0;
  10206. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
  10207. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
  10208. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
  10209. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
  10210. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
  10211. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
  10212. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
  10213. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
  10214. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  10215. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  10216. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
  10217. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
  10218. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
  10219. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
  10220. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
  10221. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
  10222. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
  10223. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
  10224. r->dp[29] = l;
  10225. l = h;
  10226. h = o;
  10227. o = 0;
  10228. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
  10229. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
  10230. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
  10231. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
  10232. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
  10233. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
  10234. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
  10235. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
  10236. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
  10237. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
  10238. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
  10239. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
  10240. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
  10241. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
  10242. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
  10243. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
  10244. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
  10245. r->dp[30] = l;
  10246. l = h;
  10247. h = o;
  10248. o = 0;
  10249. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
  10250. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
  10251. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
  10252. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
  10253. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
  10254. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
  10255. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
  10256. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
  10257. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
  10258. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
  10259. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
  10260. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
  10261. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
  10262. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
  10263. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
  10264. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
  10265. r->dp[31] = l;
  10266. l = h;
  10267. h = o;
  10268. o = 0;
  10269. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
  10270. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
  10271. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
  10272. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
  10273. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
  10274. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
  10275. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
  10276. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
  10277. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
  10278. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
  10279. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
  10280. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
  10281. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
  10282. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
  10283. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
  10284. r->dp[32] = l;
  10285. l = h;
  10286. h = o;
  10287. o = 0;
  10288. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
  10289. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
  10290. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
  10291. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
  10292. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
  10293. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
  10294. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
  10295. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
  10296. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
  10297. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
  10298. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
  10299. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
  10300. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
  10301. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
  10302. r->dp[33] = l;
  10303. l = h;
  10304. h = o;
  10305. o = 0;
  10306. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
  10307. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
  10308. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
  10309. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
  10310. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
  10311. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
  10312. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
  10313. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
  10314. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
  10315. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
  10316. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
  10317. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
  10318. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
  10319. r->dp[34] = l;
  10320. l = h;
  10321. h = o;
  10322. o = 0;
  10323. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
  10324. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
  10325. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
  10326. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
  10327. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
  10328. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
  10329. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
  10330. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
  10331. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
  10332. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
  10333. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
  10334. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
  10335. r->dp[35] = l;
  10336. l = h;
  10337. h = o;
  10338. o = 0;
  10339. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
  10340. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
  10341. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
  10342. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
  10343. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
  10344. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
  10345. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
  10346. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
  10347. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
  10348. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
  10349. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
  10350. r->dp[36] = l;
  10351. l = h;
  10352. h = o;
  10353. o = 0;
  10354. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
  10355. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
  10356. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
  10357. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
  10358. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
  10359. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
  10360. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
  10361. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
  10362. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
  10363. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
  10364. r->dp[37] = l;
  10365. l = h;
  10366. h = o;
  10367. o = 0;
  10368. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
  10369. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
  10370. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
  10371. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
  10372. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
  10373. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
  10374. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
  10375. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
  10376. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
  10377. r->dp[38] = l;
  10378. l = h;
  10379. h = o;
  10380. o = 0;
  10381. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
  10382. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
  10383. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
  10384. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
  10385. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
  10386. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
  10387. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
  10388. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
  10389. r->dp[39] = l;
  10390. l = h;
  10391. h = o;
  10392. o = 0;
  10393. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
  10394. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
  10395. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
  10396. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
  10397. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
  10398. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
  10399. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
  10400. r->dp[40] = l;
  10401. l = h;
  10402. h = o;
  10403. o = 0;
  10404. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
  10405. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
  10406. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
  10407. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
  10408. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
  10409. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
  10410. r->dp[41] = l;
  10411. l = h;
  10412. h = o;
  10413. o = 0;
  10414. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
  10415. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
  10416. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
  10417. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
  10418. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
  10419. r->dp[42] = l;
  10420. l = h;
  10421. h = o;
  10422. o = 0;
  10423. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
  10424. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
  10425. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
  10426. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
  10427. r->dp[43] = l;
  10428. l = h;
  10429. h = o;
  10430. o = 0;
  10431. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
  10432. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
  10433. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
  10434. r->dp[44] = l;
  10435. l = h;
  10436. h = o;
  10437. o = 0;
  10438. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
  10439. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
  10440. r->dp[45] = l;
  10441. l = h;
  10442. h = o;
  10443. SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
  10444. r->dp[46] = l;
  10445. r->dp[47] = h;
  10446. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  10447. r->used = 48;
  10448. sp_clamp(r);
  10449. }
  10450. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  10451. if (t != NULL) {
  10452. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  10453. }
  10454. #endif
  10455. return err;
  10456. }
  10457. #endif /* SP_INT_DIGITS >= 48 */
  10458. #if SP_INT_DIGITS >= 64
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 * Operands are split into 16-digit halves: a = a1 * 2^(16*W) + a0, likewise b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* a1;
    sp_int* b1;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    sp_int_digit cb;
    /* t holds the 16-digit operand sums, z the 33-digit partial products. */
    DECL_SP_INT_ARRAY(t, 16, 2);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* z0 (low partial product) is computed directly into r. */
        z0 = r;

        /* Copy the high 16 digits of each operand into a1/b1. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
        b1->used = 16;

        /* z2 = a1 * b1 */
        err = _sp_mul_16(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca) */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb) */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_16(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_16(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 16 */
        /* Account for the carries out of the 16-digit operand sums. */
        z1->dp[32] = ca & cb;
        l = 0;
        if (ca) {
            /* Carry from a01: add b01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        l = 0;
        if (cb) {
            /* Carry from b01: add a01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow into the top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Above digit 31 of r only z1 digits contribute. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10617. #endif /* SP_INT_DIGITS >= 64 */
  10618. #if SP_INT_DIGITS >= 96
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 * Operands are split into 24-digit halves: a = a1 * 2^(24*W) + a0, likewise b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* a1;
    sp_int* b1;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    sp_int_digit cb;
    /* t holds the 24-digit operand sums, z the 49-digit partial products. */
    DECL_SP_INT_ARRAY(t, 24, 2);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* z0 (low partial product) is computed directly into r. */
        z0 = r;

        /* Copy the high 24 digits of each operand into a1/b1. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
        b1->used = 24;

        /* z2 = a1 * b1 */
        err = _sp_mul_24(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca) */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb) */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_24(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_24(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 24 */
        /* Account for the carries out of the 24-digit operand sums. */
        z1->dp[48] = ca & cb;
        l = 0;
        if (ca) {
            /* Carry from a01: add b01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        l = 0;
        if (cb) {
            /* Carry from b01: add a01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow into the top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Above digit 47 of r only z1 digits contribute. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10777. #endif /* SP_INT_DIGITS >= 96 */
  10778. #if SP_INT_DIGITS >= 128
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 * Operands are split into 32-digit halves: a = a1 * 2^(32*W) + a0, likewise b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* a1;
    sp_int* b1;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    sp_int_digit cb;
    /* t holds the 32-digit operand sums, z the 65-digit partial products. */
    DECL_SP_INT_ARRAY(t, 32, 2);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* z0 (low partial product) is computed directly into r. */
        z0 = r;

        /* Copy the high 32 digits of each operand into a1/b1. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
        b1->used = 32;

        /* z2 = a1 * b1 */
        err = _sp_mul_32(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca) */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb) */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_32(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_32(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 32 */
        /* Account for the carries out of the 32-digit operand sums. */
        z1->dp[64] = ca & cb;
        l = 0;
        if (ca) {
            /* Carry from a01: add b01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        l = 0;
        if (cb) {
            /* Carry from b01: add a01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow into the top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Above digit 63 of r only z1 digits contribute. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10937. #endif /* SP_INT_DIGITS >= 128 */
  10938. #if SP_INT_DIGITS >= 192
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 * Operands are split into 48-digit halves: a = a1 * 2^(48*W) + a0, likewise b.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* a1;
    sp_int* b1;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;
    sp_int_digit cb;
    /* t holds the 48-digit operand sums, z the 97-digit partial products. */
    DECL_SP_INT_ARRAY(t, 48, 2);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* z0 (low partial product) is computed directly into r. */
        z0 = r;

        /* Copy the high 48 digits of each operand into a1/b1. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
        b1->used = 48;

        /* z2 = a1 * b1 */
        err = _sp_mul_48(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca) */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb) */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_48(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_48(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 48 */
        /* Account for the carries out of the 48-digit operand sums. */
        z1->dp[96] = ca & cb;
        l = 0;
        if (ca) {
            /* Carry from a01: add b01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        l = 0;
        if (cb) {
            /* Carry from b01: add a01 into the high half of z1. */
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow into the top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Above digit 95 of r only z1 digits contribute. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11097. #endif /* SP_INT_DIGITS >= 192 */
  11098. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  11099. #endif /* !WOLFSSL_SP_SMALL */
/* Multiply a by b and store in r: r = a * b
 *
 * Dispatches to a fixed-size comba/Karatsuba implementation when both
 * operands have a matching, supported digit count; otherwise falls back to
 * the generic implementations.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b or is NULL; or the result will be too big for fixed
 *          data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int sign = MP_ZPOS;
#endif

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Product sign is the XOR of the operand signs. */
        sign = a->sign ^ b->sign;
#endif

        /* Anything times zero is zero. */
        if ((a->used == 0) || (b->used == 0)) {
            _sp_zero(r);
        }
        else
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if ((a->used == 4) && (b->used == 4)) {
            err = _sp_mul_4(a, b, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 6) && (b->used == 6)) {
            err = _sp_mul_6(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if ((a->used == 8) && (b->used == 8)) {
            err = _sp_mul_8(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 12) && (b->used == 12)) {
            err = _sp_mul_12(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if ((a->used == 16) && (b->used == 16)) {
            err = _sp_mul_16(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if ((a->used == 24) && (b->used == 24)) {
            err = _sp_mul_24(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if ((a->used == 32) && (b->used == 32)) {
            err = _sp_mul_32(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if ((a->used == 48) && (b->used == 48)) {
            err = _sp_mul_48(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if ((a->used == 64) && (b->used == 64)) {
            err = _sp_mul_64(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if ((a->used == 96) && (b->used == 96)) {
            err = _sp_mul_96(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
#ifdef SQR_MUL_ASM
        /* Generic implementation for equal-length operands. */
        if (a->used == b->used) {
            err = _sp_mul_nxn(a, b, r);
        }
        else
#endif
        {
            /* Fully generic fallback. */
            err = _sp_mul(a, b, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* A zero result is always positive. */
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rmul");
    }
#endif

    return err;
}
  11237. /* END SP_MUL implementations. */
  11238. #endif
  11239. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  11240. defined(WOLFCRYPT_HAVE_ECCSI) || \
  11241. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
  11242. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11243. *
  11244. * @param [in] a SP integer to multiply.
  11245. * @param [in] b SP integer to multiply.
  11246. * @param [in] m SP integer that is the modulus.
  11247. * @param [out] r SP integer result.
  11248. *
  11249. * @return MP_OKAY on success.
  11250. * @return MP_MEM when dynamic memory allocation fails.
  11251. */
  11252. static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
  11253. sp_int* r)
  11254. {
  11255. int err = MP_OKAY;
  11256. /* Create temporary for multiplication result. */
  11257. DECL_SP_INT(t, a->used + b->used);
  11258. ALLOC_SP_INT(t, a->used + b->used, err, NULL);
  11259. if (err == MP_OKAY) {
  11260. err = sp_init_size(t, a->used + b->used);
  11261. }
  11262. /* Multiply and reduce. */
  11263. if (err == MP_OKAY) {
  11264. err = sp_mul(a, b, t);
  11265. }
  11266. if (err == MP_OKAY) {
  11267. err = sp_mod(t, m, r);
  11268. }
  11269. /* Dispose of an allocated SP int. */
  11270. FREE_SP_INT(t, NULL);
  11271. return err;
  11272. }
  11273. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11274. *
  11275. * @param [in] a SP integer to multiply.
  11276. * @param [in] b SP integer to multiply.
  11277. * @param [in] m SP integer that is the modulus.
  11278. * @param [out] r SP integer result.
  11279. *
  11280. * @return MP_OKAY on success.
  11281. * @return MP_MEM when dynamic memory allocation fails.
  11282. */
  11283. static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
  11284. sp_int* r)
  11285. {
  11286. int err = MP_OKAY;
  11287. /* Use r as intermediate result if not same as pointer m which is needed
  11288. * after first intermediate result.
  11289. */
  11290. if (r != m) {
  11291. /* Multiply and reduce. */
  11292. err = sp_mul(a, b, r);
  11293. if (err == MP_OKAY) {
  11294. err = sp_mod(r, m, r);
  11295. }
  11296. }
  11297. else {
  11298. /* Do operation using temporary. */
  11299. err = _sp_mulmod_tmp(a, b, m, r);
  11300. }
  11301. return err;
  11302. }
  11303. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11304. *
  11305. * @param [in] a SP integer to multiply.
  11306. * @param [in] b SP integer to multiply.
  11307. * @param [in] m SP integer that is the modulus.
  11308. * @param [out] r SP integer result.
  11309. *
  11310. * @return MP_OKAY on success.
  11311. * @return MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
  11312. * fixed data length.
  11313. * @return MP_MEM when dynamic memory allocation fails.
  11314. */
  11315. int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  11316. {
  11317. int err = MP_OKAY;
  11318. /* Validate parameters. */
  11319. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  11320. err = MP_VAL;
  11321. }
  11322. /* Ensure result SP int is big enough for intermediates. */
  11323. if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
  11324. err = MP_VAL;
  11325. }
  11326. #if 0
  11327. if (err == 0) {
  11328. sp_print(a, "a");
  11329. sp_print(b, "b");
  11330. sp_print(m, "m");
  11331. }
  11332. #endif
  11333. if (err == MP_OKAY) {
  11334. err = _sp_mulmod(a, b, m, r);
  11335. }
  11336. #if 0
  11337. if (err == 0) {
  11338. sp_print(r, "rmm");
  11339. }
  11340. #endif
  11341. return err;
  11342. }
  11343. #endif
  11344. #ifdef WOLFSSL_SP_INVMOD
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
 * Right-shift Algorithm. NOT constant time.
 *
 * Invariants maintained: b*a = u (mod m) and c*a = v (mod m), so when the
 * loop ends with v == 1, c is the inverse of a.
 *
 * Algorithm:
 *  1. u = m, v = a, b = 0, c = 1
 *  2. While v != 1 and u != 0
 *   2.1. If u even
 *    2.1.1. u /= 2
 *    2.1.2. b = (b / 2) mod m
 *   2.2. Else if v even
 *    2.2.1. v /= 2
 *    2.2.2. c = (c / 2) mod m
 *   2.3. Else if u >= v
 *    2.3.1. u -= v
 *    2.3.2. b = (b - c) mod m
 *   2.4. Else (v > u)
 *    2.4.1. v -= u
 *    2.4.2. c = (c - b) mod m
 *  3. NO_INVERSE if u == 0
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [in]   u  SP integer to use in calculation. (Scratch; set to m.)
 * @param  [in]   v  SP integer to use in calculation. (Scratch; set to a.)
 * @param  [in]   b  SP integer to use in calculation. (Scratch.)
 * @param  [out]  c  SP integer that is the inverse.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when no inverse.
 */
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
    sp_int* v, sp_int* b, sp_int* c)
{
    int err = MP_OKAY;

    /* 1. u = m, v = a, b = 0, c = 1 */
    _sp_copy(m, u);
    if (a != v) {
        _sp_copy(a, v);
    }
    _sp_zero(b);
    _sp_set(c, 1);

    /* 2. While v != 1 and u != 0 */
    while (!sp_isone(v) && !sp_iszero(u)) {
        /* 2.1. If u even */
        if ((u->dp[0] & 1) == 0) {
            /* 2.1.1. u /= 2 */
            _sp_div_2(u, u);
            /* 2.1.2. b = (b / 2) mod m
             * Make b even first; adding m doesn't change b mod m. */
            if (sp_isodd(b)) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_div_2(b, b);
        }
        /* 2.2. Else if v even */
        else if ((v->dp[0] & 1) == 0) {
            /* 2.2.1. v /= 2 */
            _sp_div_2(v, v);
            /* 2.2.2. c = (c / 2) mod m */
            if (sp_isodd(c)) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_div_2(c, c);
        }
        /* 2.3. Else if u >= v */
        else if (_sp_cmp_abs(u, v) != MP_LT) {
            /* 2.3.1. u -= v */
            _sp_sub_off(u, v, u, 0);
            /* 2.3.2. b = (b - c) mod m
             * Add m first when b < c so the magnitude subtraction cannot
             * go negative. */
            if (_sp_cmp_abs(b, c) == MP_LT) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_sub_off(b, c, b, 0);
        }
        /* 2.4. Else (v > u) */
        else {
            /* 2.4.1. v -= u */
            _sp_sub_off(v, u, v, 0);
            /* 2.4.2. c = (c - b) mod m */
            if (_sp_cmp_abs(c, b) == MP_LT) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_sub_off(c, b, c, 0);
        }
    }
    /* 3. NO_INVERSE if u == 0 */
    if (sp_iszero(u)) {
        err = MP_VAL;
    }

    return err;
}
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
    (!defined(NO_RSA) || !defined(NO_DH))
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
 * Extended Euclidean Algorithm. NOT constant time.
 *
 * Creates one new SP int (d); x, y, b and c are caller-provided scratch.
 *
 * Algorithm:
 *  1. x = m, y = a, b = 1, c = 0
 *  2. while x > 1
 *   2.1. d = x / y, r = x mod y
 *   2.2. c -= d * b
 *   2.3. x = y, y = r
 *   2.4. s = b, b = c, c = s
 *  3. If y != 0 then NO_INVERSE
 *  4. If c < 0 then c += m
 *  5. inv = c
 *
 * @param  [in]   a    SP integer to find inverse of.
 * @param  [in]   m    SP integer this is the modulus.
 * @param  [in]   x    SP integer to use in calculation.
 * @param  [in]   y    SP integer to use in calculation.
 * @param  [in]   b    SP integer to use in calculation.
 * @param  [in]   c    SP integer to use in calculation.
 * @param  [out]  inv  SP integer that is the inverse.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when no inverse.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
{
    int err = MP_OKAY;
    sp_int* s;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Without negative number support, signs of b and c are tracked in
     * separate flags and the sp_ints hold magnitudes only. */
    int bneg = 0;
    int cneg = 0;
    int neg;
#endif
    DECL_SP_INT(d, m->used + 1);

    ALLOC_SP_INT(d, m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        /* NOTE(review): mp_init() return value ignored - presumably cannot
         * fail for a freshly allocated SP int. Confirm. */
        mp_init(d);

        /* 1. x = m, y = a, b = 1, c = 0 */
        if (a != y) {
            _sp_copy(a, y);
        }
        _sp_copy(m, x);
        _sp_set(b, 1);
        _sp_zero(c);
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder written back into x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            /* 2.2. c -= d * b */
            if (sp_isone(d)) {
                /* c -= 1 * b */
                err = sp_sub(c, b, c);
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    err = sp_sub(c, d, c);
                }
            }
            /* 2.3. x = y, y = r - pointer swap; remainder is already in x. */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s */
            s = b; b = c; c = s;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && sp_isneg(c)) {
        err = sp_add(c, m, c);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#else
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder written back into x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            /* 2.2. c -= d * b, with signs handled via bneg/cneg flags. */
            if (sp_isone(d)) {
                /* c -= 1 * b */
                if ((bneg ^ cneg) == 1) {
                    /* c -= -b or -c -= b, therefore add. */
                    _sp_add_off(c, b, c, 0);
                }
                else if (_sp_cmp_abs(c, b) == MP_LT) {
                    /* |c| < |b| and same sign, reverse subtract and negate. */
                    _sp_sub_off(b, c, c, 0);
                    cneg = !cneg;
                }
                else {
                    /* |c| >= |b| */
                    _sp_sub_off(c, b, c, 0);
                }
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    if ((bneg ^ cneg) == 1) {
                        /* c -= -d or -c -= d, therefore add. */
                        _sp_add_off(c, d, c, 0);
                    }
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
                        /* |c| < |d| and same sign, reverse subtract and negate.
                         */
                        _sp_sub_off(d, c, c, 0);
                        cneg = !cneg;
                    }
                    else {
                        _sp_sub_off(c, d, c, 0);
                    }
                }
            }
            /* 2.3. x = y, y = r - pointer swap; remainder is already in x. */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s - swap values and sign flags. */
            s = b; b = c; c = s;
            neg = bneg; bneg = cneg; cneg = neg;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && cneg) {
        /* c = m - |c| */
        _sp_sub_off(m, c, c, 0);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#endif

    FREE_SP_INT(d, NULL);
    return err;
}
#endif
/* Calculates the multiplicative inverse in the field.
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
 *
 * r*a = x*m + 1
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result. r cannot be m.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when m is even and a divides m evenly.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* b = NULL;
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
    DECL_SP_INT(c, 2 * m->used + 1);

    /* Allocate SP ints:
     *  - x3 one word larger than modulus
     *  - x1 one word longer than twice modulus used
     */
    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        u = t[0];
        v = t[1];
        b = t[2];
        /* c allocated separately and larger for even mod case. */
    }
    /* Initialize intermediate values with minimal sizes. */
    if (err == MP_OKAY) {
        err = sp_init_size(u, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(v, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(b, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(c, 2 * m->used + 1);
    }

    if (err == MP_OKAY) {
        const sp_int* mm = m;
        const sp_int* ma = a;
        int evenMod = 0;

        if (sp_iseven(m)) {
            /* Even modulus: invert m modulo a instead, then fix up below.
             * a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
            mm = a;
            ma = v;
            _sp_copy(a, u);
            err = sp_mod(m, a, v);
            /* v == 0 when a divides m evenly - no inverse. */
            if ((err == MP_OKAY) && sp_iszero(v)) {
                err = MP_VAL;
            }
            evenMod = 1;
        }

        if (err == MP_OKAY) {
            /* Calculate inverse. */
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
    (!defined(NO_RSA) || !defined(NO_DH))
            /* Use the division-based algorithm for moduli of 1024 bits or
             * more; the right-shift algorithm for smaller ones. */
            if (sp_count_bits(mm) >= 1024) {
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
            }
            else
#endif
            {
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
            }
        }

        /* Fixup for even modulus. */
        if ((err == MP_OKAY) && evenMod) {
            /* Finish operation.
             *    a^-1 mod m = m + ((1 - m*c) / a)
             * => a^-1 mod m = m - ((m*c - 1) / a)
             */
            err = sp_mul(c, m, c);
            if (err == MP_OKAY) {
                _sp_sub_d(c, 1, c);
                err = sp_div(c, a, c, NULL);
            }
            if (err == MP_OKAY) {
                err = sp_sub(m, c, r);
            }
        }
        else if (err == MP_OKAY) {
            _sp_copy(c, r);
        }
    }

    FREE_SP_INT(c, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
/* Calculates the multiplicative inverse in the field.
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
 *
 * r*a = x*m + 1
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result. r cannot be m.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
 *          m is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
        err = MP_VAL;
    }
    /* Result must be able to hold intermediates up to twice the modulus
     * size. */
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
        err = MP_VAL;
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Don't support negative modulus. */
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    if (err == MP_OKAY) {
        /* Ensure number is less than modulus - reduce into r and use that as
         * the value to invert from here on. */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            err = sp_mod(a, m, r);
            a = r;
        }
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        /* Make 'a' positive */
        err = sp_add(m, a, r);
        a = r;
    }
#endif

    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
        err = MP_VAL;
    }
    /* r*2*x != n*2*y + 1 for integer x,y - no inverse when both even. */
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
        err = MP_VAL;
    }

    /* 1*1 = 0*m + 1 - the inverse of 1 is 1. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        _sp_set(r, 1);
    }
    else if (err == MP_OKAY) {
        err = _sp_invmod(a, m, r);
    }

    return err;
}
  11750. #endif /* WOLFSSL_SP_INVMOD */
  11751. #ifdef WOLFSSL_SP_INVMOD_MONT_CT
  11752. /* Number of entries to pre-compute.
  11753. * Many pre-defined primes have multiple of 8 consecutive 1s.
  11754. * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
  11755. */
  11756. #define CT_INV_MOD_PRE_CNT 8
/* Calculates the multiplicative inverse in the field - constant time.
 *
 * Modulus (m) must be a prime and greater than 2.
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
 *
 * Algorithm:
 *  pre = pre-computed values, m = modulus, a = value to find inverse of,
 *  e = exponent
 *  Pre-calc:
 *   1. pre[0] = 2^0 * a mod m
 *   2. For i in 2..CT_INV_MOD_PRE_CNT
 *    2.1. pre[i-1] = ((pre[i-2] ^ 2) * a) mod m
 *  Calc inverse:
 *   1. e = m - 2
 *   2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
 *   3. t = pre[j-1]
 *   4. s = 0
 *   5. j = 0
 *   6. For i index of next top bit..0
 *    6.1. bit = e[i]
 *    6.2. j += bit
 *    6.3. s += 1
 *    6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
 *     6.4.1. s -= 1 - bit
 *     6.4.2. For s downto 1
 *      6.4.2.1. t = (t ^ 2) mod m
 *     6.4.3. s = 1 - bit
 *     6.4.4. t = (t * pre[j-1]) mod m
 *     6.4.5. j = 0
 *   7. For s downto 1
 *    7.1. t = (t ^ 2) mod m
 *   8. If j > 0 then r = (t * pre[j-1]) mod m
 *   9. Else r = t
 *
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
 * @param  [in]   m   SP integer this is the modulus.
 * @param  [out]  r   SP integer to hold result.
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
    sp_int_digit mp)
{
    int err = MP_OKAY;
    int i;
    int j = 0;
    int s = 0;
    sp_int* t = NULL;
    sp_int* e = NULL;
    /* Table of CT_INV_MOD_PRE_CNT pre-computed values plus two extra entries
     * used as working values t and e. */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#else
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#endif

#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err,
        NULL);
#else
    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        t = pre[CT_INV_MOD_PRE_CNT + 0];
        e = pre[CT_INV_MOD_PRE_CNT + 1];
        /* Space for sqr and mul result. */
        _sp_init_size(t, m->used * 2 + 1);
        /* e = mod - 2 */
        _sp_init_size(e, m->used + 1);

        /* Create pre-computation results: ((2^(1..8))-1).a. */
        _sp_init_size(pre[0], m->used * 2 + 1);
        /* 1. pre[0] = 2^0 * a mod m
         *    Start with 1.a = a.
         */
        _sp_copy(a, pre[0]);
        /* 2. For i in 2..CT_INV_MOD_PRE_CNT
         *    For rest of entries in table.
         */
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
            /* 2.1 pre[i] = ((pre[i-1] ^ 2) * a) mod m */
            /* Previous value ..1 -> ..10 */
            _sp_init_size(pre[i], m->used * 2 + 1);
            err = sp_sqr(pre[i-1], pre[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
            /* ..10 -> ..11 */
            if (err == MP_OKAY) {
                err = sp_mul(pre[i], a, pre[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
        }
    }

    if (err == MP_OKAY) {
        /* 1. e = m - 2 */
        _sp_sub_d(m, 2, e);
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
         *    One or more of the top bits is 1 so count.
         */
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
                    (j == CT_INV_MOD_PRE_CNT)) {
                break;
            }
        }
        /* 3. Set tmp to product of leading bits. */
        _sp_copy(pre[j-1], t);

        /* 4. s = 0 */
        s = 0;
        /* 5. j = 0 */
        j = 0;
        /* 6. For i index of next top bit..0
         *    Do remaining bits in exponent.
         */
        for (; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. bit = e[i] */
            int bit = sp_is_bit_set(e, (unsigned int)i);

            /* 6.2. j += bit
             *      Update count of consecutive 1 bits.
             */
            j += bit;
            /* 6.3. s += 1
             *      Update count of squares required.
             */
            s++;

            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
             */
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
                /* 6.4.1. s -= 1 - bit */
                bit = 1 - bit;
                s -= bit;
                /* 6.4.2. For s downto 1
                 *        Do s squares.
                 */
                for (; (err == MP_OKAY) && (s > 0); s--) {
                    /* 6.4.2.1. t = (t ^ 2) mod m */
                    err = sp_sqr(t, t);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t, m, mp);
                    }
                }
                /* 6.4.3. s = 1 - bit */
                s = bit;

                /* 6.4.4. t = (t * pre[j-1]) mod m */
                if (err == MP_OKAY) {
                    err = sp_mul(t, pre[j-1], t);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t, m, mp);
                }
                /* 6.4.5. j = 0
                 *        Reset number of 1 bits seen.
                 */
                j = 0;
            }
        }
    }
    if (err == MP_OKAY) {
        /* 7. For s downto 1
         *    Do s squares - total remaining. */
        for (; (err == MP_OKAY) && (s > 0); s--) {
            /* 7.1. t = (t ^ 2) mod m */
            err = sp_sqr(t, t);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t, m, mp);
            }
        }
    }
    if (err == MP_OKAY) {
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
        if (j > 0) {
            err = sp_mul(t, pre[j-1], r);
            if (err == MP_OKAY) {
                err = _sp_mont_red(r, m, mp);
            }
        }
        /* 9. Else r = t */
        else {
            _sp_copy(t, r);
        }
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
#else
    FREE_SP_INT_ARRAY(pre, NULL);
#endif
    return err;
}
  11948. /* Calculates the multiplicative inverse in the field - constant time.
  11949. *
  11950. * Modulus (m) must be a prime and greater than 2.
  11951. * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
  11952. *
  11953. * @param [in] a SP integer, Montgomery form, to find inverse of.
  11954. * @param [in] m SP integer this is the modulus.
  11955. * @param [out] r SP integer to hold result.
  11956. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  11957. *
  11958. * @return MP_OKAY on success.
  11959. * @return MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
  11960. * @return MP_MEM when dynamic memory allocation fails.
  11961. */
  11962. int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
  11963. sp_int_digit mp)
  11964. {
  11965. int err = MP_OKAY;
  11966. /* Validate parameters. */
  11967. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  11968. err = MP_VAL;
  11969. }
  11970. /* Ensure m is not too big. */
  11971. else if (m->used * 2 >= SP_INT_DIGITS) {
  11972. err = MP_VAL;
  11973. }
  11974. /* check that r can hold the range of the modulus result */
  11975. else if (m->used > r->size) {
  11976. err = MP_VAL;
  11977. }
  11978. /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
  11979. if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
  11980. ((m->used == 1) && (m->dp[0] < 3)))) {
  11981. err = MP_VAL;
  11982. }
  11983. if (err == MP_OKAY) {
  11984. /* Do operation. */
  11985. err = _sp_invmod_mont_ct(a, m, r, mp);
  11986. }
  11987. return err;
  11988. }
  11989. #endif /* WOLFSSL_SP_INVMOD_MONT_CT */
  11990. /**************************
  11991. * Exponentiation functions
  11992. **************************/
  11993. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  11994. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  11995. defined(OPENSSL_ALL)
  11996. #ifndef WC_PROTECT_ENCRYPTED_MEM
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time.
 * Is constant time and can be cache attack resistant.
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. s = 0
 *  2. t[0] = b mod m.
 *  3. t[1] = t[0]
 *  4. For i in (bits-1)...0
 *   4.1. t[s] = t[s] ^ 2
 *   4.2. y = e[i]
 *   4.3  j = y & s
 *   4.4  s = s | y
 *   4.5. t[j] = t[j] * b
 *  5. r = t[1]
 *
 * t[0] is a dummy working value operated on until the first 1 bit of the
 * exponent is seen (s == 0); t[1] is the real accumulator thereafter.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    /* 1. s = 0 */
    int s = 0;
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

    /* Allocate temporaries. */
#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    /* Working SP int needed when cache resistant. */
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], 2 * m->used + 1);
        _sp_init_size(t[1], 2 * m->used + 1);
#ifndef WC_NO_CACHE_RESISTANT
        _sp_init_size(t[2], 2 * m->used + 1);
#endif

        /* 2. t[0] = b mod m
         * Ensure base is less than modulus - set fake working value to base.
         */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 3. t[1] = t[0]
         *    Set real working value to base.
         */
        _sp_copy(t[0], t[1]);

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* 4.1. t[s] = t[s] ^ 2 */
            err = sp_sqrmod(t[s], m, t[s]);
            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4 s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b */
                err = _sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* 4.1. t[s] = t[s] ^ 2
             * Working value selected by masking its address with
             * sp_off_on_addr so the addresses accessed do not depend on the
             * secret index s. */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[2]);
            err = sp_sqrmod(t[2], m, t[2]);
            _sp_copy(t[2],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));

            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1);
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4 s = s | y */
                s |= y;

                /* 4.5. t[j] = t[j] * b
                 * Multiplies into the dummy t[0] until the first 1 bit of
                 * the exponent has been processed. */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[2]);
                err = _sp_mulmod(t[2], b, m, t[2]);
                _sp_copy(t[2],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
#endif
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 5. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12122. #else
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time with base in Montgomery form.
 * Is constant time and cache attack resistant.
 *
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
 * Cryptographic Hardware and Embedded Systems, CHES 2002
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. t[1] = b mod m.
 *  2. t[0] = 1
 *  3. For i in (bits-1)...0
 *   3.1. y = e[i]
 *   3.2. t[2] = t[0] * t[1]
 *   3.3. t[3] = t[y] ^ 2
 *   3.4. t[y] = t[3], t[y^1] = t[2]
 *  4. r = t[0]
 *
 * Both a multiply and a square are performed on every iteration regardless of
 * the exponent bit (the ladder property).
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;

        /* 2. t[0] = 1 */
        _sp_set(t[0], 1);

        /* 3. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. y = e[i] */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 3.2. t[2] = t[0] * t[1] */
            err = sp_mulmod(t[0], t[1], m, t[2]);
            /* 3.3. t[3] = t[y] ^ 2 */
            if (err == MP_OKAY) {
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqrmod(t[3], m, t[3]);
            }
            /* 3.4. t[y] = t[3], t[y^1] = t[2]
             * Constant-time two-way copy keyed on the exponent bit. */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 4. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12209. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12210. #endif
  12211. #if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  12212. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  12213. defined(OPENSSL_ALL)
  12214. #ifndef WC_NO_HARDEN
  12215. #if !defined(WC_NO_CACHE_RESISTANT)
  12216. #ifndef WC_PROTECT_ENCRYPTED_MEM
  12217. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12218. * Process the exponent one bit at a time with base in Montgomery form.
  12219. * Is constant time and cache attack resistant.
  12220. *
  12221. * Algorithm:
  12222. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12223. * 1. t[0] = b mod m.
  12224. * 2. s = 0
  12225. * 3. t[0] = ToMont(t[0])
  12226. * 4. t[1] = t[0]
  12227. * 5. bm = t[0]
  12228. * 6. For i in (bits-1)...0
  12229. * 6.1. t[s] = t[s] ^ 2
  12230. * 6.2. y = e[i]
  12231. * 6.3 j = y & s
  12232. * 6.4 s = s | y
  12233. * 6.5. t[j] = t[j] * bm
  12234. * 7. t[1] = FromMont(t[1])
  12235. * 8. r = t[1]
  12236. *
  12237. * @param [in] b SP integer that is the base.
  12238. * @param [in] e SP integer that is the exponent.
  12239. * @param [in] bits Number of bits in exponent to use. May be greater than
  12240. * count of bits in e.
  12241. * @param [in] m SP integer that is the modulus.
  12242. * @param [out] r SP integer to hold result.
  12243. *
  12244. * @return MP_OKAY on success.
  12245. * @return MP_MEM when dynamic memory allocation fails.
  12246. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Four temporaries: t[0] = fake working value, t[1] = real working value,
     * t[2] = base in Montgomery form, t[3] = scratch for square/multiply.
     * Each sized to m->used * 2 + 1 digits to hold unreduced products. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus: b mod m == 0, so result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        /* 2. s = 0
         * s stays 0 until the first 1 bit of the exponent is seen; while
         * s == 0 the multiply below targets the fake value, so the position
         * of the top set bit is not visible in the operation sequence. */
        int s = 0;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 3. t[0] = ToMont(t[0])
         * Convert base to Montgomery form - as fake working value.
         * t[1] temporarily holds the Montgomery normalizer here.
         */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 4. t[1] = t[0]
             * Set real working value to base.
             */
            _sp_copy(t[0], t[1]);
            /* 5. bm = t[0]. Keep Montgomery-form base in t[2]. */
            _sp_copy(t[0], t[2]);
        }

        /* 6. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. t[s] = t[s] ^ 2
             * NOTE(review): sp_off_on_addr[] is assumed to hold {0, all-ones}
             * masks so that exactly one pointer survives the AND - a
             * branch-free select of t[0] or t[1] whose memory access pattern
             * does not depend on the secret index. Confirm against the
             * table's definition. */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[3]);
            err = sp_sqr(t[3], t[3]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp);
            }
            /* Write the squared value back to whichever of t[0]/t[1] was
             * selected, again without a secret-dependent branch. */
            _sp_copy(t[3],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
            if (err == MP_OKAY) {
                /* 6.2. y = e[i] - the i-th bit of the exponent. */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1);
                /* 6.3 j = y & s - multiply into the real value only once a
                 * set bit has been seen (s == 1) and this bit is set. */
                int j = y & s;
                /* 6.4 s = s | y - latch after the first set bit. */
                s |= y;
                /* 6.5. t[j] = t[j] * bm - performed unconditionally every
                 * iteration; only the (masked) destination differs. */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[3]);
                err = sp_mul(t[3], t[2], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp);
                }
                _sp_copy(t[3],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
        }
        if (err == MP_OKAY) {
            /* 7. t[1] = FromMont(t[1]) */
            err = _sp_mont_red(t[1], m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 8. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12347. #else
  12348. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12349. * Process the exponent one bit at a time with base in Montgomery form.
  12350. * Is constant time and cache attack resistant.
  12351. *
  12352. * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
  12353. * Cryptographic Hardware and Embedded Systems, CHES 2002
  12354. *
  12355. * Algorithm:
  12356. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12357. * 1. t[1] = b mod m.
  12358. * 2. t[0] = ToMont(1)
  12359. * 3. t[1] = ToMont(t[1])
  12360. * 4. For i in (bits-1)...0
  12361. * 4.1. y = e[i]
  12362. * 4.2. t[2] = t[0] * t[1]
  12363. * 4.3. t[3] = t[y] ^ 2
  12364. * 4.4. t[y] = t[3], t[y^1] = t[2]
  12365. * 5. t[0] = FromMont(t[0])
  12366. * 6. r = t[0]
  12367. *
  12368. * @param [in] b SP integer that is the base.
  12369. * @param [in] e SP integer that is the exponent.
  12370. * @param [in] bits Number of bits in exponent to use. May be greater than
  12371. * count of bits in e.
  12372. * @param [in] m SP integer that is the modulus.
  12373. * @param [out] r SP integer to hold result.
  12374. *
  12375. * @return MP_OKAY on success.
  12376. * @return MP_MEM when dynamic memory allocation fails.
  12377. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Ladder state: t[0] and t[1] are the two rungs, t[2]/t[3] hold the
     * multiply and square results for the current step. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus: b mod m == 0, so result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = ToMont(1)
         *    Calculate 1 in Montgomery form.
         */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1])
             *    Convert base to Montgomery form.
             */
            err = sp_mulmod(t[1], t[0], m, t[1]);
        }

        /* 4. For i in (bits-1)...0
         * Montgomery ladder: every iteration performs exactly one multiply
         * and one square regardless of the exponent bit, so the operation
         * sequence does not leak the bit values. */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 4.1. y = e[i] - the i-th bit of the exponent. */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 4.2. t[2] = t[0] * t[1] */
            err = sp_mul(t[0], t[1], t[2]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[2], m, mp);
            }
            /* 4.3. t[3] = t[y] ^ 2
             * NOTE(review): sp_off_on_addr[] is assumed to hold {0, all-ones}
             * masks so exactly one pointer survives the AND - a branch-free
             * select of t[0] or t[1]. Confirm against the table's
             * definition. */
            if (err == MP_OKAY) {
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqr(t[3], t[3]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp);
            }
            /* 4.4. t[y] = t[3], t[y^1] = t[2]
             * NOTE(review): _sp_copy_2_ct presumably writes both rungs with
             * a constant-time (y-masked) two-way copy - verify. */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }

        if (err == MP_OKAY) {
            /* 5. t[0] = FromMont(t[0]) */
            err = _sp_mont_red(t[0], m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 6. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12458. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12459. #else
  12460. #ifdef SP_ALLOC
  12461. #define SP_ALLOC_PREDEFINED
  12462. #endif
  12463. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  12464. #define SP_ALLOC
  12465. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12466. * Creates a window of precalculated exponents with base in Montgomery form.
  12467. * Is constant time but NOT cache attack resistant.
  12468. *
  12469. * Algorithm:
  12470. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12471. * w: window size based on bits.
  12472. * 1. t[1] = b mod m.
  12473. * 2. t[0] = MontNorm(m) = ToMont(1)
  12474. * 3. t[1] = ToMont(t[1])
  12475. * 4. For i in 2..(2 ^ w) - 1
  12476. * 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
  12477. * 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
  12478. * 5. cb = w * (bits / w)
  12479. * 5. tr = t[e / (2 ^ cb)]
  12480. * 6. For i in cb..w
  12481. * 6.1. y = e[(i-1)..(i-w)]
  12482. * 6.2. tr = tr ^ (2 * w)
  12483. * 6.3. tr = tr * t[y]
  12484. * 7. tr = FromMont(tr)
  12485. * 8. r = tr
  12486. *
  12487. * @param [in] b SP integer that is the base.
  12488. * @param [in] e SP integer that is the exponent.
  12489. * @param [in] bits Number of bits in exponent to use. May be greater than
  12490. * count of bits in e.
  12491. * @param [in] m SP integer that is the modulus.
  12492. * @param [out] r SP integer to hold result.
  12493. *
  12494. * @return MP_OKAY on success.
  12495. * @return MP_MEM when dynamic memory allocation fails.
  12496. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int c;                  /* Number of unconsumed bits in current word n. */
    int y;                  /* Current window value / table index. */
    int winBits;            /* Window size in bits. */
    int preCnt;             /* Number of pre-computed table entries. */
    int err = MP_OKAY;
    int done = 0;
    sp_int_digit mask;
    sp_int* tr = NULL;
    /* Declared at the maximum table size (winBits == 6); the actual
     * allocation below uses preCnt + 1. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* An entry for each possible 0..2^winBits-1 value. */
    preCnt = 1 << winBits;
    /* Mask for calculating index into pre-computed table. */
    mask = preCnt - 1;

    /* Allocate sp_ints for:
     *  - pre-computation table
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
    if (err == MP_OKAY) {
        /* Set variable to use allocate memory. */
        tr = t[preCnt];

        /* Initialize all allocated. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);

        /* 1. t[1] = b mod m. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus: b mod m == 0, so result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into entry of table to contain b^1. */
            _sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        sp_int_digit mp;
        sp_int_digit n;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1]) */
            err = sp_mul(t[1], t[0], t[1]);
        }
        if (err == MP_OKAY) {
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
        }

        /* 4. For i in 2..(2 ^ w) - 1
         * Build table: t[i] = b^i in Montgomery form. */
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
            if ((i & 1) == 0) {
                err = sp_sqr(t[i/2], t[i]);
            }
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
            else {
                err = sp_mul(t[i-1], t[1], t[i]);
            }
            /* Montgomery reduce square or multiplication result. */
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        if (err == MP_OKAY) {
            /* 5. cb = w * (bits / w) */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            /* Find top bit index in last word. */
            c = bits & (SP_WORD_SIZE - 1);
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Use as many bits from top to make remaining a multiple of window
             * size.
             */
            if ((bits % winBits) != 0) {
                c -= bits % winBits;
            }
            else {
                c -= winBits;
            }

            /* 5. tr = t[e / (2 ^ cb)] */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
            /* 5. Copy table value for first window.
             * Table lookups here are indexed by exponent bits - this is why
             * this variant is constant time but NOT cache attack resistant
             * (see function header). */
            _sp_copy(t[y], tr);

            /* 6. For i in cb..w - one window of winBits bits per iteration. */
            for (; (i >= 0) || (c >= winBits); ) {
                int j;

                /* 6.1. y = e[(i-1)..(i-w)] */
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n <<= winBits;
                    c = SP_WORD_SIZE - winBits;
                }
                else if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }

                /* 6.2. tr = tr ^ (2 * w) - square once per window bit. */
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }

                /* 6.3. tr = tr * t[y] */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }
        }

        if (err == MP_OKAY) {
            /* 7. tr = FromMont(tr) */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 8. r = tr */
        _sp_copy(tr, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12668. #ifndef SP_ALLOC_PREDEFINED
  12669. #undef SP_ALLOC
  12670. #undef SP_ALLOC_PREDEFINED
  12671. #endif
  12672. #endif /* !WC_NO_CACHE_RESISTANT */
  12673. #endif /* !WC_NO_HARDEN */
  12674. /* w = Log2(SP_WORD_SIZE) - 1 */
  12675. #if SP_WORD_SIZE == 8
  12676. #define EXP2_WINSIZE 2
  12677. #elif SP_WORD_SIZE == 16
  12678. #define EXP2_WINSIZE 3
  12679. #elif SP_WORD_SIZE == 32
  12680. #define EXP2_WINSIZE 4
  12681. #elif SP_WORD_SIZE == 64
  12682. #define EXP2_WINSIZE 5
  12683. #else
  12684. #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
  12685. #endif
  12686. /* Mask is all bits in window set. */
  12687. #define EXP2_MASK ((1 << EXP2_WINSIZE) - 1)
  12688. /* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
  12689. * Is constant time and cache attack resistant.
  12690. *
 * Calculates value to make mod operations constant time except when
 * WC_NO_HARDEN is defined or the modulus fits in one word.
  12693. *
  12694. * Algorithm:
  12695. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12696. * w: window size based on #bits in word.
  12697. * 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
  12698. * else tr = 1
  12699. * 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
  12700. * else a = 0
  12701. * 3. cb = w * (bits / w)
  12702. * 4. y = e / (2 ^ cb)
  12703. * 5. tr = (tr * (2 ^ y) + a) mod m
  12704. * 6. For i in cb..w
  12705. * 6.1. y = e[(i-1)..(i-w)]
  12706. * 6.2. tr = tr ^ (2 * w)
  12707. * 6.3. tr = ((tr * (2 ^ y) + a) mod m
  12708. * 7. if Words(m) > 1 then tr = FromMont(tr)
  12709. * 8. r = tr
  12710. *
  12711. * @param [in] e SP integer that is the exponent.
  12712. * @param [in] digits Number of digits in base to use. May be greater than
  12713. * count of bits in b.
  12714. * @param [in] m SP integer that is the modulus.
  12715. * @param [out] r SP integer to hold result.
  12716. *
  12717. * @return MP_OKAY on success.
  12718. * @return MP_MEM when dynamic memory allocation fails.
  12719. */
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int i = 0;              /* Index of exponent digit being consumed. */
    int c = 0;              /* Unconsumed bits remaining in current word n. */
    int y;                  /* Current window value - amount to shift by. */
    int err = MP_OKAY;
    sp_int_digit mp = 0;
    sp_int_digit n = 0;
#ifndef WC_NO_HARDEN
    sp_int* a = NULL;       /* Hardening addend: multiple of m added before
                             * each mod to make its timing input-independent. */
    sp_int* tr = NULL;      /* Temporary result. */
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
#else
    DECL_SP_INT(tr, m->used * 2 + 1);
#endif
    /* Montgomery form is only used for multi-word moduli. */
    int useMont = (m->used > 1);

#if 0
    sp_print_int(2, "a");
    sp_print(e, "b");
    sp_print(m, "m");
#endif

#ifndef WC_NO_HARDEN
    /* Allocate sp_ints for:
     *  - constant time add value for mod operation
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
#else
    /* Allocate sp_int for temporary result. */
    ALLOC_SP_INT(tr, m->used * 2 + 1, err, NULL);
#endif
    if (err == MP_OKAY) {
#ifndef WC_NO_HARDEN
        a  = d[0];
        tr = d[1];

        _sp_init_size(a, m->used * 2 + 1);
#endif
        _sp_init_size(tr, m->used * 2 + 1);
    }

    if ((err == MP_OKAY) && useMont) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
    }
    if (err == MP_OKAY) {
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
         *    else                  tr = 1
         */
        if (useMont) {
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
             * form.
             */
            err = sp_mont_norm(tr, m);
        }
        else {
            /* For single word modulus don't use Montgomery form. */
            err = sp_set(tr, 1);
        }
    }
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
     *    else                            a = 0
     */
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
    }
#endif

    if (err == MP_OKAY) {
        /* 3. cb = w * (bits / w) */
        i = digits - 1;
        n = e->dp[i--];
        c = SP_WORD_SIZE;
#if EXP2_WINSIZE != 1
        /* First window is the leftover bits that make the rest a multiple of
         * the window size. */
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
        if (c != SP_WORD_SIZE) {
            /* 4. y = e / (2 ^ cb) */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
        }
        else
#endif
        {
            /* 4. y = e / (2 ^ cb) */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }
        /* 5. tr = (tr * (2 ^ y) + a) mod m
         * Base is 2, so multiplying by 2^y is a left shift by y bits. */
        err = sp_mul_2d(tr, y, tr);
    }
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        /* Add value to make mod operation constant time. */
        err = sp_add(tr, a, tr);
    }
#endif
    if (err == MP_OKAY) {
        err = sp_mod(tr, m, tr);
    }
    /* 6. For i in cb..w - one EXP2_WINSIZE-bit window per iteration. */
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
        int j;

        /* 6.1. y = e[(i-1)..(i-w)] */
        if (c == 0) {
            /* Bits from next digit. */
            n = e->dp[i--];
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n <<= EXP2_WINSIZE;
            c = SP_WORD_SIZE - EXP2_WINSIZE;
        }
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
        /* Only needed when the window size does not divide the word size. */
        else if (c < EXP2_WINSIZE) {
            /* Bits to end of digit and part of next */
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n = e->dp[i--];
            c = EXP2_WINSIZE - c;
            y |= (int)(n >> (SP_WORD_SIZE - c));
            n <<= c;
            c = SP_WORD_SIZE - c;
        }
#endif
        else {
            /* Bits from middle of digit */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }

        /* 6.2. tr = tr ^ (2 * w) - square once per window bit. */
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
            err = sp_sqr(tr, tr);
            if (err == MP_OKAY) {
                if (useMont) {
                    err = _sp_mont_red(tr, m, mp);
                }
                else {
                    err = sp_mod(tr, m, tr);
                }
            }
        }

        /* 6.3. tr = ((tr * (2 ^ y) + a) mod m */
        if (err == MP_OKAY) {
            err = sp_mul_2d(tr, y, tr);
        }
#ifndef WC_NO_HARDEN
        if ((err == MP_OKAY) && useMont) {
            /* Add value to make mod operation constant time. */
            err = sp_add(tr, a, tr);
        }
#endif
        if (err == MP_OKAY) {
            /* Reduce current result by modulus. */
            err = sp_mod(tr, m, tr);
        }
    }

    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
    if ((err == MP_OKAY) && useMont) {
        err = _sp_mont_red(tr, m, mp);
        /* Reduction implementation returns number to range: 0..m-1. */
    }
    if (err == MP_OKAY) {
        /* 8. r = tr */
        _sp_copy(tr, r);
    }

#if 0
    sp_print(r, "rme");
#endif

#ifndef WC_NO_HARDEN
    FREE_SP_INT_ARRAY(d, NULL);
#else
    FREE_SP_INT(tr, m->used * 2 + 1);
#endif
    return err;
}
  12893. #endif
  12894. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  12895. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  12896. defined(OPENSSL_ALL)
  12897. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12898. *
  12899. * Error returned when parameters r == e or r == m and base >= modulus.
  12900. *
  12901. * @param [in] b SP integer that is the base.
  12902. * @param [in] e SP integer that is the exponent.
  12903. * @param [in] digits Number of digits in exponent to use. May be greater
  12904. * than count of digits in e.
  12905. * @param [in] m SP integer that is the modulus.
  12906. * @param [out] r SP integer to hold result.
  12907. *
  12908. * @return MP_OKAY on success.
  12909. * @return MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
  12910. * e is negative.
  12911. * @return MP_MEM when dynamic memory allocation fails.
  12912. */
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Bit counts used to select a size-specific implementation below.
     * Computed before NULL checks; sp_count_bits is assumed to tolerate a
     * NULL argument - confirm against its implementation. */
    int mBits = sp_count_bits(m);
    int bBits = sp_count_bits(b);
    int eBits = sp_count_bits(e);

    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
            (digits < 0)) {
        err = MP_VAL;
    }
    /* Ensure m is not too big. Intermediates need m->used * 2 digits. */
    else if (m->used * 2 >= SP_INT_DIGITS) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    /* Check for invalid modulus. */
    if ((err == MP_OKAY) && sp_iszero(m)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check for unsupported negative values of exponent and modulus. */
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
        err = MP_VAL;
    }
#endif

    /* Check for degenerate cases: x mod 1 == 0 and b^0 == 1. */
    if ((err == MP_OKAY) && sp_isone(m)) {
        _sp_set(r, 0);
        done = 1;
    }
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
        _sp_set(r, 1);
        done = 1;
    }

    /* Ensure base is less than modulus.
     * Reduces b into r and then aliases b to r; this is why r == e or
     * r == m is rejected here (see function header). */
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
        if ((r == e) || (r == m)) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            err = sp_mod(b, m, r);
        }
        if (err == MP_OKAY) {
            b = r;
        }
    }
    /* Check for degenerate case of base: 0^e == 0 (e != 0 here). */
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
        _sp_set(r, 0);
        done = 1;
    }

    /* Ensure SP integers have space for intermediate values. */
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
        err = MP_VAL;
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Use code optimized for specific sizes if possible */
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
    (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
    #ifndef WOLFSSL_SP_NO_2048
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
                (eBits <= 1024)) {
            err = sp_ModExp_1024((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
                 (eBits <= 2048)) {
            err = sp_ModExp_2048((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_3072
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
                (eBits <= 1536)) {
            err = sp_ModExp_1536((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
                 (eBits <= 3072)) {
            err = sp_ModExp_3072((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifdef WOLFSSL_SP_4096
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
                (eBits <= 4096)) {
            err = sp_ModExp_4096((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
#endif
        {
            /* SP does not support size. */
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
    defined(NO_DH)
    if ((!done) && (err == MP_OKAY)) {
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    }
#else
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
            mp_isodd(m)) {
        /* Use the generic base 2 implementation. */
        err = _sp_exptmod_base_2(e, digits, m, r);
    }
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
    #ifndef WC_NO_HARDEN
        /* Use constant time version hardened against timing attacks and
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
    #else
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    #endif
    }
    else
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
    if ((!done) && (err == MP_OKAY)) {
        /* Otherwise use the generic implementation hardened against
         * timing and cache attacks. */
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
    }
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
#else
    if ((!done) && (err == MP_OKAY)) {
        err = MP_VAL;
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */

    /* Suppress unused warnings when configuration excludes their uses. */
    (void)mBits;
    (void)bBits;
    (void)eBits;
    (void)digits;

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif
    return err;
}
  13067. #endif
  13068. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  13069. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  13070. defined(OPENSSL_ALL)
  13071. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13072. *
  13073. * @param [in] b SP integer that is the base.
  13074. * @param [in] e SP integer that is the exponent.
  13075. * @param [in] m SP integer that is the modulus.
  13076. * @param [out] r SP integer to hold result.
  13077. *
  13078. * @return MP_OKAY on success.
  13079. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13080. * @return MP_MEM when dynamic memory allocation fails.
  13081. */
  13082. int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
  13083. {
  13084. int err = MP_OKAY;
  13085. /* Validate parameters. */
  13086. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  13087. err = MP_VAL;
  13088. }
  13089. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  13090. if (err == MP_OKAY) {
  13091. err = sp_exptmod_ex(b, e, (int)e->used, m, r);
  13092. }
  13093. RESTORE_VECTOR_REGISTERS();
  13094. return err;
  13095. }
  13096. #endif
  13097. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  13098. #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
  13099. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13100. * Creates a window of precalculated exponents with base in Montgomery form.
  13101. * Sliding window and is NOT constant time.
  13102. *
  13103. * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
  13104. * e.g. when n=6, b^32..b^63
  13105. * Algorithm:
  13106. * 1. Ensure base is less than modulus.
  13107. * 2. Convert base to Montgomery form
  13108. * 3. Set result to table entry for top window bits, or
  13109. * if less than windows bits in exponent, 1 in Montgomery form.
  13110. * 4. While at least window bits left:
 * 4.1. Count number of and skip leading 0 bits unless fewer than window bits
 *      are left.
  13113. * 4.2. Montgomery square result for each leading 0 and window bits if bits
  13114. * left.
  13115. * 4.3. Break if less than window bits left.
 * 4.4. Get top window bits from exponent and drop.
  13117. * 4.5. Montgomery multiply result by table entry.
  13118. * 5. While bits left:
 * 5.1. Montgomery square result
  13120. * 5.2. If exponent bit set
  13121. * 5.2.1. Montgomery multiply result by Montgomery form of base.
  13122. * 6. Convert result back from Montgomery form.
  13123. *
  13124. * @param [in] b SP integer that is the base.
  13125. * @param [in] e SP integer that is the exponent.
  13126. * @param [in] bits Number of bits in exponent to use. May be greater than
  13127. * count of bits in e.
  13128. * @param [in] m SP integer that is the modulus.
  13129. * @param [out] r SP integer to hold result.
  13130. *
  13131. * @return MP_OKAY on success.
  13132. * @return MP_MEM when dynamic memory allocation fails.
  13133. */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i = 0;
    int bits;
    int winBits;
    int preCnt;
    int err = MP_OKAY;
    int done = 0;
    sp_int* tr = NULL;   /* Temporary result (in Montgomery form). */
    sp_int* bm = NULL;   /* Montgomery form of the base. */
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#else
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#endif

    bits = sp_count_bits(e);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     * Note: for 4096-bit values, 7-bit window is slightly better.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* Top bit of exponent fixed as 1 for pre-calculated window, so only half
     * of the 2^winBits values need table entries. */
    preCnt = 1 << (winBits - 1);

    /* Allocate sp_ints for:
     *  - pre-computation table (preCnt entries)
     *  - temporary result
     *  - Montgomery form of base
     */
#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Set variables to use allocated memory: the two entries after the
         * table are the temporary result and the Montgomery base. */
        tr = t[preCnt + 0];
        bm = t[preCnt + 1];

        /* Initialize all allocated sp_ints. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);
        _sp_init_size(bm, m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, bm);
            /* Handle base == modulus: b mod m == 0 so the result is 0. */
            if ((err == MP_OKAY) && sp_iszero(bm)) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into Montgomery base variable. */
            _sp_copy(b, bm);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int y = 0;          /* Window value / table index. */
        int c = 0;          /* Count of unconsumed bits in current digit n. */
        sp_int_digit mp;    /* Montgomery multiplier: -1/m mod 2^SP_WORD_SIZE. */

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 2. Convert base to Montgomery form. */
            err = sp_mul(bm, t[0], bm);
        }
        if (err == MP_OKAY) {
            /* bm = bm mod m, temporary size has to be bigger than bm->used. */
            err = _sp_div(bm, m, NULL, bm, bm->used + 1);
        }
        if (err == MP_OKAY) {
            /* Copy Montgomery form of base into first element of table. */
            _sp_copy(bm, t[0]);
        }
        /* Calculate b^(2^(winBits-1)) - base of the table since the top
         * window bit is implicitly 1. */
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
        }
        /* For each table entry after first. */
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
            /* Multiply previous entry by the base in Mont form into table. */
            err = sp_mul(t[i-1], bm, t[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        /* 3. Set result to table entry for top window bits, or
         *    if less than window bits in exponent, 1 in Montgomery form.
         */
        if (err == MP_OKAY) {
            sp_int_digit n;     /* Current exponent digit, top bits first. */
            /* Mask for calculating index into pre-computed table. */
            sp_int_digit mask = (sp_int_digit)preCnt - 1;

            /* Find the top bit. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            c = bits % SP_WORD_SIZE;
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Put top bit at highest offset in digit. */
            n <<= SP_WORD_SIZE - c;

            if (bits >= winBits) {
                /* Top bit set. Copy from window. */
                if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }
                _sp_copy(t[y], tr);
            }
            else {
                /* 1 in Montgomery form. */
                err = sp_mont_norm(tr, m);
            }

            /* 4. While at least window bits left. */
            while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                /* Number of squares to do before the multiply, due to top
                 * bits being 0. */
                int sqrs = 0;

                /* 4.1. Count number of and skip leading 0 bits unless less
                 *      than window bits left.
                 */
                do {
                    /* Make sure n has bits from the right digit. */
                    if (c == 0) {
                        n = e->dp[i--];
                        c = SP_WORD_SIZE;
                    }
                    /* Mask off the next bit. */
                    if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
                        break;
                    }

                    /* Another square needed. */
                    sqrs++;
                    /* Skip bit. */
                    n <<= 1;
                    c--;
                }
                while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));

                if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                    /* Add squares needed before using table entry. */
                    sqrs += winBits;
                }

                /* 4.2. Montgomery square result for each leading 0 and window
                 *      bits if bits left.
                 */
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }

                /* 4.3. Break if less than window bits left. */
                if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
                    break;
                }

                /* 4.4. Get top window bits from exponent and drop. */
                if (err == MP_OKAY) {
                    if (c == 0) {
                        /* Bits from next digit. */
                        n = e->dp[i--];
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c = SP_WORD_SIZE - winBits;
                    }
                    else if (c < winBits) {
                        /* Bits to end of digit and part of next. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n = e->dp[i--];
                        c = winBits - c;
                        y |= (int)(n >> (SP_WORD_SIZE - c));
                        n <<= c;
                        c = SP_WORD_SIZE - c;
                    }
                    else {
                        /* Bits from middle of digit. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c -= winBits;
                    }
                    /* Top bit of window implicitly 1 - mask to table index. */
                    y &= (int)mask;
                }

                /* 4.5. Montgomery multiply result by table entry. */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }

            /* Finished multiplying in table entries. */
            if ((err == MP_OKAY) && (c > 0)) {
                /* Handle remaining bits one at a time.
                 * Window values have top bit set and can't be used. */
                n = e->dp[0];
                /* 5. While bits left: */
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
                    /* 5.1. Montgomery square result */
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                    /* 5.2. If exponent bit set */
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
                        /* 5.2.1. Montgomery multiply result by Montgomery form
                         *        of base.
                         */
                        err = sp_mul(tr, bm, tr);
                        if (err == MP_OKAY) {
                            err = _sp_mont_red(tr, m, mp);
                        }
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 6. Convert result back from Montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(tr, r);
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(t, NULL);
#else
    FREE_SP_INT_ARRAY(t, NULL);
#endif
    return err;
}
  13399. #else
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Non-constant time implementation.
 *
 * Algorithm:
 *  1. Convert base to Montgomery form
 *  2. Set result to base (assumes exponent is not zero)
 *  3. For each bit in exponent starting at second highest
 *   3.1. Montgomery square result
 *   3.2. If exponent bit set
 *    3.2.1. Montgomery multiply result by Montgomery form of base.
 *  4. Convert result back from Montgomery form.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int y = 0;
    int bits = sp_count_bits(e);
    sp_int_digit mp;
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);

    /* Allocate memory for:
     *  - Montgomery form of base
     *  - Temporary result (in case r is same var as another parameter). */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);

        /* Ensure base is less than modulus and copy into temp. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus: b mod m == 0 so the result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into temp. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            /* 1. Convert base to Montgomery form. */
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
            _sp_copy(t[0], t[1]);
        }

        /* 3. For each bit in exponent starting at second highest. */
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. Montgomery square result. */
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
            if (err == MP_OKAY) {
                /* Get bit at index i. */
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 3.2. If exponent bit set */
                if (y != 0) {
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
                    err = sp_mul(t[0], t[1], t[0]);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t[0], m, mp);
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 4. Convert from Montgomery form. */
            err = _sp_mont_red(t[0], m, mp);
            /* Reduction implementation returns number of range 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  13502. #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
  13503. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13504. * Non-constant time implementation.
  13505. *
  13506. * @param [in] b SP integer that is the base.
  13507. * @param [in] e SP integer that is the exponent.
  13508. * @param [in] m SP integer that is the modulus.
  13509. * @param [out] r SP integer to hold result.
  13510. *
  13511. * @return MP_OKAY on success.
  13512. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13513. * @return MP_MEM when dynamic memory allocation fails.
  13514. */
  13515. int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
  13516. {
  13517. int err = MP_OKAY;
  13518. /* Validate parameters. */
  13519. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  13520. err = MP_VAL;
  13521. }
  13522. #if 0
  13523. if (err == MP_OKAY) {
  13524. sp_print(b, "a");
  13525. sp_print(e, "b");
  13526. sp_print(m, "m");
  13527. }
  13528. #endif
  13529. if (err != MP_OKAY) {
  13530. }
  13531. /* Handle special cases. */
  13532. else if (sp_iszero(m)) {
  13533. err = MP_VAL;
  13534. }
  13535. #ifdef WOLFSSL_SP_INT_NEGATIVE
  13536. else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
  13537. err = MP_VAL;
  13538. }
  13539. #endif
  13540. /* x mod 1 is always 0. */
  13541. else if (sp_isone(m)) {
  13542. _sp_set(r, 0);
  13543. }
  13544. /* b^0 mod m = 1 mod m = 1. */
  13545. else if (sp_iszero(e)) {
  13546. _sp_set(r, 1);
  13547. }
  13548. /* 0^x mod m = 0 mod m = 0. */
  13549. else if (sp_iszero(b)) {
  13550. _sp_set(r, 0);
  13551. }
  13552. /* Ensure SP integers have space for intermediate values. */
  13553. else if (m->used * 2 >= r->size) {
  13554. err = MP_VAL;
  13555. }
  13556. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  13557. else if (mp_iseven(m)) {
  13558. err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
  13559. }
  13560. #endif
  13561. else {
  13562. err = _sp_exptmod_nct(b, e, m, r);
  13563. }
  13564. #if 0
  13565. if (err == MP_OKAY) {
  13566. sp_print(r, "rme");
  13567. }
  13568. #endif
  13569. return err;
  13570. }
  13571. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  13572. /***************
  13573. * 2^e functions
  13574. ***************/
  13575. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  13576. /* Divide by 2^e: r = a >> e and rem = bits shifted out
  13577. *
  13578. * @param [in] a SP integer to divide.
  13579. * @param [in] e Exponent bits (dividing by 2^e).
  13580. * @param [in] m SP integer that is the modulus.
  13581. * @param [out] r SP integer to hold result.
  13582. * @param [out] rem SP integer to hold remainder.
  13583. *
  13584. * @return MP_OKAY on success.
  13585. * @return MP_VAL when a is NULL or e is negative.
  13586. */
  13587. int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
  13588. {
  13589. int err = MP_OKAY;
  13590. if ((a == NULL) || (e < 0)) {
  13591. err = MP_VAL;
  13592. }
  13593. if (err == MP_OKAY) {
  13594. /* Number of bits remaining after shift. */
  13595. int remBits = sp_count_bits(a) - e;
  13596. if (remBits <= 0) {
  13597. /* Shifting down by more bits than in number. */
  13598. _sp_zero(r);
  13599. if (rem != NULL) {
  13600. err = sp_copy(a, rem);
  13601. }
  13602. }
  13603. else {
  13604. if (rem != NULL) {
  13605. /* Copy a in to remainder. */
  13606. err = sp_copy(a, rem);
  13607. }
  13608. if (err == MP_OKAY) {
  13609. /* Shift a down by into result. */
  13610. err = sp_rshb(a, e, r);
  13611. }
  13612. if ((err == MP_OKAY) && (rem != NULL)) {
  13613. /* Set used and mask off top digit of remainder. */
  13614. rem->used = ((unsigned int)e + SP_WORD_SIZE - 1) >>
  13615. SP_WORD_SHIFT;
  13616. e &= SP_WORD_MASK;
  13617. if (e > 0) {
  13618. rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
  13619. }
  13620. /* Remove leading zeros from remainder. */
  13621. sp_clamp(rem);
  13622. #ifdef WOLFSSL_SP_INT_NEGATIVE
  13623. rem->sign = MP_ZPOS;
  13624. #endif
  13625. }
  13626. }
  13627. }
  13628. return err;
  13629. }
  13630. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13631. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* The bottom e bits: r = a & ((1 << e) - 1)
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, e is negative or e is too large for
 *          result.
 */
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
{
    int err = MP_OKAY;
    /* Number of digits needed to hold e bits. */
    unsigned int digits = ((unsigned int)e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;

    if ((a == NULL) || (r == NULL) || (e < 0)) {
        err = MP_VAL;
    }
    /* Result must be able to hold all digits of the masked value. */
    if ((err == MP_OKAY) && (digits > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Copy a into r if not same pointer. */
        if (a != r) {
            XMEMCPY(r->dp, a->dp, digits * SP_WORD_SIZEOF);
            r->used = a->used;
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = a->sign;
        #endif
        }

        /* Modify result if a is bigger or same digit size. */
    #ifndef WOLFSSL_SP_INT_NEGATIVE
        if (digits <= a->used)
    #else
        /* Need to make negative positive and mask. */
        if ((a->sign == MP_NEG) || (digits <= a->used))
    #endif
        {
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                unsigned int i;
                sp_int_digit carry = 0;

                /* Negate value: two's complement over the digit array so the
                 * subsequent mask yields the positive residue mod 2^e. */
                for (i = 0; i < r->used; i++) {
                    /* Borrow propagates past this digit only if it was 0. */
                    sp_int_digit next = r->dp[i] > 0;
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
                    carry |= next;
                }
                /* Fill remaining digits with all-ones (minus borrow). */
                for (; i < digits; i++) {
                    r->dp[i] = (sp_int_digit)0 - carry;
                }
                r->sign = MP_ZPOS;
            }
        #endif
            /* Set used and mask off top digit of result. */
            r->used = digits;
            e &= SP_WORD_MASK;
            if (e > 0) {
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
            }
            /* Remove leading zeros from result. */
            sp_clamp(r);
        }
    }

    return err;
}
  13696. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13697. #if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  13698. !defined(NO_DH))) || defined(OPENSSL_ALL)
  13699. /* Multiply by 2^e: r = a << e
  13700. *
  13701. * @param [in] a SP integer to multiply.
  13702. * @param [in] e Multiplier bits (multiplier equals 2^e).
  13703. * @param [out] r SP integer to hold result.
  13704. *
  13705. * @return MP_OKAY on success.
  13706. * @return MP_VAL when a or r is NULL, e is negative, or result is too big for
  13707. * result size.
  13708. */
  13709. int sp_mul_2d(const sp_int* a, int e, sp_int* r)
  13710. {
  13711. int err = MP_OKAY;
  13712. /* Validate parameters. */
  13713. if ((a == NULL) || (r == NULL) || (e < 0)) {
  13714. err = MP_VAL;
  13715. }
  13716. /* Ensure result has enough allocated digits for result. */
  13717. if ((err == MP_OKAY) &&
  13718. ((unsigned int)(sp_count_bits(a) + e) > r->size * SP_WORD_SIZE)) {
  13719. err = MP_VAL;
  13720. }
  13721. if (err == MP_OKAY) {
  13722. /* Copy a into r as left shift function works on the number. */
  13723. if (a != r) {
  13724. err = sp_copy(a, r);
  13725. }
  13726. }
  13727. if (err == MP_OKAY) {
  13728. #if 0
  13729. sp_print(a, "a");
  13730. sp_print_int(e, "n");
  13731. #endif
  13732. err = sp_lshb(r, e);
  13733. #if 0
  13734. sp_print(r, "rsl");
  13735. #endif
  13736. }
  13737. return err;
  13738. }
  13739. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13740. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  13741. defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  13742. /* START SP_SQR implementations */
  13743. /* This code is generated.
  13744. * To generate:
  13745. * cd scripts/sp/sp_int
  13746. * ./gen.sh
  13747. * File sp_sqr.c contains code.
  13748. */
  13749. #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
  13750. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba (column-wise) implementation using the SP_ASM_* accumulator macros.
 * l/h/o form a three-digit accumulator; each column writes out l and shifts
 * the accumulator down one digit.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* Temporary for the low half of the result (a may alias r). */
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
#else
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(
        sizeof(sp_int_digit) * (((a->used + 1) / 2) * 2 + 1), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if ((err == MP_OKAY) && (a->used <= 1)) {
        /* Single digit: one squaring gives the full two-digit result. */
        sp_int_digit l;
        sp_int_digit h;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        r->dp[0] = h;
        r->dp[1] = l;
    }
    else if (err == MP_OKAY) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        /* Low-half columns go to t; high-half columns go straight to r. */
        sp_int_digit* p = t;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Compute the low half of the columns into t. */
        for (k = 1; k < (a->used + 1) / 2; k++) {
            /* Odd column 2k-1: products a[i]*a[j] with i+j == 2k-1 (doubled). */
            i = k;
            j = (int)(k - 1);
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            /* Even column 2k: square term plus doubled cross products. */
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Compute the high half of the columns; i is bounded by a->used. */
        for (; k < a->used; k++) {
            i = k;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2] = l;
            l = h;
            h = o;
            o = 0;
            /* After first high-half iteration, write directly into r. */
            p = r->dp;
        }
        r->dp[k * 2 - 1] = l;
        /* Copy the buffered low half into the result. */
        XMEMCPY(r->dp, t, (((a->used + 1) / 2) * 2 + 1) * sizeof(sp_int_digit));
    }
    if (err == MP_OKAY) {
        r->used = a->used * 2;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  13858. #else /* !SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Column-wise implementation using double-width words (sp_int_word) for the
 * accumulator. Each column k sums a[i]*a[j] for i+j == k, with cross products
 * added twice and the diagonal square added once.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* Temporary result buffer (a may alias r). */
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;  /* Current double-width product. */
        sp_int_word l;  /* Low word of the column accumulator. */
        sp_int_word h;  /* High word of the column accumulator. */
#ifdef SP_WORD_OVERFLOW
        sp_int_word o;  /* Overflow word when columns can exceed two words. */
#endif

        /* Column 0 is just the square of the lowest digit. */
        w = (sp_int_word)a->dp[0] * a->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        for (k = 1; k <= (a->used - 1) * 2; k++) {
            /* Start at the middle pair of the column. */
            i = k / 2;
            j = (int)(k - i);
            if (i == (unsigned int)j) {
                /* Diagonal: square term added once. */
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Cross products a[i]*a[j], i > j, each added twice. */
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
                /* Second addition doubles the cross product. */
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Emit this column's digit and shift accumulator down. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        /* Final carry digit. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  13956. #endif /* SQR_MUL_ASM */
  13957. #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
  13958. #ifndef WOLFSSL_SP_SMALL
  13959. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  13960. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  13961. #ifndef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Long-hand implementation for 4-digit numbers: all 10 unique digit products
 * are computed up front, then combined column by column with cross products
 * added twice. w[0] is reused as the running column accumulator.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[10];
#endif
    const sp_int_digit* da = a->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All unique products da[i] * da[j], i <= j. */
        w[0] = (sp_int_word)da[0] * da[0];
        w[1] = (sp_int_word)da[0] * da[1];
        w[2] = (sp_int_word)da[0] * da[2];
        w[3] = (sp_int_word)da[1] * da[1];
        w[4] = (sp_int_word)da[0] * da[3];
        w[5] = (sp_int_word)da[1] * da[2];
        w[6] = (sp_int_word)da[1] * da[3];
        w[7] = (sp_int_word)da[2] * da[2];
        w[8] = (sp_int_word)da[2] * da[3];
        w[9] = (sp_int_word)da[3] * da[3];

        /* Column 0: da[0]^2. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 1: 2 * da[0]*da[1]. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 2: high of w[1] (x2), 2 * da[0]*da[2], da[1]^2. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 3: carries plus 2 * da[0]*da[3] and 2 * da[1]*da[2]. */
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 4: carries plus 2 * da[1]*da[3] and da[2]^2. */
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 5: carries plus 2 * da[2]*da[3]. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 6: carries plus da[3]^2. */
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 7: final carry plus high of da[3]^2. */
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        r->dp[7] = (sp_int_digit)w[0];

        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14063. #else /* SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Comba implementation for 4-digit numbers using the SP_ASM_* accumulator
 * macros. l/h/o form a three-digit accumulator; each column stores l then
 * shifts the accumulator down one digit. The low four columns are buffered
 * in t because a may alias r.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Buffer for the low half of the result (a may alias r). */
    sp_int_digit t[4];

    /* Column 0: a[0]^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2 * a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2 * a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2 * a[0]*a[3] + 2 * a[1]*a[2]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2 * a[1]*a[3] + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2 * a[2]*a[3]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: a[3]^2 plus remaining carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Copy the buffered low half into the result. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);
    return MP_OKAY;
}
  14118. #endif /* SQR_MUL_ASM */
  14119. #endif /* SP_WORD_SIZE == 64 */
  14120. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  14121. #ifdef SQR_MUL_ASM
  14122. /* Square a and store in r. r = a * a
  14123. *
  14124. * Comba implementation.
  14125. *
  14126. * @param [in] a SP integer to square.
  14127. * @param [out] r SP integer result.
  14128. *
  14129. * @return MP_OKAY on success.
  14130. * @return MP_MEM when dynamic memory allocation fails.
  14131. */
static int _sp_sqr_6(const sp_int* a, sp_int* r)
{
    /* Three-digit column accumulator: l = low, h = middle, o = overflow. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator for columns with 3+ cross-products: the
     * undoubled sum of a[i]*a[j] is collected in (tl, th, to) and then added
     * in twice via SP_ASM_ADD_DBL_3, so doubling happens once per column. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Buffer for the low 6 result digits; written back last so r may
     * alias a. */
    sp_int_digit t[6];
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): the Thumb variants of the macros appear to consume 'to'
     * before first writing it, hence the explicit clear here. */
    to = 0;
#endif
    /* Column 0: a[0]^2 (h = low word, l = carry into column 1). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*(a[0]*a[5] + a[1]*a[4] + a[2]*a[3]) via the doubled
     * pre-sum. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*(a[1]*a[5] + a[2]*a[4]) + a[3]^2. High half of the result
     * is written straight into r from here on. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*(a[2]*a[5] + a[3]*a[4]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*a[3]*a[5] + a[4]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*a[4]*a[5]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Column 10: a[5]^2 - top column, no overflow digit needed. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Commit the buffered low half of the result. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);
    return MP_OKAY;
}
  14210. #endif /* SQR_MUL_ASM */
  14211. #endif /* SP_WORD_SIZE == 64 */
  14212. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  14213. #ifdef SQR_MUL_ASM
  14214. /* Square a and store in r. r = a * a
  14215. *
  14216. * Comba implementation.
  14217. *
  14218. * @param [in] a SP integer to square.
  14219. * @param [out] r SP integer result.
  14220. *
  14221. * @return MP_OKAY on success.
  14222. * @return MP_MEM when dynamic memory allocation fails.
  14223. */
static int _sp_sqr_8(const sp_int* a, sp_int* r)
{
    /* Three-digit column accumulator: l = low, h = middle, o = overflow. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: undoubled cross-product sum for wide columns,
     * added in twice at once with SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Buffer for the low 8 result digits; committed last so r may alias a. */
    sp_int_digit t[8];
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb macro variants appear to consume 'to' before
     * first writing it, hence the explicit clear. */
    to = 0;
#endif
    /* Column 0: a[0]^2 (h = low word, l = carry into column 1). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 5..9 have 3+ cross-products: pre-sum once, double once. */
    /* Column 5: 2*(a[0]*a[5] + a[1]*a[4] + a[2]*a[3]). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*(a[0]*a[6] + a[1]*a[5] + a[2]*a[4]) + a[3]^2. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*(a[0]*a[7] + a[1]*a[6] + a[2]*a[5] + a[3]*a[4]). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*(a[1]*a[7] + a[2]*a[6] + a[3]*a[5]) + a[4]^2. High half of
     * the result is written straight into r from here on. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*(a[2]*a[7] + a[3]*a[6] + a[4]*a[5]). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Narrow tail columns go back to direct doubled accumulation. */
    /* Column 10: 2*(a[3]*a[7] + a[4]*a[6]) + a[5]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11: 2*(a[4]*a[7] + a[5]*a[6]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12: 2*a[5]*a[7] + a[6]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13: 2*a[6]*a[7]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Column 14: a[7]^2 - top column, no overflow digit needed. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* Commit the buffered low half of the result. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    sp_clamp(r);
    return MP_OKAY;
}
  14337. #endif /* SQR_MUL_ASM */
  14338. #endif /* SP_WORD_SIZE == 32 */
  14339. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  14340. #ifdef SQR_MUL_ASM
  14341. /* Square a and store in r. r = a * a
  14342. *
  14343. * Comba implementation.
  14344. *
  14345. * @param [in] a SP integer to square.
  14346. * @param [out] r SP integer result.
  14347. *
  14348. * @return MP_OKAY on success.
  14349. * @return MP_MEM when dynamic memory allocation fails.
  14350. */
static int _sp_sqr_12(const sp_int* a, sp_int* r)
{
    /* Three-digit column accumulator: l = low, h = middle, o = overflow. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: undoubled cross-product sum for wide columns,
     * added in twice at once with SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Buffer for the low 12 result digits; committed last so r may alias a. */
    sp_int_digit t[12];
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb macro variants appear to consume 'to' before
     * first writing it, hence the explicit clear. */
    to = 0;
#endif
    /* Column 0: a[0]^2 (h = low word, l = carry into column 1). */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Columns 1..4: few products - direct doubled accumulation.
     * Column k sums 2*a[i]*a[j] for all i < j, i + j == k, plus a[k/2]^2
     * when k is even; after each column the accumulator shifts down one
     * digit (l = h; h = o; o = 0). */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 5..17: 3+ cross-products - pre-sum undoubled in (tl, th, to)
     * then add twice with SP_ASM_ADD_DBL_3 (square term added separately). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12 onwards: high half of the result is written straight into
     * r (no aliasing hazard with a->dp[0..11]). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 18..21: narrow again - direct doubled accumulation. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Column 22: a[11]^2 - top column, no overflow digit needed. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* Commit the buffered low half of the result. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    sp_clamp(r);
    return MP_OKAY;
}
  14546. #endif /* SQR_MUL_ASM */
  14547. #endif /* SP_WORD_SIZE == 32 */
  14548. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  14549. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  14550. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  14551. (SP_WORD_SIZE == 64)))
  14552. #if SP_INT_DIGITS >= 32
  14553. /* Square a and store in r. r = a * a
  14554. *
  14555. * Comba implementation.
  14556. *
  14557. * @param [in] a SP integer to square.
  14558. * @param [out] r SP integer result.
  14559. *
  14560. * @return MP_OKAY on success.
  14561. * @return MP_MEM when dynamic memory allocation fails.
  14562. */
static int _sp_sqr_16(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    /* Three-digit column accumulator: l = low, h = middle, o = overflow. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: undoubled cross-product sum for wide columns,
     * added in twice at once with SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Buffer for the low 16 result digits (heap-allocated on small-stack
     * builds); committed last so r may alias a. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    sp_int_digit t[16];
#endif
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb macro variants appear to consume 'to' before
     * first writing it, hence the explicit clear. */
    to = 0;
#endif
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column 0: a[0]^2 (h = low word, l = carry into column 1). */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        /* Columns 1..4: few products - direct doubled accumulation.
         * Column k sums 2*a[i]*a[j] for all i < j, i + j == k, plus
         * a[k/2]^2 when k is even; after each column the accumulator shifts
         * down one digit (l = h; h = o; o = 0). */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        /* Columns 5..25: 3+ cross-products - pre-sum undoubled in
         * (tl, th, to) then add twice with SP_ASM_ADD_DBL_3 (square term
         * added separately on even columns). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 16 onwards: high half of the result is written straight
         * into r (no aliasing hazard with a->dp[0..15]). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        /* Columns 26..29: narrow again - direct doubled accumulation. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Column 30: a[15]^2 - top column, no overflow digit needed. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;
        /* Commit the buffered low half of the result. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14875. #endif /* SP_INT_DIGITS >= 32 */
  14876. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
  14877. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
  14878. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  14879. #if SP_INT_DIGITS >= 48
  14880. /* Square a and store in r. r = a * a
  14881. *
  14882. * Comba implementation.
  14883. *
  14884. * @param [in] a SP integer to square.
  14885. * @param [out] r SP integer result.
  14886. *
  14887. * @return MP_OKAY on success.
  14888. * @return MP_MEM when dynamic memory allocation fails.
  14889. */
  14890. static int _sp_sqr_24(const sp_int* a, sp_int* r)
  14891. {
  14892. int err = MP_OKAY;
  14893. sp_int_digit l = 0;
  14894. sp_int_digit h = 0;
  14895. sp_int_digit o = 0;
  14896. sp_int_digit tl = 0;
  14897. sp_int_digit th = 0;
  14898. sp_int_digit to;
  14899. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  14900. sp_int_digit* t = NULL;
  14901. #else
  14902. sp_int_digit t[24];
  14903. #endif
  14904. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  14905. to = 0;
  14906. #endif
  14907. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  14908. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  14909. DYNAMIC_TYPE_BIGINT);
  14910. if (t == NULL) {
  14911. err = MP_MEM;
  14912. }
  14913. #endif
  14914. if (err == MP_OKAY) {
  14915. SP_ASM_SQR(h, l, a->dp[0]);
  14916. t[0] = h;
  14917. h = 0;
  14918. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  14919. t[1] = l;
  14920. l = h;
  14921. h = o;
  14922. o = 0;
  14923. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  14924. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  14925. t[2] = l;
  14926. l = h;
  14927. h = o;
  14928. o = 0;
  14929. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  14930. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  14931. t[3] = l;
  14932. l = h;
  14933. h = o;
  14934. o = 0;
  14935. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  14936. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  14937. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  14938. t[4] = l;
  14939. l = h;
  14940. h = o;
  14941. o = 0;
  14942. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  14943. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  14944. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  14945. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14946. t[5] = l;
  14947. l = h;
  14948. h = o;
  14949. o = 0;
  14950. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  14951. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  14952. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  14953. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  14954. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14955. t[6] = l;
  14956. l = h;
  14957. h = o;
  14958. o = 0;
  14959. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  14960. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  14961. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  14962. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  14963. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14964. t[7] = l;
  14965. l = h;
  14966. h = o;
  14967. o = 0;
  14968. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  14969. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  14970. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  14971. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  14972. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  14973. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14974. t[8] = l;
  14975. l = h;
  14976. h = o;
  14977. o = 0;
  14978. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  14979. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  14980. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  14981. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  14982. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  14983. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14984. t[9] = l;
  14985. l = h;
  14986. h = o;
  14987. o = 0;
  14988. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  14989. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  14990. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  14991. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  14992. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  14993. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  14994. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14995. t[10] = l;
  14996. l = h;
  14997. h = o;
  14998. o = 0;
  14999. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  15000. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  15001. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  15002. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  15003. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  15004. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  15005. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15006. t[11] = l;
  15007. l = h;
  15008. h = o;
  15009. o = 0;
  15010. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
  15011. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
  15012. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  15013. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  15014. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  15015. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  15016. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  15017. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15018. t[12] = l;
  15019. l = h;
  15020. h = o;
  15021. o = 0;
  15022. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
  15023. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
  15024. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
  15025. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  15026. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  15027. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  15028. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  15029. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15030. t[13] = l;
  15031. l = h;
  15032. h = o;
  15033. o = 0;
  15034. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
  15035. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
  15036. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
  15037. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
  15038. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  15039. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  15040. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  15041. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  15042. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15043. t[14] = l;
  15044. l = h;
  15045. h = o;
  15046. o = 0;
  15047. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
  15048. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
  15049. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
  15050. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
  15051. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
  15052. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  15053. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  15054. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  15055. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15056. t[15] = l;
  15057. l = h;
  15058. h = o;
  15059. o = 0;
  15060. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
  15061. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
  15062. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
  15063. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
  15064. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
  15065. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
  15066. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  15067. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  15068. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  15069. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15070. t[16] = l;
  15071. l = h;
  15072. h = o;
  15073. o = 0;
  15074. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
  15075. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
  15076. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
  15077. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
  15078. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
  15079. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
  15080. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
  15081. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  15082. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  15083. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15084. t[17] = l;
  15085. l = h;
  15086. h = o;
  15087. o = 0;
  15088. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
  15089. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
  15090. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
  15091. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
  15092. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
  15093. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
  15094. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
  15095. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
  15096. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
  15097. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  15098. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15099. t[18] = l;
  15100. l = h;
  15101. h = o;
  15102. o = 0;
  15103. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
  15104. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
  15105. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
  15106. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
  15107. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
  15108. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
  15109. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
  15110. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
  15111. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
  15112. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
  15113. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15114. t[19] = l;
  15115. l = h;
  15116. h = o;
  15117. o = 0;
  15118. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
  15119. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
  15120. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
  15121. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
  15122. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
  15123. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
  15124. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
  15125. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
  15126. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
  15127. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
  15128. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  15129. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15130. t[20] = l;
  15131. l = h;
  15132. h = o;
  15133. o = 0;
  15134. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
  15135. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
  15136. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
  15137. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
  15138. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
  15139. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
  15140. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
  15141. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
  15142. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
  15143. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
  15144. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
  15145. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15146. t[21] = l;
  15147. l = h;
  15148. h = o;
  15149. o = 0;
  15150. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
  15151. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
  15152. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
  15153. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
  15154. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
  15155. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
  15156. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
  15157. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
  15158. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
  15159. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
  15160. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
  15161. SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
  15162. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15163. t[22] = l;
  15164. l = h;
  15165. h = o;
  15166. o = 0;
  15167. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
  15168. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
  15169. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
  15170. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
  15171. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
  15172. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
  15173. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
  15174. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
  15175. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
  15176. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
  15177. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
  15178. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
  15179. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15180. t[23] = l;
  15181. l = h;
  15182. h = o;
  15183. o = 0;
  15184. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
  15185. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
  15186. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
  15187. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
  15188. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
  15189. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
  15190. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
  15191. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
  15192. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
  15193. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
  15194. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
  15195. SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
  15196. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15197. r->dp[24] = l;
  15198. l = h;
  15199. h = o;
  15200. o = 0;
  15201. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
  15202. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
  15203. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
  15204. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
  15205. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
  15206. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
  15207. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
  15208. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
  15209. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
  15210. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
  15211. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
  15212. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15213. r->dp[25] = l;
  15214. l = h;
  15215. h = o;
  15216. o = 0;
  15217. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
  15218. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
  15219. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
  15220. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
  15221. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
  15222. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
  15223. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
  15224. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
  15225. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
  15226. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
  15227. SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
  15228. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15229. r->dp[26] = l;
  15230. l = h;
  15231. h = o;
  15232. o = 0;
  15233. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
  15234. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
  15235. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
  15236. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
  15237. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
  15238. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
  15239. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
  15240. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
  15241. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
  15242. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
  15243. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15244. r->dp[27] = l;
  15245. l = h;
  15246. h = o;
  15247. o = 0;
  15248. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
  15249. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
  15250. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
  15251. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
  15252. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
  15253. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
  15254. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
  15255. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
  15256. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
  15257. SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
  15258. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15259. r->dp[28] = l;
  15260. l = h;
  15261. h = o;
  15262. o = 0;
  15263. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
  15264. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
  15265. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
  15266. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
  15267. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
  15268. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
  15269. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
  15270. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
  15271. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
  15272. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15273. r->dp[29] = l;
  15274. l = h;
  15275. h = o;
  15276. o = 0;
  15277. SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
  15278. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
  15279. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
  15280. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
  15281. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
  15282. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
  15283. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
  15284. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
  15285. SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
  15286. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15287. r->dp[30] = l;
  15288. l = h;
  15289. h = o;
  15290. o = 0;
  15291. SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
  15292. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
  15293. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
  15294. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
  15295. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
  15296. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
  15297. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
  15298. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
  15299. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15300. r->dp[31] = l;
  15301. l = h;
  15302. h = o;
  15303. o = 0;
  15304. SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
  15305. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
  15306. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
  15307. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
  15308. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
  15309. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
  15310. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
  15311. SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
  15312. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15313. r->dp[32] = l;
  15314. l = h;
  15315. h = o;
  15316. o = 0;
  15317. SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
  15318. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
  15319. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
  15320. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
  15321. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
  15322. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
  15323. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
  15324. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15325. r->dp[33] = l;
  15326. l = h;
  15327. h = o;
  15328. o = 0;
  15329. SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
  15330. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
  15331. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
  15332. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
  15333. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
  15334. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
  15335. SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
  15336. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15337. r->dp[34] = l;
  15338. l = h;
  15339. h = o;
  15340. o = 0;
  15341. SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
  15342. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
  15343. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
  15344. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
  15345. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
  15346. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
  15347. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15348. r->dp[35] = l;
  15349. l = h;
  15350. h = o;
  15351. o = 0;
  15352. SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
  15353. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
  15354. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
  15355. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
  15356. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
  15357. SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
  15358. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15359. r->dp[36] = l;
  15360. l = h;
  15361. h = o;
  15362. o = 0;
  15363. SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
  15364. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
  15365. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
  15366. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
  15367. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
  15368. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15369. r->dp[37] = l;
  15370. l = h;
  15371. h = o;
  15372. o = 0;
  15373. SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
  15374. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
  15375. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
  15376. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
  15377. SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
  15378. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15379. r->dp[38] = l;
  15380. l = h;
  15381. h = o;
  15382. o = 0;
  15383. SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
  15384. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
  15385. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
  15386. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
  15387. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15388. r->dp[39] = l;
  15389. l = h;
  15390. h = o;
  15391. o = 0;
  15392. SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
  15393. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
  15394. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
  15395. SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
  15396. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15397. r->dp[40] = l;
  15398. l = h;
  15399. h = o;
  15400. o = 0;
  15401. SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
  15402. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
  15403. SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
  15404. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15405. r->dp[41] = l;
  15406. l = h;
  15407. h = o;
  15408. o = 0;
  15409. SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
  15410. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
  15411. SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
  15412. r->dp[42] = l;
  15413. l = h;
  15414. h = o;
  15415. o = 0;
  15416. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
  15417. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
  15418. r->dp[43] = l;
  15419. l = h;
  15420. h = o;
  15421. o = 0;
  15422. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
  15423. SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
  15424. r->dp[44] = l;
  15425. l = h;
  15426. h = o;
  15427. o = 0;
  15428. SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
  15429. r->dp[45] = l;
  15430. l = h;
  15431. h = o;
  15432. SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
  15433. r->dp[46] = l;
  15434. r->dp[47] = h;
  15435. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  15436. r->used = 48;
  15437. sp_clamp(r);
  15438. }
  15439. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  15440. if (t != NULL) {
  15441. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  15442. }
  15443. #endif
  15444. return err;
  15445. }
  15446. #endif /* SP_INT_DIGITS >= 48 */
  15447. #if SP_INT_DIGITS >= 64
  15448. /* Square a and store in r. r = a * a
  15449. *
  15450. * Karatsuba implementation.
  15451. *
  15452. * @param [in] a SP integer to square.
  15453. * @param [out] r SP integer result.
  15454. *
  15455. * @return MP_OKAY on success.
  15456. * @return MP_MEM when dynamic memory allocation fails.
  15457. */
15458. static int _sp_sqr_32(const sp_int* a, sp_int* r)
15459. {
15460. int err = MP_OKAY;
15461. unsigned int i;
15462. sp_int_digit l;
15463. sp_int_digit h;
15464. sp_int* z0; /* low partial product a0^2 (aliases r) */
15465. sp_int* z1; /* middle partial product (a0 + a1)^2 */
15466. sp_int* z2; /* high partial product a1^2 */
15467. sp_int_digit ca; /* carry out of the digit-wise a0 + a1 */
15468. DECL_SP_INT(a1, 16);
15469. DECL_SP_INT_ARRAY(z, 33, 2);
15470. ALLOC_SP_INT(a1, 16, err, NULL);
15471. ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
15472. if (err == MP_OKAY) {
15473. z1 = z[0];
15474. z2 = z[1];
15475. z0 = r; /* build the low half of the result in place */
15476. XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16); /* a1 = top 16 digits of a */
15477. a1->used = 16;
15478. /* z2 = a1 ^ 2 */
15479. err = _sp_sqr_16(a1, z2);
15480. }
15481. if (err == MP_OKAY) {
15482. l = 0;
15483. h = 0;
15484. for (i = 0; i < 16; i++) { /* a1 = a0 + a1 (digit-wise, carry in l/h) */
15485. SP_ASM_ADDC(l, h, a1->dp[i]);
15486. SP_ASM_ADDC(l, h, a->dp[i]);
15487. a1->dp[i] = l;
15488. l = h;
15489. h = 0;
15490. }
15491. ca = l; /* carry out of a0 + a1 (0 or 1) */
15492. /* z0 = a0 ^ 2 */
15493. err = _sp_sqr_16(a, z0);
15494. }
15495. if (err == MP_OKAY) {
15496. /* z1 = (a0 + a1) ^ 2 */
15497. err = _sp_sqr_16(a1, z1);
15498. }
15499. if (err == MP_OKAY) {
15500. /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
15501. /* r = z0 */
15502. /* r += (z1 - z0 - z2) << 16 */
15503. z1->dp[32] = ca; /* ca^2 term of (ca*B^16 + s)^2; ca is 0 or 1 */
15504. l = 0;
15505. if (ca) { /* add the 2*ca*s*B^16 cross term: z1 += 2 * a1 << 16 digits */
15506. l = z1->dp[0 + 16];
15507. h = 0;
15508. SP_ASM_ADDC(l, h, a1->dp[0]);
15509. SP_ASM_ADDC(l, h, a1->dp[0]);
15510. z1->dp[0 + 16] = l;
15511. l = h;
15512. h = 0;
15513. for (i = 1; i < 16; i++) {
15514. SP_ASM_ADDC(l, h, z1->dp[i + 16]);
15515. SP_ASM_ADDC(l, h, a1->dp[i]);
15516. SP_ASM_ADDC(l, h, a1->dp[i]);
15517. z1->dp[i + 16] = l;
15518. l = h;
15519. h = 0;
15520. }
15521. }
15522. z1->dp[32] += l;
15523. /* z1 = z1 - z0 - z2 */
15524. l = z1->dp[0];
15525. h = 0;
15526. SP_ASM_SUBB(l, h, z0->dp[0]);
15527. SP_ASM_SUBB(l, h, z2->dp[0]);
15528. z1->dp[0] = l;
15529. l = h;
15530. h = 0;
15531. for (i = 1; i < 32; i++) {
15532. l += z1->dp[i];
15533. SP_ASM_SUBB(l, h, z0->dp[i]);
15534. SP_ASM_SUBB(l, h, z2->dp[i]);
15535. z1->dp[i] = l;
15536. l = h;
15537. h = 0;
15538. }
15539. z1->dp[i] += l; /* propagate final borrow into the top digit */
15540. /* r += z1 << 16 */
15541. l = 0;
15542. h = 0;
15543. for (i = 0; i < 16; i++) {
15544. SP_ASM_ADDC(l, h, r->dp[i + 16]);
15545. SP_ASM_ADDC(l, h, z1->dp[i]);
15546. r->dp[i + 16] = l;
15547. l = h;
15548. h = 0;
15549. }
15550. for (; i < 33; i++) { /* digits above z0: nothing in r yet */
15551. SP_ASM_ADDC(l, h, z1->dp[i]);
15552. r->dp[i + 16] = l;
15553. l = h;
15554. h = 0;
15555. }
15556. /* r += z2 << 32 */
15557. l = 0;
15558. h = 0;
15559. for (i = 0; i < 17; i++) { /* overlap with digits already written from z1 */
15560. SP_ASM_ADDC(l, h, r->dp[i + 32]);
15561. SP_ASM_ADDC(l, h, z2->dp[i]);
15562. r->dp[i + 32] = l;
15563. l = h;
15564. h = 0;
15565. }
15566. for (; i < 32; i++) {
15567. SP_ASM_ADDC(l, h, z2->dp[i]);
15568. r->dp[i + 32] = l;
15569. l = h;
15570. h = 0;
15571. }
15572. r->used = 64; /* maximum possible size; clamp below */
15573. sp_clamp(r);
15574. }
15575. FREE_SP_INT_ARRAY(z, NULL);
15576. FREE_SP_INT(a1, NULL);
15577. return err;
15578. }
  15579. #endif /* SP_INT_DIGITS >= 64 */
  15580. #if SP_INT_DIGITS >= 96
  15581. /* Square a and store in r. r = a * a
  15582. *
  15583. * Karatsuba implementation.
  15584. *
  15585. * @param [in] a SP integer to square.
  15586. * @param [out] r SP integer result.
  15587. *
  15588. * @return MP_OKAY on success.
  15589. * @return MP_MEM when dynamic memory allocation fails.
  15590. */
15591. static int _sp_sqr_48(const sp_int* a, sp_int* r)
15592. {
15593. int err = MP_OKAY;
15594. unsigned int i;
15595. sp_int_digit l;
15596. sp_int_digit h;
15597. sp_int* z0; /* low partial product a0^2 (aliases r) */
15598. sp_int* z1; /* middle partial product (a0 + a1)^2 */
15599. sp_int* z2; /* high partial product a1^2 */
15600. sp_int_digit ca; /* carry out of the digit-wise a0 + a1 */
15601. DECL_SP_INT(a1, 24);
15602. DECL_SP_INT_ARRAY(z, 49, 2);
15603. ALLOC_SP_INT(a1, 24, err, NULL);
15604. ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
15605. if (err == MP_OKAY) {
15606. z1 = z[0];
15607. z2 = z[1];
15608. z0 = r; /* build the low half of the result in place */
15609. XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24); /* a1 = top 24 digits of a */
15610. a1->used = 24;
15611. /* z2 = a1 ^ 2 */
15612. err = _sp_sqr_24(a1, z2);
15613. }
15614. if (err == MP_OKAY) {
15615. l = 0;
15616. h = 0;
15617. for (i = 0; i < 24; i++) { /* a1 = a0 + a1 (digit-wise, carry in l/h) */
15618. SP_ASM_ADDC(l, h, a1->dp[i]);
15619. SP_ASM_ADDC(l, h, a->dp[i]);
15620. a1->dp[i] = l;
15621. l = h;
15622. h = 0;
15623. }
15624. ca = l; /* carry out of a0 + a1 (0 or 1) */
15625. /* z0 = a0 ^ 2 */
15626. err = _sp_sqr_24(a, z0);
15627. }
15628. if (err == MP_OKAY) {
15629. /* z1 = (a0 + a1) ^ 2 */
15630. err = _sp_sqr_24(a1, z1);
15631. }
15632. if (err == MP_OKAY) {
15633. /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
15634. /* r = z0 */
15635. /* r += (z1 - z0 - z2) << 24 */
15636. z1->dp[48] = ca; /* ca^2 term of (ca*B^24 + s)^2; ca is 0 or 1 */
15637. l = 0;
15638. if (ca) { /* add the 2*ca*s*B^24 cross term: z1 += 2 * a1 << 24 digits */
15639. l = z1->dp[0 + 24];
15640. h = 0;
15641. SP_ASM_ADDC(l, h, a1->dp[0]);
15642. SP_ASM_ADDC(l, h, a1->dp[0]);
15643. z1->dp[0 + 24] = l;
15644. l = h;
15645. h = 0;
15646. for (i = 1; i < 24; i++) {
15647. SP_ASM_ADDC(l, h, z1->dp[i + 24]);
15648. SP_ASM_ADDC(l, h, a1->dp[i]);
15649. SP_ASM_ADDC(l, h, a1->dp[i]);
15650. z1->dp[i + 24] = l;
15651. l = h;
15652. h = 0;
15653. }
15654. }
15655. z1->dp[48] += l;
15656. /* z1 = z1 - z0 - z2 */
15657. l = z1->dp[0];
15658. h = 0;
15659. SP_ASM_SUBB(l, h, z0->dp[0]);
15660. SP_ASM_SUBB(l, h, z2->dp[0]);
15661. z1->dp[0] = l;
15662. l = h;
15663. h = 0;
15664. for (i = 1; i < 48; i++) {
15665. l += z1->dp[i];
15666. SP_ASM_SUBB(l, h, z0->dp[i]);
15667. SP_ASM_SUBB(l, h, z2->dp[i]);
15668. z1->dp[i] = l;
15669. l = h;
15670. h = 0;
15671. }
15672. z1->dp[i] += l; /* propagate final borrow into the top digit */
15673. /* r += z1 << 24 */
15674. l = 0;
15675. h = 0;
15676. for (i = 0; i < 24; i++) {
15677. SP_ASM_ADDC(l, h, r->dp[i + 24]);
15678. SP_ASM_ADDC(l, h, z1->dp[i]);
15679. r->dp[i + 24] = l;
15680. l = h;
15681. h = 0;
15682. }
15683. for (; i < 49; i++) { /* digits above z0: nothing in r yet */
15684. SP_ASM_ADDC(l, h, z1->dp[i]);
15685. r->dp[i + 24] = l;
15686. l = h;
15687. h = 0;
15688. }
15689. /* r += z2 << 48 */
15690. l = 0;
15691. h = 0;
15692. for (i = 0; i < 25; i++) { /* overlap with digits already written from z1 */
15693. SP_ASM_ADDC(l, h, r->dp[i + 48]);
15694. SP_ASM_ADDC(l, h, z2->dp[i]);
15695. r->dp[i + 48] = l;
15696. l = h;
15697. h = 0;
15698. }
15699. for (; i < 48; i++) {
15700. SP_ASM_ADDC(l, h, z2->dp[i]);
15701. r->dp[i + 48] = l;
15702. l = h;
15703. h = 0;
15704. }
15705. r->used = 96; /* maximum possible size; clamp below */
15706. sp_clamp(r);
15707. }
15708. FREE_SP_INT_ARRAY(z, NULL);
15709. FREE_SP_INT(a1, NULL);
15710. return err;
15711. }
  15712. #endif /* SP_INT_DIGITS >= 96 */
  15713. #if SP_INT_DIGITS >= 128
  15714. /* Square a and store in r. r = a * a
  15715. *
  15716. * Karatsuba implementation.
  15717. *
  15718. * @param [in] a SP integer to square.
  15719. * @param [out] r SP integer result.
  15720. *
  15721. * @return MP_OKAY on success.
  15722. * @return MP_MEM when dynamic memory allocation fails.
  15723. */
15724. static int _sp_sqr_64(const sp_int* a, sp_int* r)
15725. {
15726. int err = MP_OKAY;
15727. unsigned int i;
15728. sp_int_digit l;
15729. sp_int_digit h;
15730. sp_int* z0; /* low partial product a0^2 (aliases r) */
15731. sp_int* z1; /* middle partial product (a0 + a1)^2 */
15732. sp_int* z2; /* high partial product a1^2 */
15733. sp_int_digit ca; /* carry out of the digit-wise a0 + a1 */
15734. DECL_SP_INT(a1, 32);
15735. DECL_SP_INT_ARRAY(z, 65, 2);
15736. ALLOC_SP_INT(a1, 32, err, NULL);
15737. ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
15738. if (err == MP_OKAY) {
15739. z1 = z[0];
15740. z2 = z[1];
15741. z0 = r; /* build the low half of the result in place */
15742. XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32); /* a1 = top 32 digits of a */
15743. a1->used = 32;
15744. /* z2 = a1 ^ 2 */
15745. err = _sp_sqr_32(a1, z2);
15746. }
15747. if (err == MP_OKAY) {
15748. l = 0;
15749. h = 0;
15750. for (i = 0; i < 32; i++) { /* a1 = a0 + a1 (digit-wise, carry in l/h) */
15751. SP_ASM_ADDC(l, h, a1->dp[i]);
15752. SP_ASM_ADDC(l, h, a->dp[i]);
15753. a1->dp[i] = l;
15754. l = h;
15755. h = 0;
15756. }
15757. ca = l; /* carry out of a0 + a1 (0 or 1) */
15758. /* z0 = a0 ^ 2 */
15759. err = _sp_sqr_32(a, z0);
15760. }
15761. if (err == MP_OKAY) {
15762. /* z1 = (a0 + a1) ^ 2 */
15763. err = _sp_sqr_32(a1, z1);
15764. }
15765. if (err == MP_OKAY) {
15766. /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
15767. /* r = z0 */
15768. /* r += (z1 - z0 - z2) << 32 */
15769. z1->dp[64] = ca; /* ca^2 term of (ca*B^32 + s)^2; ca is 0 or 1 */
15770. l = 0;
15771. if (ca) { /* add the 2*ca*s*B^32 cross term: z1 += 2 * a1 << 32 digits */
15772. l = z1->dp[0 + 32];
15773. h = 0;
15774. SP_ASM_ADDC(l, h, a1->dp[0]);
15775. SP_ASM_ADDC(l, h, a1->dp[0]);
15776. z1->dp[0 + 32] = l;
15777. l = h;
15778. h = 0;
15779. for (i = 1; i < 32; i++) {
15780. SP_ASM_ADDC(l, h, z1->dp[i + 32]);
15781. SP_ASM_ADDC(l, h, a1->dp[i]);
15782. SP_ASM_ADDC(l, h, a1->dp[i]);
15783. z1->dp[i + 32] = l;
15784. l = h;
15785. h = 0;
15786. }
15787. }
15788. z1->dp[64] += l;
15789. /* z1 = z1 - z0 - z2 */
15790. l = z1->dp[0];
15791. h = 0;
15792. SP_ASM_SUBB(l, h, z0->dp[0]);
15793. SP_ASM_SUBB(l, h, z2->dp[0]);
15794. z1->dp[0] = l;
15795. l = h;
15796. h = 0;
15797. for (i = 1; i < 64; i++) {
15798. l += z1->dp[i];
15799. SP_ASM_SUBB(l, h, z0->dp[i]);
15800. SP_ASM_SUBB(l, h, z2->dp[i]);
15801. z1->dp[i] = l;
15802. l = h;
15803. h = 0;
15804. }
15805. z1->dp[i] += l; /* propagate final borrow into the top digit */
15806. /* r += z1 << 32 */
15807. l = 0;
15808. h = 0;
15809. for (i = 0; i < 32; i++) {
15810. SP_ASM_ADDC(l, h, r->dp[i + 32]);
15811. SP_ASM_ADDC(l, h, z1->dp[i]);
15812. r->dp[i + 32] = l;
15813. l = h;
15814. h = 0;
15815. }
15816. for (; i < 65; i++) { /* digits above z0: nothing in r yet */
15817. SP_ASM_ADDC(l, h, z1->dp[i]);
15818. r->dp[i + 32] = l;
15819. l = h;
15820. h = 0;
15821. }
15822. /* r += z2 << 64 */
15823. l = 0;
15824. h = 0;
15825. for (i = 0; i < 33; i++) { /* overlap with digits already written from z1 */
15826. SP_ASM_ADDC(l, h, r->dp[i + 64]);
15827. SP_ASM_ADDC(l, h, z2->dp[i]);
15828. r->dp[i + 64] = l;
15829. l = h;
15830. h = 0;
15831. }
15832. for (; i < 64; i++) {
15833. SP_ASM_ADDC(l, h, z2->dp[i]);
15834. r->dp[i + 64] = l;
15835. l = h;
15836. h = 0;
15837. }
15838. r->used = 128; /* maximum possible size; clamp below */
15839. sp_clamp(r);
15840. }
15841. FREE_SP_INT_ARRAY(z, NULL);
15842. FREE_SP_INT(a1, NULL);
15843. return err;
15844. }
  15845. #endif /* SP_INT_DIGITS >= 128 */
  15846. #if SP_INT_DIGITS >= 192
  15847. /* Square a and store in r. r = a * a
  15848. *
  15849. * Karatsuba implementation.
  15850. *
  15851. * @param [in] a SP integer to square.
  15852. * @param [out] r SP integer result.
  15853. *
  15854. * @return MP_OKAY on success.
  15855. * @return MP_MEM when dynamic memory allocation fails.
  15856. */
15857. static int _sp_sqr_96(const sp_int* a, sp_int* r)
15858. {
15859. int err = MP_OKAY;
15860. unsigned int i;
15861. sp_int_digit l;
15862. sp_int_digit h;
15863. sp_int* z0; /* low partial product a0^2 (aliases r) */
15864. sp_int* z1; /* middle partial product (a0 + a1)^2 */
15865. sp_int* z2; /* high partial product a1^2 */
15866. sp_int_digit ca; /* carry out of the digit-wise a0 + a1 */
15867. DECL_SP_INT(a1, 48);
15868. DECL_SP_INT_ARRAY(z, 97, 2);
15869. ALLOC_SP_INT(a1, 48, err, NULL);
15870. ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
15871. if (err == MP_OKAY) {
15872. z1 = z[0];
15873. z2 = z[1];
15874. z0 = r; /* build the low half of the result in place */
15875. XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48); /* a1 = top 48 digits of a */
15876. a1->used = 48;
15877. /* z2 = a1 ^ 2 */
15878. err = _sp_sqr_48(a1, z2);
15879. }
15880. if (err == MP_OKAY) {
15881. l = 0;
15882. h = 0;
15883. for (i = 0; i < 48; i++) { /* a1 = a0 + a1 (digit-wise, carry in l/h) */
15884. SP_ASM_ADDC(l, h, a1->dp[i]);
15885. SP_ASM_ADDC(l, h, a->dp[i]);
15886. a1->dp[i] = l;
15887. l = h;
15888. h = 0;
15889. }
15890. ca = l; /* carry out of a0 + a1 (0 or 1) */
15891. /* z0 = a0 ^ 2 */
15892. err = _sp_sqr_48(a, z0);
15893. }
15894. if (err == MP_OKAY) {
15895. /* z1 = (a0 + a1) ^ 2 */
15896. err = _sp_sqr_48(a1, z1);
15897. }
15898. if (err == MP_OKAY) {
15899. /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
15900. /* r = z0 */
15901. /* r += (z1 - z0 - z2) << 48 */
15902. z1->dp[96] = ca; /* ca^2 term of (ca*B^48 + s)^2; ca is 0 or 1 */
15903. l = 0;
15904. if (ca) { /* add the 2*ca*s*B^48 cross term: z1 += 2 * a1 << 48 digits */
15905. l = z1->dp[0 + 48];
15906. h = 0;
15907. SP_ASM_ADDC(l, h, a1->dp[0]);
15908. SP_ASM_ADDC(l, h, a1->dp[0]);
15909. z1->dp[0 + 48] = l;
15910. l = h;
15911. h = 0;
15912. for (i = 1; i < 48; i++) {
15913. SP_ASM_ADDC(l, h, z1->dp[i + 48]);
15914. SP_ASM_ADDC(l, h, a1->dp[i]);
15915. SP_ASM_ADDC(l, h, a1->dp[i]);
15916. z1->dp[i + 48] = l;
15917. l = h;
15918. h = 0;
15919. }
15920. }
15921. z1->dp[96] += l;
15922. /* z1 = z1 - z0 - z2 */
15923. l = z1->dp[0];
15924. h = 0;
15925. SP_ASM_SUBB(l, h, z0->dp[0]);
15926. SP_ASM_SUBB(l, h, z2->dp[0]);
15927. z1->dp[0] = l;
15928. l = h;
15929. h = 0;
15930. for (i = 1; i < 96; i++) {
15931. l += z1->dp[i];
15932. SP_ASM_SUBB(l, h, z0->dp[i]);
15933. SP_ASM_SUBB(l, h, z2->dp[i]);
15934. z1->dp[i] = l;
15935. l = h;
15936. h = 0;
15937. }
15938. z1->dp[i] += l; /* propagate final borrow into the top digit */
15939. /* r += z1 << 48 */
15940. l = 0;
15941. h = 0;
15942. for (i = 0; i < 48; i++) {
15943. SP_ASM_ADDC(l, h, r->dp[i + 48]);
15944. SP_ASM_ADDC(l, h, z1->dp[i]);
15945. r->dp[i + 48] = l;
15946. l = h;
15947. h = 0;
15948. }
15949. for (; i < 97; i++) { /* digits above z0: nothing in r yet */
15950. SP_ASM_ADDC(l, h, z1->dp[i]);
15951. r->dp[i + 48] = l;
15952. l = h;
15953. h = 0;
15954. }
15955. /* r += z2 << 96 */
15956. l = 0;
15957. h = 0;
15958. for (i = 0; i < 49; i++) { /* overlap with digits already written from z1 */
15959. SP_ASM_ADDC(l, h, r->dp[i + 96]);
15960. SP_ASM_ADDC(l, h, z2->dp[i]);
15961. r->dp[i + 96] = l;
15962. l = h;
15963. h = 0;
15964. }
15965. for (; i < 96; i++) {
15966. SP_ASM_ADDC(l, h, z2->dp[i]);
15967. r->dp[i + 96] = l;
15968. l = h;
15969. h = 0;
15970. }
15971. r->used = 192; /* maximum possible size; clamp below */
15972. sp_clamp(r);
15973. }
15974. FREE_SP_INT_ARRAY(z, NULL);
15975. FREE_SP_INT(a1, NULL);
15976. return err;
15977. }
  15978. #endif /* SP_INT_DIGITS >= 192 */
  15979. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  15980. #endif /* !WOLFSSL_SP_SMALL */
  15981. /* Square a and store in r. r = a * a
  15982. *
  15983. * @param [in] a SP integer to square.
  15984. * @param [out] r SP integer result.
  15985. *
  15986. * @return MP_OKAY on success.
  15987. * @return MP_VAL when a or r is NULL, or the result will be too big for fixed
  15988. * data length.
  15989. * @return MP_MEM when dynamic memory allocation fails.
  15990. */
int sp_sqr(const sp_int* a, sp_int* r)
{
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
    /* Smallest code size: squaring is just multiplication by self. */
    return sp_mul(a, a, r);
#else
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
    }
#endif

    if (err == MP_OKAY) {
        if (a->used == 0) {
            /* 0 ^ 2 = 0 */
            _sp_zero(r);
        }
        else
        /* Dispatch to an unrolled fixed-size implementation when the digit
         * count matches one compiled in; otherwise fall through to the
         * generic _sp_sqr() at the bottom of the chain.
         */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if (a->used == 4) {
            err = _sp_sqr_4(a, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 6) {
            err = _sp_sqr_6(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if (a->used == 8) {
            err = _sp_sqr_8(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 12) {
            err = _sp_sqr_12(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
     (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if (a->used == 16) {
            err = _sp_sqr_16(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if (a->used == 24) {
            err = _sp_sqr_24(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if (a->used == 32) {
            err = _sp_sqr_32(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if (a->used == 48) {
            err = _sp_sqr_48(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if (a->used == 64) {
            err = _sp_sqr_64(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if (a->used == 96) {
            err = _sp_sqr_96(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        {
            /* Generic any-size squaring. */
            err = _sp_sqr(a, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* A square is never negative. */
        r->sign = MP_ZPOS;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rsqr");
    }
#endif

    return err;
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
}
  16108. /* END SP_SQR implementations */
  16109. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  16110. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  16111. #if defined(WOLFSSL_SP_MATH_ALL) || \
  16112. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  16113. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
  16114. /* Square a mod m and store in r: r = (a * a) mod m
  16115. *
  16116. * @param [in] a SP integer to square.
  16117. * @param [in] m SP integer that is the modulus.
  16118. * @param [out] r SP integer result.
  16119. *
  16120. * @return MP_OKAY on success.
  16121. * @return MP_MEM when dynamic memory allocation fails.
  16122. */
  16123. static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
  16124. {
  16125. int err = MP_OKAY;
  16126. /* Create temporary for multiplication result. */
  16127. DECL_SP_INT(t, a->used * 2);
  16128. ALLOC_SP_INT(t, a->used * 2, err, NULL);
  16129. if (err == MP_OKAY) {
  16130. err = sp_init_size(t, a->used * 2);
  16131. }
  16132. /* Square and reduce. */
  16133. if (err == MP_OKAY) {
  16134. err = sp_sqr(a, t);
  16135. }
  16136. if (err == MP_OKAY) {
  16137. err = sp_mod(t, m, r);
  16138. }
  16139. /* Dispose of an allocated SP int. */
  16140. FREE_SP_INT(t, NULL);
  16141. return err;
  16142. }
  16143. /* Square a mod m and store in r: r = (a * a) mod m
  16144. *
  16145. * @param [in] a SP integer to square.
  16146. * @param [in] m SP integer that is the modulus.
  16147. * @param [out] r SP integer result.
  16148. *
  16149. * @return MP_OKAY on success.
  16150. * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
  16151. * for fixed data length.
  16152. * @return MP_MEM when dynamic memory allocation fails.
  16153. */
  16154. int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
  16155. {
  16156. int err = MP_OKAY;
  16157. /* Validate parameters. */
  16158. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  16159. err = MP_VAL;
  16160. }
  16161. /* Ensure r has space for intermediate result. */
  16162. if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
  16163. err = MP_VAL;
  16164. }
  16165. /* Ensure a is not too big. */
  16166. if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
  16167. err = MP_VAL;
  16168. }
  16169. /* Use r as intermediate result if not same as pointer m which is needed
  16170. * after first intermediate result.
  16171. */
  16172. if ((err == MP_OKAY) && (r != m)) {
  16173. /* Square and reduce. */
  16174. err = sp_sqr(a, r);
  16175. if (err == MP_OKAY) {
  16176. err = sp_mod(r, m, r);
  16177. }
  16178. }
  16179. else if (err == MP_OKAY) {
  16180. /* Do operation with temporary. */
  16181. err = _sp_sqrmod(a, m, r);
  16182. }
  16183. return err;
  16184. }
  16185. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  16186. /**********************
  16187. * Montgomery functions
  16188. **********************/
  16189. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  16190. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  16191. defined(OPENSSL_ALL)
  16192. /* Reduce a number in Montgomery form.
  16193. *
  16194. * Assumes a and m are not NULL and m is not 0.
  16195. *
  16196. * DigitMask(a,i) := mask out the 'i'th digit in place.
  16197. *
  16198. * Algorithm:
  16199. * 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
  16200. * 2. For i = 0..NumDigits(m)-1
  16201. * 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
  16202. * 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
  16203. * 2.3. a += mu * DigitMask(m, 0)
  16204. * 2.4. For j = 1 up to NumDigits(m)-2
  16205. * 2.4.1 a += mu * DigitMask(m, j)
  16206. * 2.5 a += mu * DigitMask(m, NumDigits(m)-1))
  16207. * 3. a >>= NumBits(m)
  16208. * 4. a = a % m
  16209. *
  16210. * @param [in,out] a SP integer to Montgomery reduce.
  16211. * @param [in] m SP integer that is the modulus.
  16212. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  16213. *
  16214. * @return MP_OKAY on success.
  16215. */
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp)
{
#if !defined(SQR_MUL_ASM)
    /* Portable implementation using double-width sp_int_word arithmetic. */
    unsigned int i;
    int bits;
    sp_int_word w;
    sp_int_digit mu;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    /* Count bits in modulus. */
    bits = sp_count_bits(m);

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    for (i = a->used; i < m->used * 2; i++) {
        a->dp[i] = 0;
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        w = a->dp[0];
        w += (sp_int_word)mu * m->dp[0];
        a->dp[0] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        w += a->dp[1];
        a->dp[1] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        a->dp[2] = (sp_int_digit)w;
        a->used = 3;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
    else {
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
         *    Mask when last digit of modulus doesn't have highest bit set.
         */
        sp_int_digit mask = (sp_int_digit)
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
        /* Overflow. */
        sp_int_word o = 0;

        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            unsigned int j;

            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[i];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3. a += mu * DigitMask(m, 0) */
            w = a->dp[i];
            w += (sp_int_word)mu * m->dp[0];
            a->dp[i] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
            for (j = 1; j < m->used - 1; j++) {
                /* 2.4.1 a += mu * DigitMask(m, j) */
                w += a->dp[i + j];
                w += (sp_int_word)mu * m->dp[j];
                a->dp[i + j] = (sp_int_digit)w;
                w >>= SP_WORD_SIZE;
            }
            /* Handle overflow. */
            w += o;
            w += a->dp[i + j];
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1)) */
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
            a->dp[i + j] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            o += w;
        }
        /* Handle overflow. */
        o += a->dp[m->used * 2 - 1];
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
        o >>= SP_WORD_SIZE;
        a->dp[m->used * 2] = (sp_int_digit)o;
        a->used = m->used * 2 + 1;
    }

    /* Remove leading zeros. */
    sp_clamp(a);

    /* 3. a >>= NumBits(m) */
    (void)sp_rshb(a, bits, a);

    /* 4. a = a mod m */
    if (_sp_cmp_abs(a, m) != MP_LT) {
        _sp_sub_off(a, m, a, 0);
    }

#if 0
    sp_print(a, "rr");
#endif
    return MP_OKAY;
#else /* !SQR_MUL_ASM */
    /* Implementation using the two-register add/mul assembly macros. */
    unsigned int i;
    unsigned int j;
    int bits;
    sp_int_digit mu;
    sp_int_digit o;
    sp_int_digit mask;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    bits = sp_count_bits(m);
    /* Mask for the top digit of mu when the modulus' top bit is not set. */
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    for (i = a->used; i < m->used * 2; i++) {
        a->dp[i] = 0;
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        sp_int_digit l;
        sp_int_digit h;

        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        l = a->dp[0];
        h = 0;
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
        a->dp[0] = l;
        l = h;
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[1]);
        a->dp[1] = l;
        a->dp[2] = h;
        a->used = m->used * 2 + 1;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
#if SP_WORD_SIZE == 64
#if SP_INT_DIGITS >= 8
    /* Unrolled 4-digit modulus (e.g. 256-bit) with full top word - the
     * result is shifted down as it is produced so no final sp_rshb is
     * needed in this branch.
     */
    else if ((m->used == 4) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 4; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
            a->dp[2] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[7]);
        a->dp[3] = l;
        a->dp[4] = h;
        a->used = 5;

        /* Remove leading zeros. */
        sp_clamp(a);

        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }

        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 8 */
#if SP_INT_DIGITS >= 12
    /* Unrolled 6-digit modulus (e.g. 384-bit) with full top word. */
    else if ((m->used == 6) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 6; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[3]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
            a->dp[2] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[4]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
            a->dp[3] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
            a->dp[4] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[11]);
        a->dp[5] = l;
        a->dp[6] = h;
        a->used = 7;

        /* Remove leading zeros. */
        sp_clamp(a);

        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }

        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 12 */
#elif SP_WORD_SIZE == 32
    /* 32-bit variant for moduli up to 12 digits with full top word -
     * shifts the result down as it goes (stores to ad[j - 1]).
     */
    else if ((m->used <= 12) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            md = m->dp;
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* a = (a + mu * m, 0) >> WORD_SIZE */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            l = h;
            /* Process two digits per iteration, alternating l/h roles. */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = 0;
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j] = h;
            }
            /* Odd remaining digit. */
            for (; j < m->used - 1; j++) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, ad[i + j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j - 1] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used - 1] = l;
        a->dp[m->used] = h;
        a->used = m->used + 1;

        /* Remove leading zeros. */
        sp_clamp(a);

        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }

        return MP_OKAY;
    }
#endif /* SP_WORD_SIZE == 64 | 32 */
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
    else {
        /* General case: any modulus size, any top-word fill. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++, ad++) {
            md = m->dp;
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3 a += mu * DigitMask(m, 0) */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            ad[0] = l;
            l = h;
            /* 2.4. For j = 1 up to NumDigits(m)-2, two digits at a time */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j + 0]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j + 0] = l;
                l = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j + 1] = h;
            }
            /* Odd remaining digit. */
            for (; j < m->used - 1; j++) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
            SP_ASM_ADDC(l, h, ad[j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used * 2 - 1] = l;
        a->dp[m->used * 2] = h;
        a->used = m->used * 2 + 1;
    }

    /* Remove leading zeros. */
    sp_clamp(a);
    /* 3. a >>= NumBits(m) */
    (void)sp_rshb(a, bits, a);
    /* a = a mod m */
    if (_sp_cmp_abs(a, m) != MP_LT) {
        _sp_sub_off(a, m, a, 0);
    }

#if 0
    sp_print(a, "rr");
#endif
    return MP_OKAY;
#endif /* !SQR_MUL_ASM */
}
  16597. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  16598. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  16599. /* Reduce a number in Montgomery form.
  16600. *
  16601. * @param [in,out] a SP integer to Montgomery reduce.
  16602. * @param [in] m SP integer that is the modulus.
  16603. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  16604. *
  16605. * @return MP_OKAY on success.
  16606. * @return MP_VAL when a or m is NULL or m is zero.
  16607. */
  16608. int sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp)
  16609. {
  16610. int err;
  16611. /* Validate parameters. */
  16612. if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
  16613. err = MP_VAL;
  16614. }
  16615. /* Ensure a has enough space for calculation. */
  16616. else if (a->size < m->used * 2 + 1) {
  16617. err = MP_VAL;
  16618. }
  16619. else {
  16620. /* Perform Montogomery Reduction. */
  16621. err = _sp_mont_red(a, m, mp);
  16622. }
  16623. return err;
  16624. }
  16625. #endif
  16626. /* Calculate the bottom digit of the inverse of negative m.
  16627. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16628. *
  16629. * Used when performing Montgomery Reduction.
  16630. * m must be odd.
  16631. * Jeffrey Hurchalla’s method.
  16632. * https://arxiv.org/pdf/2204.04342.pdf
  16633. *
  16634. * @param [in] m SP integer that is the modulus.
  16635. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16636. */
  16637. static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16638. {
  16639. sp_int_digit d = m->dp[0];
  16640. sp_int_digit x = (3 * d) ^ 2;
  16641. sp_int_digit y = 1 - d * x;
  16642. #if SP_WORD_SIZE >= 16
  16643. x *= 1 + y; y *= y;
  16644. #endif
  16645. #if SP_WORD_SIZE >= 32
  16646. x *= 1 + y; y *= y;
  16647. #endif
  16648. #if SP_WORD_SIZE >= 64
  16649. x *= 1 + y; y *= y;
  16650. #endif
  16651. x *= 1 + y;
  16652. /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
  16653. *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
  16654. }
  16655. /* Calculate the bottom digit of the inverse of negative m.
  16656. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16657. *
  16658. * Used when performing Montgomery Reduction.
  16659. *
  16660. * @param [in] m SP integer that is the modulus.
  16661. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16662. *
  16663. * @return MP_OKAY on success.
  16664. * @return MP_VAL when m or rho is NULL.
  16665. */
  16666. int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16667. {
  16668. int err = MP_OKAY;
  16669. /* Validate parameters. */
  16670. if ((m == NULL) || (rho == NULL)) {
  16671. err = MP_VAL;
  16672. }
  16673. /* Calculation only works with odd modulus. */
  16674. if ((err == MP_OKAY) && !sp_isodd(m)) {
  16675. err = MP_VAL;
  16676. }
  16677. if (err == MP_OKAY) {
  16678. /* Calculate negative of inverse mod 2^n. */
  16679. _sp_mont_setup(m, rho);
  16680. }
  16681. return err;
  16682. }
  16683. /* Calculate the normalization value of m.
  16684. * norm = 2^k - m, where k is the number of bits in m
  16685. *
  16686. * @param [out] norm SP integer that normalises numbers into Montgomery
  16687. * form.
  16688. * @param [in] m SP integer that is the modulus.
  16689. *
  16690. * @return MP_OKAY on success.
  16691. * @return MP_VAL when norm or m is NULL, or number of bits in m is maximual.
  16692. */
  16693. int sp_mont_norm(sp_int* norm, const sp_int* m)
  16694. {
  16695. int err = MP_OKAY;
  16696. unsigned int bits = 0;
  16697. /* Validate parameters. */
  16698. if ((norm == NULL) || (m == NULL)) {
  16699. err = MP_VAL;
  16700. }
  16701. if (err == MP_OKAY) {
  16702. /* Find top bit and ensure norm has enough space. */
  16703. bits = (unsigned int)sp_count_bits(m);
  16704. if (bits >= norm->size * SP_WORD_SIZE) {
  16705. err = MP_VAL;
  16706. }
  16707. }
  16708. if (err == MP_OKAY) {
  16709. /* Round up for case when m is less than a word - no advantage in using
  16710. * a smaller mask and would take more operations.
  16711. */
  16712. if (bits < SP_WORD_SIZE) {
  16713. bits = SP_WORD_SIZE;
  16714. }
  16715. /* Smallest number greater than m of form 2^n. */
  16716. _sp_zero(norm);
  16717. err = sp_set_bit(norm, (int)bits);
  16718. }
  16719. if (err == MP_OKAY) {
  16720. /* norm = 2^n % m */
  16721. err = sp_sub(norm, m, norm);
  16722. }
  16723. if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
  16724. /* Sub made norm one word and now finish calculation. */
  16725. norm->dp[0] %= m->dp[0];
  16726. }
  16727. if (err == MP_OKAY) {
  16728. /* Remove leading zeros. */
  16729. sp_clamp(norm);
  16730. }
  16731. return err;
  16732. }
  16733. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
  16734. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  16735. /*********************************
  16736. * To and from binary and strings.
  16737. *********************************/
  16738. /* Calculate the number of 8-bit values required to represent the
  16739. * multi-precision number.
  16740. *
  16741. * When a is NULL, return s 0.
  16742. *
  16743. * @param [in] a SP integer.
  16744. *
  16745. * @return The count of 8-bit values.
  16746. * @return 0 when a is NULL.
  16747. */
  16748. int sp_unsigned_bin_size(const sp_int* a)
  16749. {
  16750. int cnt = 0;
  16751. if (a != NULL) {
  16752. cnt = (sp_count_bits(a) + 7) / 8;
  16753. }
  16754. return cnt;
  16755. }
  16756. /* Convert a number as an array of bytes in big-endian format to a
  16757. * multi-precision number.
  16758. *
  16759. * @param [out] a SP integer.
  16760. * @param [in] in Array of bytes.
  16761. * @param [in] inSz Number of data bytes in array.
  16762. *
  16763. * @return MP_OKAY on success.
  16764. * @return MP_VAL when the number is too big to fit in an SP.
  16765. */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
        err = MP_VAL;
    }

    /* Check a has enough space for number. */
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Load full digits at a time from in, least significant digit
         * first (in is big-endian so we walk it from the end).
         */
        int i;
        int j = 0;

        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;

#if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
        /* Data endian matches respresentation of number.
         * Directly copy if we don't have alignment issues.
         */
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
        }
#else
        /* Construct digit from required number of bytes. */
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            a->dp[j] = ((sp_int_digit)in[i - 0] << 0)
#if SP_WORD_SIZE >= 16
                     | ((sp_int_digit)in[i - 1] << 8)
#endif
#if SP_WORD_SIZE >= 32
                     | ((sp_int_digit)in[i - 2] << 16) |
                       ((sp_int_digit)in[i - 3] << 24)
#endif
#if SP_WORD_SIZE >= 64
                     | ((sp_int_digit)in[i - 4] << 32) |
                       ((sp_int_digit)in[i - 5] << 40) |
                       ((sp_int_digit)in[i - 6] << 48) |
                       ((sp_int_digit)in[i - 7] << 56)
#endif
                     ;
            j++;
        }
#endif
#if SP_WORD_SIZE >= 16
        /* Handle leftovers - inSz not a multiple of the digit size. */
        if (i >= 0) {
#ifdef BIG_ENDIAN_ORDER
            int s;

            /* Place remaining bytes into last digit. */
            a->dp[a->used - 1] = 0;
            for (s = 0; i >= 0; i--,s += 8) {
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
            }
#else
            /* Cast digits to an array of bytes so we can insert directly. */
            byte *d = (byte*)a->dp;

            /* Zero out all bytes in last digit. */
            a->dp[a->used - 1] = 0;
            /* Place remaining bytes directly into digit. */
            switch (i) {
#if SP_WORD_SIZE >= 64
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
#endif
#if SP_WORD_SIZE >= 32
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
#endif
                case 0: d[inSz - 1 - 0] = in[0];
            }
#endif /* LITTLE_ENDIAN_ORDER */
        }
#endif
        /* Remove leading zeros. */
        sp_clamp(a);
    }

    return err;
}
  16846. /* Convert the multi-precision number to an array of bytes in big-endian format.
  16847. *
  16848. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  16849. * to calculate the number of bytes required.
  16850. *
  16851. * @param [in] a SP integer.
  16852. * @param [out] out Array to put encoding into.
  16853. *
  16854. * @return MP_OKAY on success.
  16855. * @return MP_VAL when a or out is NULL.
  16856. */
  16857. int sp_to_unsigned_bin(const sp_int* a, byte* out)
  16858. {
  16859. /* Write assuming output buffer is big enough. */
  16860. return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
  16861. }
  16862. /* Convert the multi-precision number to an array of bytes in big-endian format.
  16863. *
  16864. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  16865. * to calculate the number of bytes required.
  16866. * Front-pads the output array with zeros to make number the size of the array.
  16867. *
  16868. * @param [in] a SP integer.
  16869. * @param [out] out Array to put encoding into.
  16870. * @param [in] outSz Size of the array in bytes.
  16871. *
  16872. * @return MP_OKAY on success.
  16873. * @return MP_VAL when a or out is NULL.
  16874. */
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
        err = MP_VAL;
    }

#if SP_WORD_SIZE > 8
    if (err == MP_OKAY) {
        /* Start at the end of the buffer - least significant byte. */
        int j = outSz - 1;

        if (!sp_iszero(a)) {
            unsigned int i;

            /* Put each digit in. */
            for (i = 0; (j >= 0) && (i < a->used); i++) {
                int b;
                sp_int_digit d = a->dp[i];
                /* Place each byte of a digit into the buffer. */
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
                    out[j--] = (byte)d;
                    d >>= 8;
                    /* Stop if the output buffer is filled. */
                    if (j < 0) {
                        /* Fail when significant digits or bits remain that
                         * did not fit in outSz bytes.
                         */
                        if ((i < a->used - 1) || (d > 0)) {
                            err = MP_VAL;
                        }
                        break;
                    }
                }
            }
        }
        /* Front pad buffer with 0s. */
        for (; j >= 0; j--) {
            out[j] = 0;
        }
    }
#else
    /* 8-bit digits: one output byte per digit. */
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        unsigned int i;
        int j;

        /* Front pad with 0s then copy digits in reverse order. */
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
            out[j] = a->dp[i];
        }
    }
#endif
    return err;
}
  16926. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  16927. !defined(WOLFSSL_RSA_VERIFY_ONLY)
  16928. /* Store the number in big-endian format in array at an offset.
  16929. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  16930. * to calculate the number of bytes required.
  16931. *
  16932. * @param [in] o Offset into array o start encoding.
  16933. * @param [in] a SP integer.
  16934. * @param [out] out Array to put encoding into.
  16935. *
  16936. * @return Index of next byte after data.
  16937. * @return MP_VAL when a or out is NULL.
  16938. */
  16939. int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
  16940. {
  16941. /* Get length of data that will be written. */
  16942. int len = sp_unsigned_bin_size(a);
  16943. /* Write number to buffer at offset. */
  16944. int ret = sp_to_unsigned_bin_len(a, out + o, len);
  16945. if (ret == MP_OKAY) {
  16946. /* Return offset of next byte after number. */
  16947. ret = o + len;
  16948. }
  16949. return ret;
  16950. }
  16951. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
  16952. #ifdef WOLFSSL_SP_READ_RADIX_16
  16953. /* Convert hexadecimal number as string in big-endian format to a
  16954. * multi-precision number.
  16955. *
  16956. * Assumes negative sign and leading zeros have been stripped.
  16957. *
  16958. * @param [out] a SP integer.
  16959. * @param [in] in NUL terminated string.
  16960. *
  16961. * @return MP_OKAY on success.
  16962. * @return MP_VAL when radix not supported, value is negative, or a character
  16963. * is not valid.
  16964. */
static int _sp_read_radix_16(sp_int* a, const char* in)
{
    int err = MP_OKAY;
    int i;
    unsigned int s = 0;   /* Bit position of next nibble within digit. */
    unsigned int j = 0;   /* Index of digit in 'a' being filled. */
    sp_int_digit d;

    /* Make all nibbles in digit 0. */
    d = 0;
    /* Step through string a character at a time starting at end - least
     * significant byte. */
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
        /* Convert character from hex. */
        int ch = (int)HexCharToByte(in[i]);
        /* Check for invalid character. */
        if (ch < 0) {
            err = MP_VAL;
            break;
        }

        /* Check whether we have filled the digit. */
        if (s == SP_WORD_SIZE) {
            /* Store digit and move index to next in a. */
            a->dp[j++] = d;
            /* Fail if we are out of space in a. */
            if (j >= a->size) {
                err = MP_VAL;
                break;
            }
            /* Set shift back to 0 - lowest nibble. */
            s = 0;
            /* Make all nibbles in digit 0. */
            d = 0;
        }

        /* Put next nibble into digit. */
        d |= ((sp_int_digit)ch) << s;
        /* Update shift for next nibble. */
        s += 4;
    }

    if (err == MP_OKAY) {
        /* If space, store last (possibly partial) digit. The out-of-space
         * case breaks out of the loop above with MP_VAL, so j < a->size
         * holds here for valid input. */
        if (j < a->size) {
            a->dp[j] = d;
        }
        /* Update used count. */
        a->used = j + 1;
        /* Remove leading zeros. */
        sp_clamp(a);
    }

    return err;
}
  17015. #endif /* WOLFSSL_SP_READ_RADIX_16 */
  17016. #ifdef WOLFSSL_SP_READ_RADIX_10
  17017. /* Convert decimal number as string in big-endian format to a multi-precision
  17018. * number.
  17019. *
  17020. * Assumes negative sign and leading zeros have been stripped.
  17021. *
  17022. * @param [out] a SP integer.
  17023. * @param [in] in NUL terminated string.
  17024. *
  17025. * @return MP_OKAY on success.
  17026. * @return MP_VAL when radix not supported, value is negative, or a character
  17027. * is not valid.
  17028. */
  17029. static int _sp_read_radix_10(sp_int* a, const char* in)
  17030. {
  17031. int err = MP_OKAY;
  17032. int i;
  17033. char ch;
  17034. /* Start with a being zero. */
  17035. _sp_zero(a);
  17036. /* Process all characters. */
  17037. for (i = 0; in[i] != '\0'; i++) {
  17038. /* Get character. */
  17039. ch = in[i];
  17040. /* Check character is valid. */
  17041. if ((ch >= '0') && (ch <= '9')) {
  17042. /* Assume '0'..'9' are continuous valus as characters. */
  17043. ch -= '0';
  17044. }
  17045. else {
  17046. /* Return error on invalid character. */
  17047. err = MP_VAL;
  17048. break;
  17049. }
  17050. /* Multiply a by 10. */
  17051. err = _sp_mul_d(a, 10, a, 0);
  17052. if (err != MP_OKAY) {
  17053. break;
  17054. }
  17055. /* Add character value. */
  17056. err = _sp_add_d(a, (sp_int_digit)ch, a);
  17057. if (err != MP_OKAY) {
  17058. break;
  17059. }
  17060. }
  17061. return err;
  17062. }
  17063. #endif /* WOLFSSL_SP_READ_RADIX_10 */
  17064. #if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
  17065. /* Convert a number as string in big-endian format to a big number.
  17066. * Only supports base-16 (hexadecimal) and base-10 (decimal).
  17067. *
  17068. * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
  17069. *
  17070. * @param [out] a SP integer.
  17071. * @param [in] in NUL terminated string.
  17072. * @param [in] radix Number of values in a digit.
  17073. *
  17074. * @return MP_OKAY on success.
  17075. * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
  17076. * or a character is not valid.
  17077. */
  17078. int sp_read_radix(sp_int* a, const char* in, int radix)
  17079. {
  17080. int err = MP_OKAY;
  17081. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17082. unsigned int sign = MP_ZPOS;
  17083. #endif
  17084. if ((a == NULL) || (in == NULL)) {
  17085. err = MP_VAL;
  17086. }
  17087. if (err == MP_OKAY) {
  17088. #ifndef WOLFSSL_SP_INT_NEGATIVE
  17089. if (*in == '-') {
  17090. err = MP_VAL;
  17091. }
  17092. else
  17093. #endif
  17094. {
  17095. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17096. if (*in == '-') {
  17097. /* Make number negative if signed string. */
  17098. sign = MP_NEG;
  17099. in++;
  17100. }
  17101. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  17102. /* Skip leading zeros. */
  17103. while (*in == '0') {
  17104. in++;
  17105. }
  17106. if (radix == 16) {
  17107. err = _sp_read_radix_16(a, in);
  17108. }
  17109. #ifdef WOLFSSL_SP_READ_RADIX_10
  17110. else if (radix == 10) {
  17111. err = _sp_read_radix_10(a, in);
  17112. }
  17113. #endif
  17114. else {
  17115. err = MP_VAL;
  17116. }
  17117. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17118. /* Ensure not negative when zero. */
  17119. if (err == MP_OKAY) {
  17120. if (sp_iszero(a)) {
  17121. a->sign = MP_ZPOS;
  17122. }
  17123. else {
  17124. a->sign = sign;
  17125. }
  17126. }
  17127. #endif
  17128. }
  17129. }
  17130. return err;
  17131. }
  17132. #endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
  17133. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17134. defined(WC_MP_TO_RADIX)
  17135. /* Put the big-endian, hex string encoding of a into str.
  17136. *
  17137. * Assumes str is large enough for result.
  17138. * Use sp_radix_size() to calculate required length.
  17139. *
  17140. * @param [in] a SP integer to convert.
  17141. * @param [out] str String to hold hex string result.
  17142. *
  17143. * @return MP_OKAY on success.
  17144. * @return MP_VAL when a or str is NULL.
  17145. */
int sp_tohex(const sp_int* a, char* str)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Quick out if number is zero. */
        if (sp_iszero(a) == MP_YES) {
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Make string represent complete bytes: "00" not "0". */
            *str++ = '0';
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
            *str++ = '0';
        }
        else {
            int i;
            int j;
            sp_int_digit d;

    #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Start at last digit - most significant digit. */
            i = (int)(a->used - 1);
            d = a->dp[i];
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Find highest non-zero byte in most-significant word.
             * Number is non-zero (checked above), so a non-zero byte exists
             * before i underflows. */
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
                /* When a byte at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xff) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 8;
                    d = a->dp[--i];
                }
            }
            /* Start with high nibble of byte. */
            j += 4;
    #else
            /* Find highest non-zero nibble in most-significant word. */
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                /* When a nibble at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xf) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 4;
                    d = a->dp[--i];
                }
            }
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
            /* Write out as much as required from most-significant digit. */
            for (; j >= 0; j -= 4) {
                *(str++) = ByteToHex((byte)(d >> j));
            }
            /* Write rest of digits - all nibbles, including leading zeros. */
            for (--i; i >= 0; i--) {
                /* Get digit from memory. */
                d = a->dp[i];
                /* Write out all nibbles of digit. */
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                    *(str++) = (char)ByteToHex((byte)(d >> j));
                }
            }
        }
        /* Terminate string. */
        *str = '\0';
    }

    return err;
}
  17228. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17229. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17230. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  17231. defined(WC_MP_TO_RADIX)
  17232. /* Put the big-endian, decimal string encoding of a into str.
  17233. *
  17234. * Assumes str is large enough for result.
  17235. * Use sp_radix_size() to calculate required length.
  17236. *
  17237. * @param [in] a SP integer to convert.
  17238. * @param [out] str String to hold hex string result.
  17239. *
  17240. * @return MP_OKAY on success.
  17241. * @return MP_VAL when a or str is NULL.
  17242. * @return MP_MEM when dynamic memory allocation fails.
  17243. */
int sp_todecimal(const sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;
    sp_int_digit d = 0;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    /* Quick out if number is zero. */
    else if (sp_iszero(a) == MP_YES) {
        *str++ = '0';
        *str = '\0';
    }
    else if (a->used >= SP_INT_DIGITS) {
        err = MP_VAL;
    }
    else {
        /* Temporary that is repeatedly divided by 10. */
        DECL_SP_INT(t, a->used + 1);

        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
        if (err == MP_OKAY) {
            _sp_copy(a, t);
        }
        if (err == MP_OKAY) {
    #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Write out little endian - least significant decimal digit
             * first; reversed to big-endian below. */
            i = 0;
            do {
                /* Divide by 10 and get remainder of division. */
                (void)sp_div_d(t, 10, t, &d);
                /* Write out remainder as a character. */
                str[i++] = (char)('0' + d);
            }
            /* Keep going while we there is a value to write. */
            while (!sp_iszero(t));
            /* Terminate string. */
            str[i] = '\0';

            if (err == MP_OKAY) {
                /* Reverse string to big endian - swap ends towards middle. */
                for (j = 0; j <= (i - 1) / 2; j++) {
                    int c = (unsigned char)str[j];
                    str[j] = str[i - 1 - j];
                    str[i - 1 - j] = (char)c;
                }
            }
        }

        FREE_SP_INT(t, NULL);
    }

    return err;
}
  17302. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  17303. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17304. defined(WC_MP_TO_RADIX)
  17305. /* Put the string version, big-endian, of a in str using the given radix.
  17306. *
  17307. * @param [in] a SP integer to convert.
  17308. * @param [out] str String to hold hex string result.
  17309. * @param [in] radix Base of character.
  17310. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17311. *
  17312. * @return MP_OKAY on success.
  17313. * @return MP_VAL when a or str is NULL, or radix not supported.
  17314. */
  17315. int sp_toradix(const sp_int* a, char* str, int radix)
  17316. {
  17317. int err = MP_OKAY;
  17318. /* Validate parameters. */
  17319. if ((a == NULL) || (str == NULL)) {
  17320. err = MP_VAL;
  17321. }
  17322. /* Handle base 16 if requested. */
  17323. else if (radix == MP_RADIX_HEX) {
  17324. err = sp_tohex(a, str);
  17325. }
  17326. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
  17327. defined(HAVE_COMP_KEY)
  17328. /* Handle base 10 if requested. */
  17329. else if (radix == MP_RADIX_DEC) {
  17330. err = sp_todecimal(a, str);
  17331. }
  17332. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  17333. else {
  17334. /* Base not supported. */
  17335. err = MP_VAL;
  17336. }
  17337. return err;
  17338. }
  17339. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17340. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17341. defined(WC_MP_TO_RADIX)
  17342. /* Calculate the length of the string version, big-endian, of a using the given
  17343. * radix.
  17344. *
  17345. * @param [in] a SP integer to convert.
  17346. * @param [in] radix Base of character.
  17347. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17348. * @param [out] size The number of characters in encoding.
  17349. *
  17350. * @return MP_OKAY on success.
  17351. * @return MP_VAL when a or size is NULL, or radix not supported.
  17352. */
int sp_radix_size(const sp_int* a, int radix, int* size)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (size == NULL)) {
        err = MP_VAL;
    }
    /* Handle base 16 if requested. */
    else if (radix == MP_RADIX_HEX) {
        if (a->used == 0) {
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* 00 and '\0' */
            *size = 2 + 1;
        #else
            /* Zero and '\0' */
            *size = 1 + 1;
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
        }
        else {
            /* Count of nibbles needed for the value's bits. */
            int cnt = (sp_count_bits(a) + 3) / 4;
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Must have even number of nibbles to have complete bytes. */
            if (cnt & 1) {
                cnt++;
            }
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Add to count of characters for negative sign. */
            if (a->sign == MP_NEG) {
                cnt++;
            }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
            /* One more for \0 */
            *size = cnt + 1;
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    /* Handle base 10 if requested. */
    else if (radix == MP_RADIX_DEC) {
        int i;
        sp_int_digit d;

        /* quick out if its zero */
        if (sp_iszero(a) == MP_YES) {
            /* Zero and '\0' */
            *size = 1 + 1;
        }
        else {
            DECL_SP_INT(t, a->used);

            /* Temporary to be divided by 10. */
            ALLOC_SP_INT(t, a->used, err, NULL);
            if (err == MP_OKAY) {
                t->size = a->used;
                _sp_copy(a, t);
            }

            if (err == MP_OKAY) {
                /* Count number of times number can be divided by 10 -
                 * one decimal character per division. */
                for (i = 0; !sp_iszero(t); i++) {
                    (void)sp_div_d(t, 10, t, &d);
                }
            #ifdef WOLFSSL_SP_INT_NEGATIVE
                /* Add to count of characters for negative sign. */
                if (a->sign == MP_NEG) {
                    i++;
                }
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
                /* One more for \0 */
                *size = i + 1;
            }

            FREE_SP_INT(t, NULL);
        }
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        /* Base not supported. */
        err = MP_VAL;
    }

    return err;
}
  17433. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17434. /***************************************
  17435. * Prime number generation and checking.
  17436. ***************************************/
  17437. #if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
  17438. !defined(NO_DSA)) && !defined(WC_NO_RNG)
  17439. #ifndef WOLFSSL_SP_MILLER_RABIN_CNT
  17440. /* Always done 8 iterations of Miller-Rabin on check of primality when
  17441. * generating.
  17442. */
  17443. #define WOLFSSL_SP_MILLER_RABIN_CNT 8
  17444. #endif
  17445. /* Generate a random prime for RSA only.
  17446. *
  17447. * @param [out] r SP integer to hold result.
  17448. * @param [in] len Number of bytes in prime. Use -ve to indicate the two
  17449. * lowest bits must be set.
  17450. * @param [in] rng Random number generator.
  17451. * @param [in] heap Heap hint. Unused.
  17452. *
  17453. * @return MP_OKAY on success
  17454. * @return MP_VAL when r or rng is NULL, length is not supported or random
  17455. * number generator fails.
  17456. */
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
{
    static const byte USE_BBS = 3;
    int   err = MP_OKAY;
    byte  low_bits = 1;
    int   isPrime = MP_NO;
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
    int   bits = 0;
#endif /* WOLFSSL_SP_MATH_ALL */
    unsigned int digits = 0;

    (void)heap;

    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Get type: negative length requests a Blum-Blum-Shub prime
         * (3 mod 4 - both low bits set). */
        if (len < 0) {
            low_bits = USE_BBS;
            len = -len;
        }

        /* Get number of digits required to handle required number of bytes. */
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
        /* Ensure result has space. */
        if (r->size < digits) {
            err = MP_VAL;
        }
    }

    if (err == MP_OKAY) {
    #ifndef WOLFSSL_SP_MATH_ALL
        /* For minimal maths, support only what's in SP and needed for DH. */
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
        if (len == 32) {
        }
        else
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
        /* Generate RSA primes that are half the modulus length. */
    #ifdef WOLFSSL_SP_4096
        if (len == 256) {
            /* Support 2048-bit operations compiled in. */
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_3072
        if (len == 192) {
            /* Support 1536-bit operations compiled in. */
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_2048
        if (len == 128) {
            /* Support 1024-bit operations compiled in. */
        }
        else
    #endif
        {
            /* Bit length not supported in SP. */
            err = MP_VAL;
        }
    #endif /* !WOLFSSL_SP_MATH_ALL */

    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Generated number is always positive. */
        r->sign = MP_ZPOS;
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Set number of digits that will be used. */
        r->used = digits;
    #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
        /* Calculate number of bits in last digit. */
        bits = (len * 8) & SP_WORD_MASK;
    #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
    }

    /* Assume the candidate is probably prime and then test until it is proven
     * composite.
     */
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
#ifdef SHOW_GEN
        printf(".");
        fflush(stdout);
#endif /* SHOW_GEN */
        /* Generate bytes into digit array. */
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
        if (err != 0) {
            err = MP_VAL;
            break;
        }

        /* Set top bits to ensure bit length required is generated.
         * Also set second top to help ensure product of two primes is
         * going to be twice the number of bits of each.
         */
#ifdef LITTLE_ENDIAN_ORDER
        ((byte*)r->dp)[len-1]             |= 0x80 | 0x40;
#else
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
#endif /* LITTLE_ENDIAN_ORDER */

#ifdef BIG_ENDIAN_ORDER
        /* Bytes were put into wrong place when less than full digit. */
        if (bits != 0) {
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
        }
#endif /* BIG_ENDIAN_ORDER */
#ifdef WOLFSSL_SP_MATH_ALL
        /* Mask top digit when less than a digit requested. */
        if (bits > 0) {
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
        }
#endif /* WOLFSSL_SP_MATH_ALL */

        /* Set mandatory low bits
         *  - bottom bit to make odd.
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
         */
        r->dp[0] |= low_bits;

        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
         * of a 1024-bit candidate being a false positive, when it is our
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
         */
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
            rng);
    }

    return err;
}
  17577. #endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
  17578. #ifdef WOLFSSL_SP_PRIME_GEN
  17579. /* Miller-Rabin test of "a" to the base of "b" as described in
  17580. * HAC pp. 139 Algorithm 4.24
  17581. *
  17582. * Sets result to 0 if definitely composite or 1 if probably prime.
  17583. * Randomly the chance of error is no more than 1/4 and often
  17584. * very much lower.
  17585. *
  17586. * a is assumed to be odd.
  17587. *
  17588. * @param [in] a SP integer to check.
  17589. * @param [in] b SP integer that is a small prime.
  17590. * @param [out] result MP_YES when number is likey prime.
  17591. * MP_NO otherwise.
  17592. * @param [in] n1 SP integer temporary.
  17593. * @param [in] r SP integer temporary.
  17594. *
  17595. * @return MP_OKAY on success.
  17596. * @return MP_MEM when dynamic memory allocation fails.
  17597. */
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
    sp_int* n1, sp_int* r)
{
    int err = MP_OKAY;
    int s = 0;
    /* y aliases b - the base is consumed and reused as the working value. */
    sp_int* y = b;

    /* Assume not prime. */
    *result = MP_NO;

    /* Ensure small prime is 2 or more. */
    if (sp_cmp_d(b, 1) != MP_GT) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* n1 = a - 1 (a is assumed odd, so no borrow can occur.) */
        (void)sp_copy(a, n1);
        n1->dp[0]--;

        /* Set 2**s * r = n1 */
        /* Count the number of least significant bits which are zero. */
        s = sp_cnt_lsb(n1);
        /* Divide n - 1 by 2**s into r. */
        (void)sp_rshb(n1, s, r);

        /* Compute y = b**r mod a */
        err = sp_exptmod(b, r, a, y);
    }
    if (err == MP_OKAY) {
        /* Assume probably prime until shown otherwise. */
        *result = MP_YES;

        /* If y != 1 and y != n1 do */
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
            int j = 1;
            /* While j <= s-1 and y != n1 - repeatedly square y. */
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
                /* Square for bit shifted down. */
                err = sp_sqrmod(y, a, y);
                if (err != MP_OKAY) {
                    break;
                }

                /* If y == 1 then composite - non-trivial square root of 1. */
                if (sp_cmp_d(y, 1) == MP_EQ) {
                    *result = MP_NO;
                    break;
                }
                ++j;
            }

            /* If y != n1 then composite. */
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
                *result = MP_NO;
            }
        }
    }

    return err;
}
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* The first 54 primes - each fits in an 8-bit digit. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes. */
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
  17701. /* Compare the first n primes with a.
  17702. *
  17703. * @param [in] a Number to check.
  17704. * @param [out] result Whether number was found to be prime.
  17705. * @return 0 when no small prime matches.
  17706. * @return 1 when small prime matches.
  17707. */
  17708. static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
  17709. {
  17710. int i;
  17711. int haveRes = 0;
  17712. *result = MP_NO;
  17713. /* Check one digit a against primes table. */
  17714. for (i = 0; i < SP_PRIME_SIZE; i++) {
  17715. if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
  17716. *result = MP_YES;
  17717. haveRes = 1;
  17718. break;
  17719. }
  17720. }
  17721. return haveRes;
  17722. }
  17723. /* Using composites is only faster when using 64-bit values. */
  17724. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
  17725. /* Number of composites. */
  17726. #define SP_COMP_CNT 38
  17727. /* Products of small primes that fit into 64-bits. */
  17728. static sp_int_digit sp_comp[SP_COMP_CNT] = {
  17729. 0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
  17730. 0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
  17731. 0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
  17732. 0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
  17733. 0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
  17734. 0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
  17735. 0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
  17736. 0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
  17737. 0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
  17738. 0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
  17739. 0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
  17740. 0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
  17741. 0xcec010484e4ad877, 0xef972c3cfafbcd25
  17742. };
  17743. /* Index of next prime after those used to create composite. */
  17744. static int sp_comp_idx[SP_COMP_CNT] = {
  17745. 15, 25, 34, 42, 50, 58, 65, 72, 79, 86, 93, 100, 106, 112, 118,
  17746. 124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
  17747. 214, 220, 226, 232, 238, 244, 250, 256
  17748. };
  17749. #endif
  17750. /* Determines whether any of the first n small primes divide a evenly.
  17751. *
  17752. * @param [in] a Number to check.
  17753. * @param [in, out] haveRes Boolean indicating a no prime result found.
  17754. * @param [in, out] result Whether a is known to be prime.
  17755. * @return MP_OKAY on success.
  17756. * @return Negative on failure.
  17757. */
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
{
    int i;
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
    int j;
#endif
    sp_int_digit d;
    int err = MP_OKAY;

#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
    /* Do trial division of a with all known small primes. */
    for (i = 0; i < SP_PRIME_SIZE; i++) {
        /* Small prime divides a when remainder is 0. */
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
        if ((err != MP_OKAY) || (d == 0)) {
            /* Divisible (or error) - a is not prime. */
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
    }
#else
    /* 64-bit fast path: reduce a modulo a product of several small primes
     * once, then trial divide only the single-word remainder. */
    /* Start with first prime in composite. */
    i = 0;
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
        /* Reduce a down to a single word. */
        err = sp_mod_d(a, sp_comp[j], &d);
        if ((err != MP_OKAY) || (d == 0)) {
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
        /* Do trial division of d with small primes that make up composite. */
        for (; i < sp_comp_idx[j]; i++) {
            /* Small prime divides a when remainder is 0. */
            if (d % sp_primes[i] == 0) {
                *result = MP_NO;
                *haveRes = 1;
                break;
            }
        }
    }
#endif

    return err;
}
/* Check whether a is prime by checking t iterations of Miller-Rabin.
 *
 * Bases used are the first 'trials' known small primes; the caller must
 * ensure trials <= SP_PRIME_SIZE (sp_prime_is_prime validates this).
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of trials of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_prime_trials(const sp_int* a, int trials, int* result)
{
    int err = MP_OKAY;
    int i;
    sp_int* n1;
    sp_int* r;
    /* Two working values for Miller-Rabin, each one digit larger than a. */
    DECL_SP_INT_ARRAY(t, a->used + 1, 2);
    /* Base b needs room for a double-width intermediate. */
    DECL_SP_INT(b, a->used * 2 + 1);

    ALLOC_SP_INT_ARRAY(t, a->used + 1, 2, err, NULL);
    /* Allocate number that will hold modular exponentiation result. */
    ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
    if (err == MP_OKAY) {
        n1 = t[0];
        r = t[1];

        _sp_init_size(n1, a->used + 1);
        _sp_init_size(r, a->used + 1);
        _sp_init_size(b, a->used * 2 + 1);

        /* Do requested number of trials of Miller-Rabin test. */
        for (i = 0; i < trials; i++) {
            /* Miller-Rabin test with known small prime as the base. */
            _sp_set(b, sp_primes[i]);
            err = sp_prime_miller_rabin(a, b, result, n1, r);
            /* Stop early on error or a definite composite result. */
            if ((err != MP_OKAY) || (*result == MP_NO)) {
                break;
            }
        }

        /* Clear temporary values. */
        sp_clear(n1);
        sp_clear(r);
        sp_clear(b);
    }

    /* Free allocated temporaries. */
    FREE_SP_INT(b, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  17847. /* Check whether a is prime.
  17848. * Checks against a number of small primes and does t iterations of
  17849. * Miller-Rabin.
  17850. *
  17851. * @param [in] a SP integer to check.
  17852. * @param [in] trials Number of trials of Miller-Rabin test to perform.
  17853. * @param [out] result MP_YES when number is prime.
  17854. * MP_NO otherwise.
  17855. *
  17856. * @return MP_OKAY on success.
  17857. * @return MP_VAL when a or result is NULL, or trials is out of range.
  17858. * @return MP_MEM when dynamic memory allocation fails.
  17859. */
  17860. int sp_prime_is_prime(const sp_int* a, int trials, int* result)
  17861. {
  17862. int err = MP_OKAY;
  17863. int haveRes = 0;
  17864. /* Validate parameters. */
  17865. if ((a == NULL) || (result == NULL)) {
  17866. if (result != NULL) {
  17867. *result = MP_NO;
  17868. }
  17869. err = MP_VAL;
  17870. }
  17871. else if (a->used * 2 >= SP_INT_DIGITS) {
  17872. err = MP_VAL;
  17873. }
  17874. /* Check validity of Miller-Rabin iterations count.
  17875. * Must do at least one and need a unique pre-computed prime for each
  17876. * iteration.
  17877. */
  17878. if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
  17879. *result = MP_NO;
  17880. err = MP_VAL;
  17881. }
  17882. /* Short-cut, 1 is not prime. */
  17883. if ((err == MP_OKAY) && sp_isone(a)) {
  17884. *result = MP_NO;
  17885. haveRes = 1;
  17886. }
  17887. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  17888. /* Check against known small primes when a has 1 digit. */
  17889. if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
  17890. (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
  17891. haveRes = sp_cmp_primes(a, result);
  17892. }
  17893. /* Check all small primes for even divisibility. */
  17894. if ((err == MP_OKAY) && (!haveRes)) {
  17895. err = sp_div_primes(a, &haveRes, result);
  17896. }
  17897. /* Check a number of iterations of Miller-Rabin with small primes. */
  17898. if ((err == MP_OKAY) && (!haveRes)) {
  17899. err = _sp_prime_trials(a, trials, result);
  17900. }
  17901. RESTORE_VECTOR_REGISTERS();
  17902. return err;
  17903. }
  17904. #ifndef WC_NO_RNG
/* Check whether a is prime by doing t iterations of Miller-Rabin.
 *
 * t random numbers should give a (1/4)^t chance of a false prime.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, result or rng is NULL.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
    WC_RNG* rng)
{
    int err = MP_OKAY;
    int bits = sp_count_bits(a);
    /* Number of bytes needed to cover all bits of a. */
    word32 baseSz = ((word32)bits + 7) / 8;
    /* ds: single-extra-digit temporaries (c = a - 2, Miller-Rabin n1). */
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
    /* d: double-width temporaries (random base b, Miller-Rabin scratch r). */
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);

    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        sp_int* c = ds[0];  /* Upper bound for base: a - 2. */
        sp_int* n1 = ds[1]; /* Scratch for Miller-Rabin. */
        sp_int* b = d[0];   /* Random base for a trial. */
        sp_int* r = d[1];   /* Scratch for Miller-Rabin. */

        _sp_init_size(c , a->used + 1);
        _sp_init_size(n1, a->used + 1);
        _sp_init_size(b , a->used * 2 + 1);
        _sp_init_size(r , a->used * 2 + 1);

        /* c = a - 2: a usable base b must satisfy 2 < b < a - 2. */
        _sp_sub_d(a, 2, c);
        /* bits becomes the count of significant bits in a's top digit. */
        bits &= SP_WORD_MASK;

        /* Keep trying random numbers until all trials complete. */
        while (trials > 0) {
            /* Generate random trial number. */
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
            if (err != MP_OKAY) {
                break;
            }
            b->used = a->used;

        #ifdef BIG_ENDIAN_ORDER
            /* Fix top digit if fewer bytes than a full digit generated. */
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
                b->dp[b->used-1] >>=
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
            }
        #endif /* BIG_ENDIAN_ORDER */

            /* Ensure the top word has no more bits than necessary so that
             * b is at most one bit-length larger than a allows. */
            if (bits > 0) {
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
                sp_clamp(b);
            }

            /* Can't use random value if it is: 0, 1, 2, a-2, a-1, >= a.
             * Retry without consuming a trial. */
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
                continue;
            }

            /* Perform Miller-Rabin test with random value. */
            err = sp_prime_miller_rabin(a, b, result, n1, r);
            if ((err != MP_OKAY) || (*result == MP_NO)) {
                break;
            }

            /* Trial complete. */
            trials--;
        }

        /* Zeroize temporary values used when generating private prime. */
        sp_forcezero(n1);
        sp_forcezero(r);
        sp_forcezero(b);
        sp_forcezero(c);
    }

    FREE_SP_INT_ARRAY(d, NULL);
    FREE_SP_INT_ARRAY(ds, NULL);
    return err;
}
  17982. #endif /*!WC_NO_RNG */
  17983. /* Check whether a is prime.
  17984. * Checks against a number of small primes and does t iterations of
  17985. * Miller-Rabin.
  17986. *
  17987. * @param [in] a SP integer to check.
  17988. * @param [in] trials Number of iterations of Miller-Rabin test to perform.
  17989. * @param [out] result MP_YES when number is prime.
  17990. * MP_NO otherwise.
  17991. * @param [in] rng Random number generator for Miller-Rabin testing.
  17992. *
  17993. * @return MP_OKAY on success.
  17994. * @return MP_VAL when a, result or rng is NULL.
  17995. * @return MP_MEM when dynamic memory allocation fails.
  17996. */
  17997. int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
  17998. {
  17999. int err = MP_OKAY;
  18000. int ret = MP_YES;
  18001. int haveRes = 0;
  18002. if ((a == NULL) || (result == NULL) || (rng == NULL)) {
  18003. err = MP_VAL;
  18004. }
  18005. #ifndef WC_NO_RNG
  18006. if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
  18007. err = MP_VAL;
  18008. }
  18009. #endif
  18010. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18011. if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
  18012. err = MP_VAL;
  18013. }
  18014. #endif
  18015. /* Ensure trials is valid. Maximum based on number of small primes
  18016. * available. */
  18017. if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
  18018. err = MP_VAL;
  18019. }
  18020. if ((err == MP_OKAY) && sp_isone(a)) {
  18021. ret = MP_NO;
  18022. haveRes = 1;
  18023. }
  18024. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  18025. /* Check against known small primes when a has 1 digit. */
  18026. if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
  18027. (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
  18028. haveRes = sp_cmp_primes(a, &ret);
  18029. }
  18030. /* Check all small primes for even divisibility. */
  18031. if ((err == MP_OKAY) && (!haveRes)) {
  18032. err = sp_div_primes(a, &haveRes, &ret);
  18033. }
  18034. #ifndef WC_NO_RNG
  18035. /* Check a number of iterations of Miller-Rabin with random large values. */
  18036. if ((err == MP_OKAY) && (!haveRes)) {
  18037. err = _sp_prime_random_trials(a, trials, &ret, rng);
  18038. }
  18039. #else
  18040. (void)trials;
  18041. #endif /* !WC_NO_RNG */
  18042. if (result != NULL) {
  18043. *result = ret;
  18044. }
  18045. RESTORE_VECTOR_REGISTERS();
  18046. return err;
  18047. }
  18048. #endif /* WOLFSSL_SP_PRIME_GEN */
  18049. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
 *
 * Find the largest number that divides both a and b without remainder.
 * r <= a, r <= b, a % r == 0, b % r == 0
 *
 * a and b are positive integers.
 *
 * Euclidean Algorithm:
 *  1. If a > b then a = b, b = a
 *  2. u = a
 *  3. v = b % a
 *  4. While v != 0
 *   4.1. t = u % v
 *   4.2. u <= v, v <= t, t <= u
 *  5. r = u
 *
 * @param [in]  a  SP integer of first operand.
 * @param [in]  b  SP integer of second operand.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* t = NULL;
    /* Used for swapping sp_ints. */
    sp_int* s;
    /* Determine maximum digit length numbers will reach. */
    unsigned int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
    DECL_SP_INT_ARRAY(d, used, 3);

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
    if (err == MP_OKAY) {
        u = d[0];
        v = d[1];
        t = d[2];
        _sp_init_size(u, used);
        _sp_init_size(v, used);
        _sp_init_size(t, used);

        /* 1. If a > b then a = b, b = a.
         *    Make a <= b by swapping the (local) pointers only.
         */
        if (_sp_cmp(a, b) == MP_GT) {
            const sp_int* tmp;
            tmp = a;
            a = b;
            b = tmp;
        }

        /* 2. u = a */
        _sp_copy(a, u);
        /* 3. v = b mod a */
        if (a->used == 1) {
            /* Single-digit modulus - use the faster digit reduction. */
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
            v->used = (v->dp[0] != 0);
        }
        else {
            err = sp_mod(b, a, v);
        }
    }

    /* 4. While v != 0 */
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
     * digit. (err == MP_OKAY guards the u/v dereferences when allocation
     * failed above.)
     */
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        if (v->used == 1) {
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
            t->used = (t->dp[0] != 0);
        }
        else {
            err = sp_mod(u, v, t);
        }
        /* 4.2. u <= v, v <= t, t <= u - rotate pointers, no copying. */
        s = u; u = v; v = t; t = s;
    }
    /* Only one digit remaining in u and v - plain C modulo suffices. */
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        t->dp[0] = u->dp[0] % v->dp[0];
        t->used = (t->dp[0] != 0);
        /* 4.2. u <= v, v <= t, t <= u */
        s = u; u = v; v = t; t = s;
    }
    if (err == MP_OKAY) {
        /* 5. r = u */
        _sp_copy(u, r);
    }

    FREE_SP_INT_ARRAY(d, NULL);

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  18147. /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
  18148. *
  18149. * Find the largest number that divides both a and b without remainder.
  18150. * r <= a, r <= b, a % r == 0, b % r == 0
  18151. *
  18152. * a and b are positive integers.
  18153. *
  18154. * @param [in] a SP integer of first operand.
  18155. * @param [in] b SP integer of second operand.
  18156. * @param [out] r SP integer to hold result.
  18157. *
  18158. * @return MP_OKAY on success.
  18159. * @return MP_VAL when a, b or r is NULL or too large.
  18160. * @return MP_MEM when dynamic memory allocation fails.
  18161. */
  18162. int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
  18163. {
  18164. int err = MP_OKAY;
  18165. /* Validate parameters. */
  18166. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18167. err = MP_VAL;
  18168. }
  18169. /* Check that we have space in numbers to do work. */
  18170. else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
  18171. err = MP_VAL;
  18172. }
  18173. /* Check that r is large enough to hold maximum sized result. */
  18174. else if (((a->used <= b->used) && (r->size < a->used)) ||
  18175. ((b->used < a->used) && (r->size < b->used))) {
  18176. err = MP_VAL;
  18177. }
  18178. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18179. /* Algorithm doesn't work with negative numbers. */
  18180. else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
  18181. err = MP_VAL;
  18182. }
  18183. #endif
  18184. else if (sp_iszero(a)) {
  18185. /* GCD of 0 and 0 is undefined - all integers divide 0. */
  18186. if (sp_iszero(b)) {
  18187. err = MP_VAL;
  18188. }
  18189. else {
  18190. /* GCD of 0 and b is b - b divides 0. */
  18191. err = sp_copy(b, r);
  18192. }
  18193. }
  18194. else if (sp_iszero(b)) {
  18195. /* GCD of 0 and a is a - a divides 0. */
  18196. err = sp_copy(a, r);
  18197. }
  18198. else {
  18199. /* Calculate GCD. */
  18200. err = _sp_gcd(a, b, r);
  18201. }
  18202. return err;
  18203. }
  18204. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
  18205. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
  18206. (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
  18207. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18208. * Smallest number divisible by both numbers.
  18209. *
  18210. * a and b are positive integers.
  18211. *
  18212. * lcm(a, b) = (a / gcd(a, b)) * b
  18213. * Divide the common divisor from a and multiply by b.
  18214. *
  18215. * Algorithm:
  18216. * 1. t0 = gcd(a, b)
  18217. * 2. If a > b then
  18218. * 2.1. t1 = a / t0
  18219. * 2.2. r = b * t1
  18220. * 3. Else
  18221. * 3.1. t1 = b / t0
  18222. * 3.2. r = a * t1
  18223. *
  18224. * @param [in] a SP integer of first operand.
  18225. * @param [in] b SP integer of second operand.
  18226. * @param [out] r SP integer to hold result.
  18227. *
  18228. * @return MP_OKAY on success.
  18229. * @return MP_MEM when dynamic memory allocation fails.
  18230. */
  18231. static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
  18232. {
  18233. int err = MP_OKAY;
  18234. /* Determine maximum digit length numbers will reach. */
  18235. unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
  18236. DECL_SP_INT_ARRAY(t, used, 2);
  18237. ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
  18238. if (err == MP_OKAY) {
  18239. _sp_init_size(t[0], used);
  18240. _sp_init_size(t[1], used);
  18241. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  18242. if (err == MP_OKAY) {
  18243. /* 1. t0 = gcd(a, b) */
  18244. err = sp_gcd(a, b, t[0]);
  18245. }
  18246. if (err == MP_OKAY) {
  18247. /* Divide the greater by the common divisor and multiply by other
  18248. * to operate on the smallest length numbers.
  18249. */
  18250. /* 2. If a > b then */
  18251. if (_sp_cmp_abs(a, b) == MP_GT) {
  18252. /* 2.1. t1 = a / t0 */
  18253. err = sp_div(a, t[0], t[1], NULL);
  18254. if (err == MP_OKAY) {
  18255. /* 2.2. r = b * t1 */
  18256. err = sp_mul(b, t[1], r);
  18257. }
  18258. }
  18259. /* 3. Else */
  18260. else {
  18261. /* 3.1. t1 = b / t0 */
  18262. err = sp_div(b, t[0], t[1], NULL);
  18263. if (err == MP_OKAY) {
  18264. /* 3.2. r = a * t1 */
  18265. err = sp_mul(a, t[1], r);
  18266. }
  18267. }
  18268. }
  18269. RESTORE_VECTOR_REGISTERS();
  18270. }
  18271. FREE_SP_INT_ARRAY(t, NULL);
  18272. return err;
  18273. }
  18274. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18275. * Smallest number divisible by both numbers.
  18276. *
  18277. * a and b are positive integers.
  18278. *
  18279. * @param [in] a SP integer of first operand.
  18280. * @param [in] b SP integer of second operand.
  18281. * @param [out] r SP integer to hold result.
  18282. *
  18283. * @return MP_OKAY on success.
  18284. * @return MP_VAL when a, b or r is NULL; or a or b is zero.
  18285. * @return MP_MEM when dynamic memory allocation fails.
  18286. */
  18287. int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
  18288. {
  18289. int err = MP_OKAY;
  18290. /* Validate parameters. */
  18291. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18292. err = MP_VAL;
  18293. }
  18294. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18295. /* Ensure a and b are positive. */
  18296. else if ((a->sign == MP_NEG) || (b->sign >= MP_NEG)) {
  18297. err = MP_VAL;
  18298. }
  18299. #endif
  18300. /* Ensure r has space for maximumal result. */
  18301. else if (r->size < a->used + b->used) {
  18302. err = MP_VAL;
  18303. }
  18304. /* LCM of 0 and any number is undefined as 0 is not in the set of values
  18305. * being used.
  18306. */
  18307. if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
  18308. err = MP_VAL;
  18309. }
  18310. if (err == MP_OKAY) {
  18311. /* Do operation. */
  18312. err = _sp_lcm(a, b, r);
  18313. }
  18314. return err;
  18315. }
  18316. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
/* Returns the run time settings.
 *
 * @return  Settings value - the compile-time CTC_SETTINGS value, allowing
 *          callers to verify the library was built with expected options.
 */
word32 CheckRunTimeSettings(void)
{
    return CTC_SETTINGS;
}
/* Returns the fast math settings.
 *
 * @return  Setting - number of bits in a digit (SP_WORD_SIZE), so callers
 *          can verify the built digit size matches their expectation.
 */
word32 CheckRunTimeFastMath(void)
{
    return SP_WORD_SIZE;
}
  18333. #ifdef WOLFSSL_CHECK_MEM_ZERO
#ifdef WOLFSSL_CHECK_MEM_ZERO
/* Add an MP to check.
 *
 * Registers the full digit buffer of sp (sp->size digits, not just the
 * currently used ones) with the memory-zeroization checker.
 *
 * @param [in] name  Name of address to check.
 * @param [in] sp    sp_int that needs to be checked.
 */
void sp_memzero_add(const char* name, sp_int* sp)
{
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_digit));
}
/* Check the memory in the data pointer for memory that must be zero.
 *
 * Verifies the full digit buffer of sp (sp->size digits) against the
 * registered must-be-zero regions.
 *
 * @param [in] sp  sp_int that needs to be checked.
 */
void sp_memzero_check(sp_int* sp)
{
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_digit));
}
  18351. #endif /* WOLFSSL_CHECK_MEM_ZERO */
  18352. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  18353. defined(WOLFSSL_SP_NO_MALLOC)
  18354. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  18355. !defined(WOLFSSL_SP_NO_DYN_STACK)
  18356. #pragma GCC diagnostic pop
  18357. #endif
  18358. #endif
  18359. #endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */