sp_int.c 538 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
  1. /* sp_int.c
  2. *
  3. * Copyright (C) 2006-2022 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Implementation by Sean Parkinson. */
  22. /*
  23. DESCRIPTION
  24. This library provides single precision (SP) integer math functions.
  25. */
  26. #ifdef HAVE_CONFIG_H
  27. #include <config.h>
  28. #endif
  29. #include <wolfssl/wolfcrypt/settings.h>
  30. #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
  31. #include <wolfssl/wolfcrypt/error-crypt.h>
  32. #ifdef NO_INLINE
  33. #include <wolfssl/wolfcrypt/misc.h>
  34. #else
  35. #define WOLFSSL_MISC_INCLUDED
  36. #include <wolfcrypt/src/misc.c>
  37. #endif
  38. /* SP Build Options:
  39. * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
  40. * WOLFSSL_HAVE_SP_DH: Enable SP DH support
  41. * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
  42. * WOLFSSL_SP_MATH: Use only single precision math and algorithms
  43. * it supports (no fastmath tfm.c or normal integer.c)
  44. * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
  45. * (replacement for tfm.c and integer.c)
  46. * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
  47. * stack variables
  48. * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
  49. * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
  50. * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
  51. * WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
  52. * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
  53. * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
  54. * WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
  55. * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
  56. * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
  57. * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
  58. * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
  59. * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
  60. * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
  61. * (used with -mthumb)
  62. * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
  63. * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
  64. * WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
  65. * WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
  66. * WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
  67. * WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
  68. * (explicitly uses register 'r7')
  69. * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
  70. * WOLFSSL_SP_PPC Enable PPC assembly speedups
  71. * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
  72. * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
  73. * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
  74. * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
  75. * WOLFSSL_SP_S390X Enable S390X assembly speedups
  76. * SP_WORD_SIZE Force 32 or 64 bit mode
  77. * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
  78. * will return FP_WOULDBLOCK for long operations and function must be
  79. * called again until complete.
  80. * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
  81. * exponentiation implementation.
  82. * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
  83. * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
  84. * pointer is not allowed.
  85. * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
  86. * Used with small code size and not small stack.
  87. * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
  88. */
  89. /* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
  90. #if defined(__clang__) && defined(__clang_major__) && \
  91. (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
  92. #undef WOLFSSL_SP_SMALL
  93. #endif
  94. #include <wolfssl/wolfcrypt/sp_int.h>
/* DECL_SP_INT: Declare one variable of type 'sp_int'.
 *
 * Three strategies, selected at compile time:
 *   1. Heap: declare a NULL pointer to be filled in by ALLOC_SP_INT's
 *      XMALLOC (small-stack builds that allow malloc).
 *   2. Sized stack: C99 builds can size a byte array to exactly the digits
 *      needed ('s'), instead of a full-size sp_int.
 *   3. Plain stack: a full sp_int object (declared as a 1-element array so
 *      'n' is usable as a pointer, matching the other two variants).
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Declare a variable that will be assigned a value on XMALLOC. */
#define DECL_SP_INT(n, s)                                                      \
    sp_int* n = NULL
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
/* Declare a variable on the stack with the required data size.
 * MP_INT_SIZEOF(s) is the footprint of an sp_int holding 's' digits. */
#define DECL_SP_INT(n, s)                                                      \
    byte n##d[MP_INT_SIZEOF(s)];                                               \
    sp_int* n = (sp_int*)n##d
#else
/* Declare a variable on the stack. */
#define DECL_SP_INT(n, s)                                                      \
    sp_int n[1]
#endif
#endif
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size.
 *
 * Pairs with DECL_SP_INT above: in heap builds it performs the XMALLOC the
 * declaration deferred; in stack builds the storage already exists.
 * 'err' is only updated on failure (MP_MEM), and no allocation is attempted
 * if a prior step already failed - lets callers chain allocations and check
 * 'err' once.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Dynamically allocate just enough data to support size. */
#define ALLOC_SP_INT(n, s, err, h)                                             \
do {                                                                           \
    if ((err) == MP_OKAY) {                                                    \
        (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), DYNAMIC_TYPE_BIGINT);    \
        if ((n) == NULL) {                                                     \
            (err) = MP_MEM;                                                    \
        }                                                                      \
    }                                                                          \
}                                                                              \
while (0)
/* Dynamically allocate just enough data to support size - and set size. */
#define ALLOC_SP_INT_SIZE(n, s, err, h)                                        \
do {                                                                           \
    ALLOC_SP_INT(n, s, err, h);                                                \
    if ((err) == MP_OKAY) {                                                    \
        (n)->size = (s);                                                       \
    }                                                                          \
}                                                                              \
while (0)
#else
/* Array declared on stack - nothing to do. */
#define ALLOC_SP_INT(n, s, err, h)
/* Array declared on stack - set the size field. */
#define ALLOC_SP_INT_SIZE(n, s, err, h) \
    n->size = s;
#endif
/* FREE_SP_INT: Free an 'sp_int' variable.
 *
 * Counterpart of DECL_SP_INT/ALLOC_SP_INT: releases heap storage in
 * malloc-enabled small-stack builds, and is a no-op when the sp_int was
 * declared on the stack. Safe to call when allocation failed ('n' NULL).
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Free dynamically allocated data. */
#define FREE_SP_INT(n, h)                                                      \
do {                                                                           \
    if ((n) != NULL) {                                                         \
        XFREE(n, h, DYNAMIC_TYPE_BIGINT);                                      \
    }                                                                          \
}                                                                              \
while (0)
#else
/* Nothing to do as declared on stack. */
#define FREE_SP_INT(n, h)
#endif
  159. /* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
  160. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  161. !defined(WOLFSSL_SP_NO_MALLOC)
  162. /* Declare a variable that will be assigned a value on XMALLOC. */
  163. #define DECL_SP_INT_ARRAY(n, s, c) \
  164. sp_int* n##d = NULL; \
  165. sp_int* (n)[c] = { NULL, }
  166. #else
  167. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  168. defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
  169. /* Declare a variable on the stack with the required data size. */
  170. #define DECL_SP_INT_ARRAY(n, s, c) \
  171. byte n##d[MP_INT_SIZEOF(s) * (c)]; \
  172. sp_int* (n)[c]
  173. #else
  174. /* Declare a variable on the stack. */
  175. #define DECL_SP_INT_ARRAY(n, s, c) \
  176. sp_int n##d[c]; \
  177. sp_int* (n)[c]
  178. #endif
  179. #endif
  180. /* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
  181. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  182. !defined(WOLFSSL_SP_NO_MALLOC)
  183. /* Dynamically allocate just enough data to support multiple sp_ints of the
  184. * required size. Use pointers into data to make up array and set sizes.
  185. */
  186. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  187. do { \
  188. if ((err) == MP_OKAY) { \
  189. n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h), \
  190. DYNAMIC_TYPE_BIGINT); \
  191. if (n##d == NULL) { \
  192. (err) = MP_MEM; \
  193. } \
  194. else { \
  195. int n##ii; \
  196. (n)[0] = n##d; \
  197. (n)[0]->size = (s); \
  198. for (n##ii = 1; n##ii < (c); n##ii++) { \
  199. (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
  200. (n)[n##ii]->size = (s); \
  201. } \
  202. } \
  203. } \
  204. } \
  205. while (0)
  206. #else
  207. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  208. defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
  209. /* Data declared on stack that supports multiple sp_ints of the
  210. * required size. Use pointers into data to make up array and set sizes.
  211. */
  212. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  213. do { \
  214. if ((err) == MP_OKAY) { \
  215. int n##ii; \
  216. (n)[0] = (sp_int*)n##d; \
  217. (n)[0]->size = (s); \
  218. for (n##ii = 1; n##ii < (c); n##ii++) { \
  219. (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
  220. (n)[n##ii]->size = (s); \
  221. } \
  222. } \
  223. } \
  224. while (0)
  225. #else
  226. /* Data declared on stack that supports multiple sp_ints of the
  227. * required size. Set into array and set sizes.
  228. */
  229. #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
  230. do { \
  231. if ((err) == MP_OKAY) { \
  232. int n##ii; \
  233. for (n##ii = 0; n##ii < (c); n##ii++) { \
  234. (n)[n##ii] = &n##d[n##ii]; \
  235. (n)[n##ii]->size = (s); \
  236. } \
  237. } \
  238. } \
  239. while (0)
  240. #endif
  241. #endif
  242. /* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
  243. #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
  244. !defined(WOLFSSL_SP_NO_MALLOC)
  245. /* Free data variable that was dynamically allocated. */
  246. #define FREE_SP_INT_ARRAY(n, h) \
  247. do { \
  248. if (n##d != NULL) { \
  249. XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
  250. } \
  251. } \
  252. while (0)
  253. #else
  254. /* Nothing to do as data declared on stack. */
  255. #define FREE_SP_INT_ARRAY(n, h)
  256. #endif
  257. #ifndef WOLFSSL_NO_ASM
  258. #ifdef __IAR_SYSTEMS_ICC__
  259. #define __asm__ asm
  260. #define __volatile__ volatile
  261. #endif /* __IAR_SYSTEMS_ICC__ */
  262. #ifdef __KEIL__
  263. #define __asm__ __asm
  264. #define __volatile__ volatile
  265. #endif
  266. #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
  267. /*
  268. * CPU: x86_64
  269. */
  270. /* Multiply va by vb and store double size result in: vh | vl */
  271. #define SP_ASM_MUL(vl, vh, va, vb) \
  272. __asm__ __volatile__ ( \
  273. "movq %[b], %%rax \n\t" \
  274. "mulq %[a] \n\t" \
  275. "movq %%rax, %[l] \n\t" \
  276. "movq %%rdx, %[h] \n\t" \
  277. : [h] "+r" (vh), [l] "+r" (vl) \
  278. : [a] "m" (va), [b] "m" (vb) \
  279. : "memory", "%rax", "%rdx", "cc" \
  280. )
  281. /* Multiply va by vb and store double size result in: vo | vh | vl */
  282. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  283. __asm__ __volatile__ ( \
  284. "movq %[b], %%rax \n\t" \
  285. "mulq %[a] \n\t" \
  286. "movq $0 , %[o] \n\t" \
  287. "movq %%rax, %[l] \n\t" \
  288. "movq %%rdx, %[h] \n\t" \
  289. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  290. : [a] "m" (va), [b] "m" (vb) \
  291. : "%rax", "%rdx", "cc" \
  292. )
  293. /* Multiply va by vb and add double size result into: vo | vh | vl */
  294. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  295. __asm__ __volatile__ ( \
  296. "movq %[b], %%rax \n\t" \
  297. "mulq %[a] \n\t" \
  298. "addq %%rax, %[l] \n\t" \
  299. "adcq %%rdx, %[h] \n\t" \
  300. "adcq $0 , %[o] \n\t" \
  301. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  302. : [a] "m" (va), [b] "m" (vb) \
  303. : "%rax", "%rdx", "cc" \
  304. )
  305. /* Multiply va by vb and add double size result into: vh | vl */
  306. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  307. __asm__ __volatile__ ( \
  308. "movq %[b], %%rax \n\t" \
  309. "mulq %[a] \n\t" \
  310. "addq %%rax, %[l] \n\t" \
  311. "adcq %%rdx, %[h] \n\t" \
  312. : [l] "+r" (vl), [h] "+r" (vh) \
  313. : [a] "m" (va), [b] "m" (vb) \
  314. : "%rax", "%rdx", "cc" \
  315. )
  316. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  317. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  318. __asm__ __volatile__ ( \
  319. "movq %[b], %%rax \n\t" \
  320. "mulq %[a] \n\t" \
  321. "addq %%rax, %[l] \n\t" \
  322. "adcq %%rdx, %[h] \n\t" \
  323. "adcq $0 , %[o] \n\t" \
  324. "addq %%rax, %[l] \n\t" \
  325. "adcq %%rdx, %[h] \n\t" \
  326. "adcq $0 , %[o] \n\t" \
  327. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  328. : [a] "m" (va), [b] "m" (vb) \
  329. : "%rax", "%rdx", "cc" \
  330. )
  331. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  332. * Assumes first add will not overflow vh | vl
  333. */
  334. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  335. __asm__ __volatile__ ( \
  336. "movq %[b], %%rax \n\t" \
  337. "mulq %[a] \n\t" \
  338. "addq %%rax, %[l] \n\t" \
  339. "adcq %%rdx, %[h] \n\t" \
  340. "addq %%rax, %[l] \n\t" \
  341. "adcq %%rdx, %[h] \n\t" \
  342. "adcq $0 , %[o] \n\t" \
  343. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  344. : [a] "m" (va), [b] "m" (vb) \
  345. : "%rax", "%rdx", "cc" \
  346. )
  347. /* Square va and store double size result in: vh | vl */
  348. #define SP_ASM_SQR(vl, vh, va) \
  349. __asm__ __volatile__ ( \
  350. "movq %[a], %%rax \n\t" \
  351. "mulq %%rax \n\t" \
  352. "movq %%rax, %[l] \n\t" \
  353. "movq %%rdx, %[h] \n\t" \
  354. : [h] "+r" (vh), [l] "+r" (vl) \
  355. : [a] "m" (va) \
  356. : "memory", "%rax", "%rdx", "cc" \
  357. )
  358. /* Square va and add double size result into: vo | vh | vl */
  359. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  360. __asm__ __volatile__ ( \
  361. "movq %[a], %%rax \n\t" \
  362. "mulq %%rax \n\t" \
  363. "addq %%rax, %[l] \n\t" \
  364. "adcq %%rdx, %[h] \n\t" \
  365. "adcq $0 , %[o] \n\t" \
  366. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  367. : [a] "m" (va) \
  368. : "%rax", "%rdx", "cc" \
  369. )
  370. /* Square va and add double size result into: vh | vl */
  371. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  372. __asm__ __volatile__ ( \
  373. "movq %[a], %%rax \n\t" \
  374. "mulq %%rax \n\t" \
  375. "addq %%rax, %[l] \n\t" \
  376. "adcq %%rdx, %[h] \n\t" \
  377. : [l] "+r" (vl), [h] "+r" (vh) \
  378. : [a] "m" (va) \
  379. : "%rax", "%rdx", "cc" \
  380. )
  381. /* Add va into: vh | vl */
  382. #define SP_ASM_ADDC(vl, vh, va) \
  383. __asm__ __volatile__ ( \
  384. "addq %[a], %[l] \n\t" \
  385. "adcq $0 , %[h] \n\t" \
  386. : [l] "+r" (vl), [h] "+r" (vh) \
  387. : [a] "m" (va) \
  388. : "cc" \
  389. )
  390. /* Add va, variable in a register, into: vh | vl */
  391. #define SP_ASM_ADDC_REG(vl, vh, va) \
  392. __asm__ __volatile__ ( \
  393. "addq %[a], %[l] \n\t" \
  394. "adcq $0 , %[h] \n\t" \
  395. : [l] "+r" (vl), [h] "+r" (vh) \
  396. : [a] "r" (va) \
  397. : "cc" \
  398. )
  399. /* Sub va from: vh | vl */
  400. #define SP_ASM_SUBC(vl, vh, va) \
  401. __asm__ __volatile__ ( \
  402. "subq %[a], %[l] \n\t" \
  403. "sbbq $0 , %[h] \n\t" \
  404. : [l] "+r" (vl), [h] "+r" (vh) \
  405. : [a] "m" (va) \
  406. : "cc" \
  407. )
  408. /* Add two times vc | vb | va into vo | vh | vl */
  409. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  410. __asm__ __volatile__ ( \
  411. "addq %[a], %[l] \n\t" \
  412. "adcq %[b], %[h] \n\t" \
  413. "adcq %[c], %[o] \n\t" \
  414. "addq %[a], %[l] \n\t" \
  415. "adcq %[b], %[h] \n\t" \
  416. "adcq %[c], %[o] \n\t" \
  417. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  418. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  419. : "%rax", "%rdx", "cc" \
  420. )
  421. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  422. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  423. *
  424. * Using divq instruction on Intel x64.
  425. *
  426. * @param [in] hi SP integer digit. High digit of the dividend.
  427. * @param [in] lo SP integer digit. Lower digit of the dividend.
  428. * @param [in] d SP integer digit. Number to divide by.
  429. * @return The division result.
  430. */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    /* divq divides the 128-bit value rdx:rax by the given operand:
     * quotient goes to rax, remainder to rdx. The "+a" constraint binds
     * 'lo' to rax and "d" binds 'hi' to rdx, so the quotient is returned
     * in 'lo' with no extra moves.
     * NOTE(review): divq faults (#DE) if the quotient does not fit in
     * 64 bits - presumably callers guarantee hi < d; confirm at call
     * sites. */
    __asm__ __volatile__ (
        "divq %2"
        : "+a" (lo)
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
}
  442. #define SP_ASM_DIV_WORD
  443. #endif
  444. #define SP_INT_ASM_AVAILABLE
  445. #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
  446. #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
  447. /*
  448. * CPU: x86
  449. */
  450. /* Multiply va by vb and store double size result in: vh | vl */
  451. #define SP_ASM_MUL(vl, vh, va, vb) \
  452. __asm__ __volatile__ ( \
  453. "movl %[b], %%eax \n\t" \
  454. "mull %[a] \n\t" \
  455. "movl %%eax, %[l] \n\t" \
  456. "movl %%edx, %[h] \n\t" \
  457. : [h] "+r" (vh), [l] "+r" (vl) \
  458. : [a] "m" (va), [b] "m" (vb) \
  459. : "memory", "eax", "edx", "cc" \
  460. )
  461. /* Multiply va by vb and store double size result in: vo | vh | vl */
  462. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  463. __asm__ __volatile__ ( \
  464. "movl %[b], %%eax \n\t" \
  465. "mull %[a] \n\t" \
  466. "movl $0 , %[o] \n\t" \
  467. "movl %%eax, %[l] \n\t" \
  468. "movl %%edx, %[h] \n\t" \
  469. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  470. : [a] "m" (va), [b] "m" (vb) \
  471. : "eax", "edx", "cc" \
  472. )
  473. /* Multiply va by vb and add double size result into: vo | vh | vl */
  474. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  475. __asm__ __volatile__ ( \
  476. "movl %[b], %%eax \n\t" \
  477. "mull %[a] \n\t" \
  478. "addl %%eax, %[l] \n\t" \
  479. "adcl %%edx, %[h] \n\t" \
  480. "adcl $0 , %[o] \n\t" \
  481. : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
  482. : [a] "r" (va), [b] "r" (vb) \
  483. : "eax", "edx", "cc" \
  484. )
  485. /* Multiply va by vb and add double size result into: vh | vl */
  486. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  487. __asm__ __volatile__ ( \
  488. "movl %[b], %%eax \n\t" \
  489. "mull %[a] \n\t" \
  490. "addl %%eax, %[l] \n\t" \
  491. "adcl %%edx, %[h] \n\t" \
  492. : [l] "+r" (vl), [h] "+r" (vh) \
  493. : [a] "m" (va), [b] "m" (vb) \
  494. : "eax", "edx", "cc" \
  495. )
  496. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  497. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  498. __asm__ __volatile__ ( \
  499. "movl %[b], %%eax \n\t" \
  500. "mull %[a] \n\t" \
  501. "addl %%eax, %[l] \n\t" \
  502. "adcl %%edx, %[h] \n\t" \
  503. "adcl $0 , %[o] \n\t" \
  504. "addl %%eax, %[l] \n\t" \
  505. "adcl %%edx, %[h] \n\t" \
  506. "adcl $0 , %[o] \n\t" \
  507. : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
  508. : [a] "r" (va), [b] "r" (vb) \
  509. : "eax", "edx", "cc" \
  510. )
  511. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  512. * Assumes first add will not overflow vh | vl
  513. */
  514. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  515. __asm__ __volatile__ ( \
  516. "movl %[b], %%eax \n\t" \
  517. "mull %[a] \n\t" \
  518. "addl %%eax, %[l] \n\t" \
  519. "adcl %%edx, %[h] \n\t" \
  520. "addl %%eax, %[l] \n\t" \
  521. "adcl %%edx, %[h] \n\t" \
  522. "adcl $0 , %[o] \n\t" \
  523. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  524. : [a] "m" (va), [b] "m" (vb) \
  525. : "eax", "edx", "cc" \
  526. )
  527. /* Square va and store double size result in: vh | vl */
  528. #define SP_ASM_SQR(vl, vh, va) \
  529. __asm__ __volatile__ ( \
  530. "movl %[a], %%eax \n\t" \
  531. "mull %%eax \n\t" \
  532. "movl %%eax, %[l] \n\t" \
  533. "movl %%edx, %[h] \n\t" \
  534. : [h] "+r" (vh), [l] "+r" (vl) \
  535. : [a] "m" (va) \
  536. : "memory", "eax", "edx", "cc" \
  537. )
  538. /* Square va and add double size result into: vo | vh | vl */
  539. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  540. __asm__ __volatile__ ( \
  541. "movl %[a], %%eax \n\t" \
  542. "mull %%eax \n\t" \
  543. "addl %%eax, %[l] \n\t" \
  544. "adcl %%edx, %[h] \n\t" \
  545. "adcl $0 , %[o] \n\t" \
  546. : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
  547. : [a] "m" (va) \
  548. : "eax", "edx", "cc" \
  549. )
  550. /* Square va and add double size result into: vh | vl */
  551. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  552. __asm__ __volatile__ ( \
  553. "movl %[a], %%eax \n\t" \
  554. "mull %%eax \n\t" \
  555. "addl %%eax, %[l] \n\t" \
  556. "adcl %%edx, %[h] \n\t" \
  557. : [l] "+r" (vl), [h] "+r" (vh) \
  558. : [a] "m" (va) \
  559. : "eax", "edx", "cc" \
  560. )
  561. /* Add va into: vh | vl */
  562. #define SP_ASM_ADDC(vl, vh, va) \
  563. __asm__ __volatile__ ( \
  564. "addl %[a], %[l] \n\t" \
  565. "adcl $0 , %[h] \n\t" \
  566. : [l] "+r" (vl), [h] "+r" (vh) \
  567. : [a] "m" (va) \
  568. : "cc" \
  569. )
  570. /* Add va, variable in a register, into: vh | vl */
  571. #define SP_ASM_ADDC_REG(vl, vh, va) \
  572. __asm__ __volatile__ ( \
  573. "addl %[a], %[l] \n\t" \
  574. "adcl $0 , %[h] \n\t" \
  575. : [l] "+r" (vl), [h] "+r" (vh) \
  576. : [a] "r" (va) \
  577. : "cc" \
  578. )
  579. /* Sub va from: vh | vl */
  580. #define SP_ASM_SUBC(vl, vh, va) \
  581. __asm__ __volatile__ ( \
  582. "subl %[a], %[l] \n\t" \
  583. "sbbl $0 , %[h] \n\t" \
  584. : [l] "+r" (vl), [h] "+r" (vh) \
  585. : [a] "m" (va) \
  586. : "cc" \
  587. )
  588. /* Add two times vc | vb | va into vo | vh | vl */
  589. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  590. __asm__ __volatile__ ( \
  591. "addl %[a], %[l] \n\t" \
  592. "adcl %[b], %[h] \n\t" \
  593. "adcl %[c], %[o] \n\t" \
  594. "addl %[a], %[l] \n\t" \
  595. "adcl %[b], %[h] \n\t" \
  596. "adcl %[c], %[o] \n\t" \
  597. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  598. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  599. : "cc" \
  600. )
  601. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  602. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  603. *
  604. * Using divl instruction on Intel x86.
  605. *
  606. * @param [in] hi SP integer digit. High digit of the dividend.
  607. * @param [in] lo SP integer digit. Lower digit of the dividend.
  608. * @param [in] d SP integer digit. Number to divide by.
  609. * @return The division result.
  610. */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    /* divl divides the 64-bit value edx:eax by the given operand:
     * quotient goes to eax, remainder to edx. The "+a" constraint binds
     * 'lo' to eax and "d" binds 'hi' to edx, so the quotient is returned
     * in 'lo' with no extra moves.
     * NOTE(review): divl faults (#DE) if the quotient does not fit in
     * 32 bits - presumably callers guarantee hi < d; confirm at call
     * sites. */
    __asm__ __volatile__ (
        "divl %2"
        : "+a" (lo)
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
}
  622. #define SP_ASM_DIV_WORD
  623. #endif
  624. #define SP_INT_ASM_AVAILABLE
  625. #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
/*
 * CPU: Aarch64
 *
 * These macros implement 64x64->128 bit multiply-accumulate primitives used
 * by the generic multi-precision code. mul produces the low 64 bits of the
 * product and umulh the high 64 bits. x8/x9 are used as scratch registers
 * and are declared as clobbers where needed.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* Low half goes via x8 so that writing the output cannot clobber \
         * an input register before umulh has read it. */ \
        "mul x8, %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        "mov %[l], x8 \n\t" \
        "mov %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        /* 192-bit accumulate: carry propagates l -> h -> o. */ \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        /* Product added twice (a*b appears twice in a squaring cross term), \
         * each time with full carry into the overflow word. */ \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        /* First add stops at h (caller guarantees no overflow here). */ \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "umulh %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * Works by normalizing the divisor, then producing the quotient in 32-bit
 * estimates: each estimate is computed with udiv against (d >> 32) + 1
 * (which can never over-estimate), the estimate times d is subtracted from
 * the remainder, and the partial quotients are accumulated in x6.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        /* Conditionally pre-shift everything left by 16 when the top 16 bits
         * of d are clear - csel instead of a branch keeps this constant
         * time. */
        "lsr x3, %[d], 48\n\t"
        "mov x5, 16\n\t"
        "cmp x3, 0\n\t"
        "mov x4, 63\n\t"
        "csel x3, x5, xzr, eq\n\t"
        /* x4 = 63 - shift; used below so lo's top bits can be moved into hi
         * without ever shifting by 64 (which would be undefined). */
        "sub x4, x4, x3\n\t"
        "lsl %[d], %[d], x3\n\t"
        "lsl %[hi], %[hi], x3\n\t"
        "lsr x5, %[lo], x4\n\t"
        "lsl %[lo], %[lo], x3\n\t"
        /* (lo >> (63 - shift)) >> 1 == lo >> (64 - shift); contributes zero
         * when shift == 0. */
        "orr %[hi], %[hi], x5, lsr 1\n\t"
        /* x5 = (d >> 32) + 1: divisor estimate that never over-estimates. */
        "lsr x5, %[d], 32\n\t"
        "add x5, x5, 1\n\t"
        /* First quotient estimate from the high word; x6 accumulates the
         * quotient. */
        "udiv x3, %[hi], x5\n\t"
        "lsl x6, x3, 32\n\t"
        "mul x4, %[d], x6\n\t"
        "umulh x3, %[d], x6\n\t"
        /* Subtract estimate * d from the 128-bit remainder hi | lo. */
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        /* Second estimate from what is left in the high word. */
        "udiv x3, %[hi], x5\n\t"
        "lsl x3, x3, 32\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        /* Third estimate from the middle 64 bits: (hi << 32) | (lo >> 32). */
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        /* Fourth estimate, again from the middle 64 bits. */
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "sub %[lo], %[lo], x4\n\t"
        /* Remainder now fits in lo; final exact division by d. */
        "udiv x3, %[lo], %[d]\n\t"
        "add %[hi], x6, x3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "x3", "x4", "x5", "x6"
    );
    /* Quotient was accumulated into the hi operand by the final add. */
    return hi;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
#if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
    SP_WORD_SIZE == 32
/*
 * CPU: ARM32 or Cortex-M4 and similar
 *
 * umull produces a full 32x32->64 product in two registers; umlal
 * multiply-accumulates into an existing 64-bit register pair. r8/r9 are
 * scratch and listed as clobbers where used.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[b] \n\t" \
        "mov %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        /* 96-bit accumulate: carry propagates l -> h -> o. */ \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* umlal accumulates directly: vh|vl += va * vb. */ \
        "umlal %[l], %[h], %[a], %[b] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        /* Product added twice, each time with carry into the overflow \
         * word (used for squaring cross terms). */ \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        /* First add stops at h (caller guarantees no overflow here). */ \
        "adds %[l], %[l], r8 \n\t" \
        "adc %[h], %[h], r9 \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[a] \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r8", "r9", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "umlal %[l], %[h], %[a], %[a] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
#ifndef WOLFSSL_SP_ARM32_UDIV
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * Builds the quotient one bit at a time against the divisor estimate
 * (d >> 1) + 1, entirely branch-free (the loop has a fixed trip count), then
 * applies multiply-subtract corrections to get the exact quotient.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    sp_int_digit r = 0;
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
    /* de Bruijn multiply-and-lookup table used to emulate clz on cores
     * without the instruction. */
    static const char debruijn32[32] = {
         0, 31,  9, 30,  3,  8, 13, 29,  2,  5,  7, 21, 12, 24, 28, 19,
         1, 10,  4, 14,  6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
    };
    static const sp_uint32 debruijn32_mul = 0x076be629;
#endif
    __asm__ __volatile__ (
        /* Shift d so that top bit is set. */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
        /* clz emulation: smear d's top bit down, round up to a power of
         * two, multiply by the de Bruijn constant and use the top 5 bits
         * to index the lookup table. */
        "ldr r4, %[m]\n\t"
        "mov r5, %[d]\n\t"
        "orr r5, r5, r5, lsr #1\n\t"
        "orr r5, r5, r5, lsr #2\n\t"
        "orr r5, r5, r5, lsr #4\n\t"
        "orr r5, r5, r5, lsr #8\n\t"
        "orr r5, r5, r5, lsr #16\n\t"
        "add r5, r5, #1\n\t"
        "mul r5, r5, r4\n\t"
        "lsr r5, r5, #27\n\t"
        "ldrb r5, [%[t], r5]\n\t"
#else
        "clz r5, %[d]\n\t"
#endif
        /* Normalize d, hi, lo by r5 bits; (lo >> (31 - r5)) >> 1 moves
         * lo's top bits into hi without ever shifting by 32. */
        "rsb r6, r5, #31\n\t"
        "lsl %[d], %[d], r5\n\t"
        "lsl %[hi], %[hi], r5\n\t"
        "lsr r9, %[lo], r6\n\t"
        "lsl %[lo], %[lo], r5\n\t"
        "orr %[hi], %[hi], r9, lsr #1\n\t"
        /* r5 = (d >> 1) + 1: divisor estimate used for the bit loop. */
        "lsr r5, %[d], #1\n\t"
        "add r5, r5, #1\n\t"
        "mov r6, %[lo]\n\t"
        "mov r9, %[hi]\n\t"
        /* Do top 32 */
        /* Branch-free quotient bit: sbc r8,r8,r8 yields an all-ones mask
         * exactly when the running remainder r9 reached the estimate r5;
         * the mask both sets the quotient bit and subtracts r5. */
        "subs r8, r5, r9\n\t"
        "sbc r8, r8, r8\n\t"
        "add %[r], %[r], %[r]\n\t"
        "sub %[r], %[r], r8\n\t"
        "and r8, r8, r5\n\t"
        "subs r9, r9, r8\n\t"
        /* Next 30 bits */
        "mov r4, #29\n\t"
        "\n1:\n\t"
        /* Shift the next dividend bit into the remainder then repeat the
         * masked subtract step; fixed 30 iterations - constant time. */
        "movs r6, r6, lsl #1\n\t"
        "adc r9, r9, r9\n\t"
        "subs r8, r5, r9\n\t"
        "sbc r8, r8, r8\n\t"
        "add %[r], %[r], %[r]\n\t"
        "sub %[r], %[r], r8\n\t"
        "and r8, r8, r5\n\t"
        "subs r9, r9, r8\n\t"
        "subs r4, r4, #1\n\t"
        "bpl 1b\n\t"
        "add %[r], %[r], %[r]\n\t"
        "add %[r], %[r], #1\n\t"
        /* Handle difference has hi word > 0. */
        "umull r4, r5, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "sbc r5, %[hi], r5\n\t"
        "add %[r], %[r], r5\n\t"
        "umull r4, r5, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "sbc r5, %[hi], r5\n\t"
        "add %[r], %[r], r5\n\t"
        /* Add 1 to result if bottom half of difference is >= d. */
        "mul r4, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "subs r9, %[d], r4\n\t"
        "sbc r8, r8, r8\n\t"
        "sub %[r], %[r], r8\n\t"
        "subs r9, r9, #1\n\t"
        "sbc r8, r8, r8\n\t"
        "sub %[r], %[r], r8\n\t"
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else
        :
#endif
        : "r4", "r5", "r6", "r8", "r9"
    );
    return r;
}
#else
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on arm32
 * Constant time.
 *
 * Same estimate-and-correct structure as the Aarch64 version, but the
 * quotient is built in 16-bit chunks: each udiv against (d >> 16) + 1 never
 * over-estimates, the estimate times d is subtracted from the remainder, and
 * partial quotients are accumulated in r6.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        /* Conditionally pre-shift everything left by 8 when the top 8 bits
         * of d are clear; IT-predicated moves, not branches. */
        "lsrs r3, %[d], #24\n\t"
        "it eq\n\t"
        "moveq r3, #8\n\t"
        "it ne\n\t"
        "movne r3, #0\n\t"
        /* r4 = 31 - shift; (lo >> r4) >> 1 moves lo's top bits into hi
         * without ever shifting by 32. */
        "rsb r4, r3, #31\n\t"
        "lsl %[d], %[d], r3\n\t"
        "lsl %[hi], %[hi], r3\n\t"
        "lsr r5, %[lo], r4\n\t"
        "lsl %[lo], %[lo], r3\n\t"
        "orr %[hi], %[hi], r5, lsr #1\n\t"
        /* r5 = (d >> 16) + 1: divisor estimate that never over-estimates. */
        "lsr r5, %[d], 16\n\t"
        "add r5, r5, 1\n\t"
        /* First 16-bit quotient estimate; r6 accumulates the quotient. */
        "udiv r3, %[hi], r5\n\t"
        "lsl r6, r3, 16\n\t"
        "umull r4, r3, %[d], r6\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        /* Second estimate from what is left in the high word. */
        "udiv r3, %[hi], r5\n\t"
        "lsl r3, r3, 16\n\t"
        "add r6, r6, r3\n\t"
        "umull r4, r3, %[d], r3\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        /* Third estimate from the middle 32 bits: (hi << 16) | (lo >> 16). */
        "lsr r3, %[lo], 16\n\t"
        "orr r3, r3, %[hi], lsl 16\n\t"
        "udiv r3, r3, r5\n\t"
        "add r6, r6, r3\n\t"
        "umull r4, r3, %[d], r3\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        /* Fourth estimate, again from the middle 32 bits. */
        "lsr r3, %[lo], 16\n\t"
        "orr r3, r3, %[hi], lsl 16\n\t"
        "udiv r3, r3, r5\n\t"
        "add r6, r6, r3\n\t"
        "mul r4, %[d], r3\n\t"
        "sub %[lo], %[lo], r4\n\t"
        /* Remainder now fits in lo; final exact division by d. */
        "udiv r3, %[lo], %[d]\n\t"
        "add %[hi], r6, r3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "r3", "r4", "r5", "r6"
    );
    /* Quotient was accumulated into the hi operand by the final add. */
    return hi;
}
#endif
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
/*
 * CPU: ARM Thumb (like Cortex-M0)
 *
 * Thumb-1 has no 32x32->64 multiply, so every 64-bit product is built from
 * four 16x16->32 partial products (al*bl, al*bh, ah*bh, ah*bl) combined
 * with add/adc carry chains. Only low registers (r0-r7) are usable by most
 * instructions, hence the "l" operand constraints and the tight scratch
 * register budget.
 */
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
 * usage of register 'r7'.
 */
#if defined(__clang__)
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6 \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        /* vo doubles as the zero register for the carry adds. */ \
        "movs %[o], #0 \n\t" \
        "adcs %[h], %[o] \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r6", "r7", "cc" \
    )
#ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        /* r5 stays zero throughout as the carry-add source. */ \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result into: vo | vh | vl */
/* Small variant: avoids r7 (frame pointer) at the cost of re-zeroing r5
 * after each use. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#endif
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r4, %[b] \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[l], %[l], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
#ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        /* Each partial product is added twice (squaring cross term). */ \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
/* Small variant: short on low registers, so va's register is borrowed as
 * the zero source and saved/restored through high register r8. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movs r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[a] \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[a] \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bh */ \
        /* Restore a to extract its high half, then zero it again. */ \
        "movs %[a], r8 \n\t" \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* Restore a's original value for the (input) operand. */ \
        "movs %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        "mov %[l], r6 \n\t" \
        "mov %[h], r5 \n\t" \
        /* al * al */ \
        "muls %[l], %[l] \n\t" \
        /* ah * ah */ \
        "muls %[h], %[h] \n\t" \
        /* 2 * al * ah */ \
        /* Doubling folded into the shifts: >>15 / <<17 instead of \
         * >>16 / <<16. */ \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r4, r4 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va) \
        : "r4", "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r7, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r7, r7 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "lsrs r7, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r6", "r7", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "sbcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[b] \n\t" \
        "adcs %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[b] \n\t" \
        "adcs %[o], %[c] \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
        : "cc" \
    )
  1561. #elif defined(WOLFSSL_KEIL)
  1562. /* Multiply va by vb and store double size result in: vh | vl */
  1563. #define SP_ASM_MUL(vl, vh, va, vb) \
  1564. __asm__ __volatile__ ( \
  1565. /* al * bl */ \
  1566. "uxth r6, %[a] \n\t" \
  1567. "uxth %[l], %[b] \n\t" \
  1568. "muls %[l], r6, %[l] \n\t" \
  1569. /* al * bh */ \
  1570. "lsrs r4, %[b], #16 \n\t" \
  1571. "muls r6, r4, r6 \n\t" \
  1572. "lsrs %[h], r6, #16 \n\t" \
  1573. "lsls r6, r6, #16 \n\t" \
  1574. "adds %[l], %[l], r6 \n\t" \
  1575. "movs r5, #0 \n\t" \
  1576. "adcs %[h], %[h], r5 \n\t" \
  1577. /* ah * bh */ \
  1578. "lsrs r6, %[a], #16 \n\t" \
  1579. "muls r4, r6, r4 \n\t" \
  1580. "adds %[h], %[h], r4 \n\t" \
  1581. /* ah * bl */ \
  1582. "uxth r4, %[b] \n\t" \
  1583. "muls r6, r4, r6 \n\t" \
  1584. "lsrs r4, r6, #16 \n\t" \
  1585. "lsls r6, r6, #16 \n\t" \
  1586. "adds %[l], %[l], r6 \n\t" \
  1587. "adcs %[h], %[h], r4 \n\t" \
  1588. : [h] "+l" (vh), [l] "+l" (vl) \
  1589. : [a] "l" (va), [b] "l" (vb) \
  1590. : "r4", "r5", "r6", "cc" \
  1591. )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same four-partial-product scheme as SP_ASM_MUL.  vo is zeroed (and
 * briefly used as the zero source for the carry add); since the 64-bit
 * product cannot overflow vh | vl, vo always ends up 0.
 * Clobbers r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth %[l], %[b] \n\t"                          \
        "muls %[l], r6, %[l] \n\t"                      \
        /* al * bh */                                   \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs %[h], r6, #16 \n\t"                       \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "movs %[o], #0 \n\t"                            \
        "adcs %[h], %[h], %[o] \n\t"                    \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[h], %[h], r7 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r6", "r7", "cc"                              \
    )
  1622. #ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Accumulating form: each 16x16 partial product is added into the
 * three-word accumulator with the carry propagated up through vh into
 * vo (r5 holds the constant 0 used for the carry-only adds).
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[l], %[l], r7 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* al * bh */                                   \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
  1660. #else
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * WOLFSSL_SP_SMALL variant: uses only r5 and r6 as scratch
 * (NOTE(review): presumably to leave r7 free, e.g. as a frame pointer —
 * confirm against the build options).  Because r5 serves both as a
 * partial-product/shift temporary and as the zero source for the
 * carry-only adds, it must be re-zeroed ("movs r5, #0") before each
 * carry propagation into vo.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r5, %[b] \n\t"                            \
        "muls r5, r6, r5 \n\t"                          \
        "adds %[l], %[l], r5 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* al * bh */                                   \
        "lsrs r5, %[b], #16 \n\t"                       \
        "muls r6, r5, r6 \n\t"                          \
        "lsrs r5, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r5, %[b], #16 \n\t"                       \
        "muls r5, r6, r5 \n\t"                          \
        "adds %[h], %[h], r5 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r5, %[b] \n\t"                            \
        "muls r6, r5, r6 \n\t"                          \
        "lsrs r5, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[o], %[o], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "cc"                              \
    )
  1700. #endif
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulate (no overflow word): callers use this where the
 * sum is known not to carry beyond vh.  Same four 16x16 partial
 * products as SP_ASM_MUL_ADD, without the adds into an overflow word.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)               \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r4, %[b] \n\t"                            \
        "muls r4, r6, r4 \n\t"                          \
        "adds %[l], %[l], r4 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        /* al * bh */                                   \
        "lsrs r4, %[b], #16 \n\t"                       \
        "muls r6, r4, r6 \n\t"                          \
        "lsrs r4, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r4 \n\t"                      \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r4, %[b], #16 \n\t"                       \
        "muls r4, r6, r4 \n\t"                          \
        "adds %[h], %[h], r4 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r4, %[b] \n\t"                            \
        "muls r6, r4, r6 \n\t"                          \
        "lsrs r4, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r4 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r4", "r5", "r6", "cc"                        \
    )
  1734. #ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Used by squaring code for the symmetric cross terms (a[i]*a[j] counted
 * twice): every partial product is added into the accumulator twice,
 * each time with full carry propagation into vo.  r5 holds the constant
 * 0 for the carry-only adds.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[l], %[l], r7 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[l], %[l], r7 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* al * bh */                                   \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
  1783. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * WOLFSSL_SP_SMALL variant using only r5/r6 as low scratch registers:
 * the input operand %[a] is stashed in high register r8, then the %[a]
 * register itself is reused as the zero source for the carry-only adds.
 * It is reloaded from r8 when the high half of va is needed again and
 * restored at the end, so va is unchanged on exit despite being listed
 * as an input.
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                              \
        "movs r8, %[a] \n\t"                            \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r5, %[b] \n\t"                            \
        "muls r5, r6, r5 \n\t"                          \
        "adds %[l], %[l], r5 \n\t"                      \
        "movs %[a], #0 \n\t"                            \
        "adcs %[h], %[h], %[a] \n\t"                    \
        "adcs %[o], %[o], %[a] \n\t"                    \
        "adds %[l], %[l], r5 \n\t"                      \
        "adcs %[h], %[h], %[a] \n\t"                    \
        "adcs %[o], %[o], %[a] \n\t"                    \
        /* al * bh */                                   \
        "lsrs r5, %[b], #16 \n\t"                       \
        "muls r6, r5, r6 \n\t"                          \
        "lsrs r5, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], %[a] \n\t"                    \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], %[a] \n\t"                    \
        /* ah * bh */                                   \
        "movs %[a], r8 \n\t"                            \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r5, %[b], #16 \n\t"                       \
        "muls r5, r6, r5 \n\t"                          \
        "adds %[h], %[h], r5 \n\t"                      \
        "movs %[a], #0 \n\t"                            \
        "adcs %[o], %[o], %[a] \n\t"                    \
        "adds %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], %[a] \n\t"                    \
        /* ah * bl */                                   \
        "uxth r5, %[b] \n\t"                            \
        "muls r6, r5, r6 \n\t"                          \
        "lsrs r5, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], %[a] \n\t"                    \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adcs %[o], %[o], %[a] \n\t"                    \
        "movs %[a], r8 \n\t"                            \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r8", "cc"                        \
    )
  1836. #endif
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Like SP_ASM_MUL_ADD2 but skips the carry propagation into vo for the
 * first (al*bl) partial product, relying on the caller's guarantee that
 * the initial accumulation cannot carry past vh.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)          \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[l], %[l], r7 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        "adds %[l], %[l], r7 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        /* al * bh */                                   \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bh */                                   \
        "lsrs r6, %[a], #16 \n\t"                       \
        "lsrs r7, %[b], #16 \n\t"                       \
        "muls r7, r6, r7 \n\t"                          \
        "adds %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #16 \n\t"                         \
        "lsls r6, r6, #16 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
/* Square va and store double size result in: vh | vl
 *
 * a^2 = al^2 + 2*al*ah*2^16 + ah^2*2^32.  The doubled cross term is
 * produced by shifting al*ah right by 15 / left by 17 (i.e. shifting
 * the 16-bit-aligned product by one extra bit) instead of adding twice.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR(vl, vh, va)                          \
    __asm__ __volatile__ (                              \
        "lsrs r5, %[a], #16 \n\t"                       \
        "uxth r6, %[a] \n\t"                            \
        "mov %[l], r6 \n\t"                             \
        "mov %[h], r5 \n\t"                             \
        /* al * al */                                   \
        "muls %[l], %[l], %[l] \n\t"                    \
        /* ah * ah */                                   \
        "muls %[h], %[h], %[h] \n\t"                    \
        /* 2 * al * ah */                               \
        "muls r6, r5, r6 \n\t"                          \
        "lsrs r5, r6, #15 \n\t"                         \
        "lsls r6, r6, #17 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r5 \n\t"                      \
        : [h] "+l" (vh), [l] "+l" (vl)                  \
        : [a] "l" (va)                                  \
        : "r5", "r6", "cc"                              \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * Adds al^2 and ah^2 into the accumulator first, then the doubled cross
 * term (al*ah shifted by 15/17), carrying into vo after each stage.
 * va is re-split before the cross term because r4/r6 were consumed.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                  \
    __asm__ __volatile__ (                              \
        "lsrs r4, %[a], #16 \n\t"                       \
        "uxth r6, %[a] \n\t"                            \
        /* al * al */                                   \
        "muls r6, r6, r6 \n\t"                          \
        /* ah * ah */                                   \
        "muls r4, r4, r4 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r4 \n\t"                      \
        "movs r5, #0 \n\t"                              \
        "adcs %[o], %[o], r5 \n\t"                      \
        "lsrs r4, %[a], #16 \n\t"                       \
        "uxth r6, %[a] \n\t"                            \
        /* 2 * al * ah */                               \
        "muls r6, r4, r6 \n\t"                          \
        "lsrs r4, r6, #15 \n\t"                         \
        "lsls r6, r6, #17 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r4 \n\t"                      \
        "adcs %[o], %[o], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va)                                  \
        : "r4", "r5", "r6", "cc"                        \
    )
/* Square va and add double size result into: vh | vl
 *
 * Two-word variant of SP_ASM_SQR_ADD (no overflow word) for callers
 * that know the accumulation cannot carry past vh.
 * Clobbers r6, r7 and the condition flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                   \
    __asm__ __volatile__ (                              \
        "lsrs r7, %[a], #16 \n\t"                       \
        "uxth r6, %[a] \n\t"                            \
        /* al * al */                                   \
        "muls r6, r6, r6 \n\t"                          \
        /* ah * ah */                                   \
        "muls r7, r7, r7 \n\t"                          \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        "lsrs r7, %[a], #16 \n\t"                       \
        "uxth r6, %[a] \n\t"                            \
        /* 2 * al * ah */                               \
        "muls r6, r7, r6 \n\t"                          \
        "lsrs r7, r6, #15 \n\t"                         \
        "lsls r6, r6, #17 \n\t"                         \
        "adds %[l], %[l], r6 \n\t"                      \
        "adcs %[h], %[h], r7 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r6", "r7", "cc"                              \
    )
/* Add va into: vh | vl
 *
 * Single-word add with carry propagation into the high word (r5 = 0 is
 * the carry-only addend).  Clobbers r5 and the condition flags.
 */
#define SP_ASM_ADDC(vl, vh, va)                         \
    __asm__ __volatile__ (                              \
        "adds %[l], %[l], %[a] \n\t"                    \
        "movs r5, #0 \n\t"                              \
        "adcs %[h], %[h], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r5", "cc"                                    \
    )
/* Sub va from: vh | vl
 *
 * Single-word subtract with borrow propagation out of the high word.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_SUBC(vl, vh, va)                         \
    __asm__ __volatile__ (                              \
        "subs %[l], %[l], %[a] \n\t"                    \
        "movs r5, #0 \n\t"                              \
        "sbcs %[h], %[h], r5 \n\t"                      \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r5", "cc"                                    \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * Three-word add performed twice with full carry chaining; used where a
 * triple-word value must be doubled into the accumulator.
 * Clobbers only the condition flags.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)        \
    __asm__ __volatile__ (                              \
        "adds %[l], %[l], %[a] \n\t"                    \
        "adcs %[h], %[h], %[b] \n\t"                    \
        "adcs %[o], %[o], %[c] \n\t"                    \
        "adds %[l], %[l], %[a] \n\t"                    \
        "adcs %[h], %[h], %[b] \n\t"                    \
        "adcs %[o], %[o], %[c] \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)      \
        : "cc"                                          \
    )
  1987. #elif defined(__GNUC__)
/* Multiply va by vb and store double size result in: vh | vl
 *
 * GNU-as (divided syntax) version of the Keil macro above: Thumb-1
 * two-operand forms ("mul Rd, Rm", "adc Rd, Rm") instead of the
 * three-operand unified mnemonics.  Same four 16x16->32 partial
 * products.  Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                      \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth %[l], %[b] \n\t"                          \
        "mul %[l], r6 \n\t"                             \
        /* al * bh */                                   \
        "lsr r4, %[b], #16 \n\t"                        \
        "mul r6, r4 \n\t"                               \
        "lsr %[h], r6, #16 \n\t"                        \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "mul r4, r6 \n\t"                               \
        "add %[h], %[h], r4 \n\t"                       \
        /* ah * bl */                                   \
        "uxth r4, %[b] \n\t"                            \
        "mul r6, r4 \n\t"                               \
        "lsr r4, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r4 \n\t"                             \
        : [h] "+l" (vh), [l] "+l" (vl)                  \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r4", "r5", "r6", "cc"                        \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * GNU-as version.  vo is zeroed and used as the zero source for the
 * carry add; since the 64-bit product cannot overflow vh | vl, vo is
 * always 0 on exit.  Clobbers r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth %[l], %[b] \n\t"                          \
        "mul %[l], r6 \n\t"                             \
        /* al * bh */                                   \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r6, r7 \n\t"                               \
        "lsr %[h], r6, #16 \n\t"                        \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "mov %[o], #0 \n\t"                             \
        "adc %[h], %[o] \n\t"                           \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "mul r7, r6 \n\t"                               \
        "add %[h], %[h], r7 \n\t"                       \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r6", "r7", "cc"                              \
    )
  2048. #ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * GNU-as accumulating form: each 16x16 partial product is added into
 * the three-word accumulator, with carries chained up into vo via the
 * zero register r5.  Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "mul r7, r6 \n\t"                               \
        "add %[l], %[l], r7 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* al * bh */                                   \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r7, r6 \n\t"                               \
        "add %[h], %[h], r7 \n\t"                       \
        "adc %[o], r5 \n\t"                             \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
  2086. #else
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * GNU-as WOLFSSL_SP_SMALL variant using only r5/r6 as scratch
 * (NOTE(review): presumably to keep r7 free, e.g. as a frame pointer —
 * confirm against the build options).  r5 doubles as product/shift
 * temporary and zero source, so it is re-zeroed ("mov r5, #0") before
 * each carry-only add into vo.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r5, %[b] \n\t"                            \
        "mul r5, r6 \n\t"                               \
        "add %[l], %[l], r5 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* al * bh */                                   \
        "lsr r5, %[b], #16 \n\t"                        \
        "mul r6, r5 \n\t"                               \
        "lsr r5, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "mov r5, #0 \n\t"                               \
        "adc %[o], r5 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r5, %[b], #16 \n\t"                        \
        "mul r5, r6 \n\t"                               \
        "add %[h], %[h], r5 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[o], r5 \n\t"                             \
        /* ah * bl */                                   \
        "uxth r5, %[b] \n\t"                            \
        "mul r6, r5 \n\t"                               \
        "lsr r5, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "mov r5, #0 \n\t"                               \
        "adc %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "cc"                              \
    )
  2127. #endif
/* Multiply va by vb and add double size result into: vh | vl
 *
 * GNU-as two-word accumulate (no overflow word): callers use this where
 * the sum is known not to carry beyond vh.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)               \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r4, %[b] \n\t"                            \
        "mul r4, r6 \n\t"                               \
        "add %[l], %[l], r4 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        /* al * bh */                                   \
        "lsr r4, %[b], #16 \n\t"                        \
        "mul r6, r4 \n\t"                               \
        "lsr r4, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r4 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r4, %[b], #16 \n\t"                        \
        "mul r4, r6 \n\t"                               \
        "add %[h], %[h], r4 \n\t"                       \
        /* ah * bl */                                   \
        "uxth r4, %[b] \n\t"                            \
        "mul r6, r4 \n\t"                               \
        "lsr r4, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r4 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r4", "r5", "r6", "cc"                        \
    )
  2161. #ifndef WOLFSSL_SP_SMALL
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * GNU-as version used by squaring for symmetric cross terms: every
 * partial product is accumulated twice with full carry chaining into
 * vo (r5 is the constant-zero carry addend).
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "mul r7, r6 \n\t"                               \
        "add %[l], %[l], r7 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        "add %[l], %[l], r7 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* al * bh */                                   \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r7, r6 \n\t"                               \
        "add %[h], %[h], r7 \n\t"                       \
        "adc %[o], r5 \n\t"                             \
        "add %[h], %[h], r7 \n\t"                       \
        "adc %[o], r5 \n\t"                             \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
  2210. #else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * GNU-as WOLFSSL_SP_SMALL variant: %[a] is stashed in high register r8
 * and the %[a] register reused as the zero/carry source, then reloaded
 * when the high half of va is needed and restored at the end, so va is
 * unchanged on exit despite being listed as an input.
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)             \
    __asm__ __volatile__ (                              \
        "mov r8, %[a] \n\t"                             \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r5, %[b] \n\t"                            \
        "mul r5, r6 \n\t"                               \
        "add %[l], %[l], r5 \n\t"                       \
        "mov %[a], #0 \n\t"                             \
        "adc %[h], %[a] \n\t"                           \
        "adc %[o], %[a] \n\t"                           \
        "add %[l], %[l], r5 \n\t"                       \
        "adc %[h], %[a] \n\t"                           \
        "adc %[o], %[a] \n\t"                           \
        /* al * bh */                                   \
        "lsr r5, %[b], #16 \n\t"                        \
        "mul r6, r5 \n\t"                               \
        "lsr r5, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], %[a] \n\t"                           \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], %[a] \n\t"                           \
        /* ah * bh */                                   \
        "mov %[a], r8 \n\t"                             \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r5, %[b], #16 \n\t"                        \
        "mul r5, r6 \n\t"                               \
        "add %[h], %[h], r5 \n\t"                       \
        "mov %[a], #0 \n\t"                             \
        "adc %[o], %[a] \n\t"                           \
        "add %[h], %[h], r5 \n\t"                       \
        "adc %[o], %[a] \n\t"                           \
        /* ah * bl */                                   \
        "uxth r5, %[b] \n\t"                            \
        "mul r6, r5 \n\t"                               \
        "lsr r5, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], %[a] \n\t"                           \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        "adc %[o], %[a] \n\t"                           \
        "mov %[a], r8 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r8", "cc"                        \
    )
  2263. #endif
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * GNU-as version: skips the carry into vo for the first (al*bl) partial
 * product, relying on the caller's no-overflow guarantee.
 * Clobbers r5, r6, r7 and the condition flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)          \
    __asm__ __volatile__ (                              \
        /* al * bl */                                   \
        "uxth r6, %[a] \n\t"                            \
        "uxth r7, %[b] \n\t"                            \
        "mul r7, r6 \n\t"                               \
        "add %[l], %[l], r7 \n\t"                       \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        "add %[l], %[l], r7 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        /* al * bh */                                   \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        /* ah * bh */                                   \
        "lsr r6, %[a], #16 \n\t"                        \
        "lsr r7, %[b], #16 \n\t"                        \
        "mul r7, r6 \n\t"                               \
        "add %[h], %[h], r7 \n\t"                       \
        "adc %[o], r5 \n\t"                             \
        "add %[h], %[h], r7 \n\t"                       \
        "adc %[o], r5 \n\t"                             \
        /* ah * bl */                                   \
        "uxth r7, %[b] \n\t"                            \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #16 \n\t"                          \
        "lsl r6, r6, #16 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb)                    \
        : "r5", "r6", "r7", "cc"                        \
    )
/* Square va and store double size result in: vh | vl
 *
 * GNU-as version.  a^2 = al^2 + 2*al*ah*2^16 + ah^2*2^32; the doubled
 * cross term is formed by the 15/17-bit shifts of al*ah.
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_SQR(vl, vh, va)                          \
    __asm__ __volatile__ (                              \
        "lsr r5, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                            \
        "mov %[l], r6 \n\t"                             \
        "mov %[h], r5 \n\t"                             \
        /* al * al */                                   \
        "mul %[l], %[l] \n\t"                           \
        /* ah * ah */                                   \
        "mul %[h], %[h] \n\t"                           \
        /* 2 * al * ah */                               \
        "mul r6, r5 \n\t"                               \
        "lsr r5, r6, #15 \n\t"                          \
        "lsl r6, r6, #17 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r5 \n\t"                             \
        : [h] "+l" (vh), [l] "+l" (vl)                  \
        : [a] "l" (va)                                  \
        : "r5", "r6", "cc"                              \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * GNU-as version: accumulates al^2 and ah^2 first, then the doubled
 * cross term, carrying into vo after each stage; va is re-split before
 * the cross term because r4/r6 were consumed.
 * Clobbers r4, r5, r6 and the condition flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                  \
    __asm__ __volatile__ (                              \
        "lsr r4, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                            \
        /* al * al */                                   \
        "mul r6, r6 \n\t"                               \
        /* ah * ah */                                   \
        "mul r4, r4 \n\t"                               \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r4 \n\t"                             \
        "mov r5, #0 \n\t"                               \
        "adc %[o], r5 \n\t"                             \
        "lsr r4, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                            \
        /* 2 * al * ah */                               \
        "mul r6, r4 \n\t"                               \
        "lsr r4, r6, #15 \n\t"                          \
        "lsl r6, r6, #17 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r4 \n\t"                             \
        "adc %[o], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va)                                  \
        : "r4", "r5", "r6", "cc"                        \
    )
/* Square va and add double size result into: vh | vl
 *
 * GNU-as two-word variant (no overflow word) for callers that know the
 * accumulation cannot carry past vh.
 * Clobbers r6, r7 and the condition flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                   \
    __asm__ __volatile__ (                              \
        "lsr r7, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                            \
        /* al * al */                                   \
        "mul r6, r6 \n\t"                               \
        /* ah * ah */                                   \
        "mul r7, r7 \n\t"                               \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        "lsr r7, %[a], #16 \n\t"                        \
        "uxth r6, %[a] \n\t"                            \
        /* 2 * al * ah */                               \
        "mul r6, r7 \n\t"                               \
        "lsr r7, r6, #15 \n\t"                          \
        "lsl r6, r6, #17 \n\t"                          \
        "add %[l], %[l], r6 \n\t"                       \
        "adc %[h], r7 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r6", "r7", "cc"                              \
    )
/* Add va into: vh | vl
 *
 * GNU-as single-word add with carry into the high word (r5 = 0 is the
 * carry-only addend).  Clobbers r5 and the condition flags.
 */
#define SP_ASM_ADDC(vl, vh, va)                         \
    __asm__ __volatile__ (                              \
        "add %[l], %[l], %[a] \n\t"                     \
        "mov r5, #0 \n\t"                               \
        "adc %[h], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r5", "cc"                                    \
    )
/* Sub va from: vh | vl
 *
 * GNU-as single-word subtract with borrow out of the high word.
 * Clobbers r5 and the condition flags.
 */
#define SP_ASM_SUBC(vl, vh, va)                         \
    __asm__ __volatile__ (                              \
        "sub %[l], %[l], %[a] \n\t"                     \
        "mov r5, #0 \n\t"                               \
        "sbc %[h], r5 \n\t"                             \
        : [l] "+l" (vl), [h] "+l" (vh)                  \
        : [a] "l" (va)                                  \
        : "r5", "cc"                                    \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * GNU-as three-word add performed twice with full carry chaining.
 * Clobbers only the condition flags.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)        \
    __asm__ __volatile__ (                              \
        "add %[l], %[l], %[a] \n\t"                     \
        "adc %[h], %[b] \n\t"                           \
        "adc %[o], %[c] \n\t"                           \
        "add %[l], %[l], %[a] \n\t"                     \
        "adc %[h], %[b] \n\t"                           \
        "adc %[o], %[c] \n\t"                           \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)   \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)      \
        : "cc"                                          \
    )
  2414. #endif
  2415. #ifdef WOLFSSL_SP_DIV_WORD_HALF
  2416. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  2417. *
  2418. * No division instruction used - does operation bit by bit.
  2419. * Constant time.
  2420. *
  2421. * @param [in] hi SP integer digit. High digit of the dividend.
  2422. * @param [in] lo SP integer digit. Lower digit of the dividend.
  2423. * @param [in] d SP integer digit. Number to divide by.
  2424. * @return The division result.
  2425. */
  2426. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  2427. sp_int_digit d)
  2428. {
  2429. __asm__ __volatile__ (
  2430. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2431. "lsrs r3, %[d], #24\n\t"
  2432. #else
  2433. "lsr r3, %[d], #24\n\t"
  2434. #endif
  2435. "beq 2%=f\n\t"
  2436. "\n1%=:\n\t"
  2437. "movs r3, #0\n\t"
  2438. "b 3%=f\n\t"
  2439. "\n2%=:\n\t"
  2440. "mov r3, #8\n\t"
  2441. "\n3%=:\n\t"
  2442. "movs r4, #31\n\t"
  2443. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2444. "subs r4, r4, r3\n\t"
  2445. #else
  2446. "sub r4, r4, r3\n\t"
  2447. #endif
  2448. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2449. "lsls %[d], %[d], r3\n\t"
  2450. #else
  2451. "lsl %[d], %[d], r3\n\t"
  2452. #endif
  2453. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2454. "lsls %[hi], %[hi], r3\n\t"
  2455. #else
  2456. "lsl %[hi], %[hi], r3\n\t"
  2457. #endif
  2458. "mov r5, %[lo]\n\t"
  2459. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2460. "lsrs r5, r5, r4\n\t"
  2461. #else
  2462. "lsr r5, r5, r4\n\t"
  2463. #endif
  2464. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2465. "lsls %[lo], %[lo], r3\n\t"
  2466. #else
  2467. "lsl %[lo], %[lo], r3\n\t"
  2468. #endif
  2469. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2470. "lsrs r5, r5, #1\n\t"
  2471. #else
  2472. "lsr r5, r5, #1\n\t"
  2473. #endif
  2474. #if defined(WOLFSSL_KEIL)
  2475. "orrs %[hi], %[hi], r5\n\t"
  2476. #elif defined(__clang__)
  2477. "orrs %[hi], r5\n\t"
  2478. #else
  2479. "orr %[hi], r5\n\t"
  2480. #endif
  2481. "movs r3, #0\n\t"
  2482. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2483. "lsrs r5, %[d], #1\n\t"
  2484. #else
  2485. "lsr r5, %[d], #1\n\t"
  2486. #endif
  2487. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2488. "adds r5, r5, #1\n\t"
  2489. #else
  2490. "add r5, r5, #1\n\t"
  2491. #endif
  2492. "mov r8, %[lo]\n\t"
  2493. "mov r9, %[hi]\n\t"
  2494. /* Do top 32 */
  2495. "movs r6, r5\n\t"
  2496. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2497. "subs r6, r6, %[hi]\n\t"
  2498. #else
  2499. "sub r6, r6, %[hi]\n\t"
  2500. #endif
  2501. #ifdef WOLFSSL_KEIL
  2502. "sbcs r6, r6, r6\n\t"
  2503. #elif defined(__clang__)
  2504. "sbcs r6, r6\n\t"
  2505. #else
  2506. "sbc r6, r6\n\t"
  2507. #endif
  2508. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2509. "adds r3, r3, r3\n\t"
  2510. #else
  2511. "add r3, r3, r3\n\t"
  2512. #endif
  2513. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2514. "subs r3, r3, r6\n\t"
  2515. #else
  2516. "sub r3, r3, r6\n\t"
  2517. #endif
  2518. #ifdef WOLFSSL_KEIL
  2519. "ands r6, r6, r5\n\t"
  2520. #elif defined(__clang__)
  2521. "ands r6, r5\n\t"
  2522. #else
  2523. "and r6, r5\n\t"
  2524. #endif
  2525. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2526. "subs %[hi], %[hi], r6\n\t"
  2527. #else
  2528. "sub %[hi], %[hi], r6\n\t"
  2529. #endif
  2530. "movs r4, #29\n\t"
  2531. "\n"
  2532. "L_sp_div_word_loop%=:\n\t"
  2533. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2534. "lsls %[lo], %[lo], #1\n\t"
  2535. #else
  2536. "lsl %[lo], %[lo], #1\n\t"
  2537. #endif
  2538. #ifdef WOLFSSL_KEIL
  2539. "adcs %[hi], %[hi], %[hi]\n\t"
  2540. #elif defined(__clang__)
  2541. "adcs %[hi], %[hi]\n\t"
  2542. #else
  2543. "adc %[hi], %[hi]\n\t"
  2544. #endif
  2545. "movs r6, r5\n\t"
  2546. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2547. "subs r6, r6, %[hi]\n\t"
  2548. #else
  2549. "sub r6, r6, %[hi]\n\t"
  2550. #endif
  2551. #ifdef WOLFSSL_KEIL
  2552. "sbcs r6, r6, r6\n\t"
  2553. #elif defined(__clang__)
  2554. "sbcs r6, r6\n\t"
  2555. #else
  2556. "sbc r6, r6\n\t"
  2557. #endif
  2558. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2559. "adds r3, r3, r3\n\t"
  2560. #else
  2561. "add r3, r3, r3\n\t"
  2562. #endif
  2563. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2564. "subs r3, r3, r6\n\t"
  2565. #else
  2566. "sub r3, r3, r6\n\t"
  2567. #endif
  2568. #ifdef WOLFSSL_KEIL
  2569. "ands r6, r6, r5\n\t"
  2570. #elif defined(__clang__)
  2571. "ands r6, r5\n\t"
  2572. #else
  2573. "and r6, r5\n\t"
  2574. #endif
  2575. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2576. "subs %[hi], %[hi], r6\n\t"
  2577. #else
  2578. "sub %[hi], %[hi], r6\n\t"
  2579. #endif
  2580. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2581. "subs r4, r4, #1\n\t"
  2582. #else
  2583. "sub r4, r4, #1\n\t"
  2584. #endif
  2585. "bpl L_sp_div_word_loop%=\n\t"
  2586. "movs r7, #0\n\t"
  2587. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2588. "adds r3, r3, r3\n\t"
  2589. #else
  2590. "add r3, r3, r3\n\t"
  2591. #endif
  2592. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2593. "adds r3, r3, #1\n\t"
  2594. #else
  2595. "add r3, r3, #1\n\t"
  2596. #endif
  2597. /* r * d - Start */
  2598. "uxth %[hi], r3\n\t"
  2599. "uxth r4, %[d]\n\t"
  2600. #ifdef WOLFSSL_KEIL
  2601. "muls r4, %[hi], r4\n\t"
  2602. #elif defined(__clang__)
  2603. "muls r4, %[hi]\n\t"
  2604. #else
  2605. "mul r4, %[hi]\n\t"
  2606. #endif
  2607. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2608. "lsrs r6, %[d], #16\n\t"
  2609. #else
  2610. "lsr r6, %[d], #16\n\t"
  2611. #endif
  2612. #ifdef WOLFSSL_KEIL
  2613. "muls %[hi], r6, %[hi]\n\t"
  2614. #elif defined(__clang__)
  2615. "muls %[hi], r6\n\t"
  2616. #else
  2617. "mul %[hi], r6\n\t"
  2618. #endif
  2619. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2620. "lsrs r5, %[hi], #16\n\t"
  2621. #else
  2622. "lsr r5, %[hi], #16\n\t"
  2623. #endif
  2624. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2625. "lsls %[hi], %[hi], #16\n\t"
  2626. #else
  2627. "lsl %[hi], %[hi], #16\n\t"
  2628. #endif
  2629. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2630. "adds r4, r4, %[hi]\n\t"
  2631. #else
  2632. "add r4, r4, %[hi]\n\t"
  2633. #endif
  2634. #ifdef WOLFSSL_KEIL
  2635. "adcs r5, r5, r7\n\t"
  2636. #elif defined(__clang__)
  2637. "adcs r5, r7\n\t"
  2638. #else
  2639. "adc r5, r7\n\t"
  2640. #endif
  2641. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2642. "lsrs %[hi], r3, #16\n\t"
  2643. #else
  2644. "lsr %[hi], r3, #16\n\t"
  2645. #endif
  2646. #ifdef WOLFSSL_KEIL
  2647. "muls r6, %[hi], r6\n\t"
  2648. #elif defined(__clang__)
  2649. "muls r6, %[hi]\n\t"
  2650. #else
  2651. "mul r6, %[hi]\n\t"
  2652. #endif
  2653. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2654. "adds r5, r5, r6\n\t"
  2655. #else
  2656. "add r5, r5, r6\n\t"
  2657. #endif
  2658. "uxth r6, %[d]\n\t"
  2659. #ifdef WOLFSSL_KEIL
  2660. "muls %[hi], r6, %[hi]\n\t"
  2661. #elif defined(__clang__)
  2662. "muls %[hi], r6\n\t"
  2663. #else
  2664. "mul %[hi], r6\n\t"
  2665. #endif
  2666. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2667. "lsrs r6, %[hi], #16\n\t"
  2668. #else
  2669. "lsr r6, %[hi], #16\n\t"
  2670. #endif
  2671. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2672. "lsls %[hi], %[hi], #16\n\t"
  2673. #else
  2674. "lsl %[hi], %[hi], #16\n\t"
  2675. #endif
  2676. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2677. "adds r4, r4, %[hi]\n\t"
  2678. #else
  2679. "add r4, r4, %[hi]\n\t"
  2680. #endif
  2681. #ifdef WOLFSSL_KEIL
  2682. "adcs r5, r5, r6\n\t"
  2683. #elif defined(__clang__)
  2684. "adcs r5, r6\n\t"
  2685. #else
  2686. "adc r5, r6\n\t"
  2687. #endif
  2688. /* r * d - Done */
  2689. "mov %[hi], r8\n\t"
  2690. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2691. "subs %[hi], %[hi], r4\n\t"
  2692. #else
  2693. "sub %[hi], %[hi], r4\n\t"
  2694. #endif
  2695. "movs r4, %[hi]\n\t"
  2696. "mov %[hi], r9\n\t"
  2697. #ifdef WOLFSSL_KEIL
  2698. "sbcs %[hi], %[hi], r5\n\t"
  2699. #elif defined(__clang__)
  2700. "sbcs %[hi], r5\n\t"
  2701. #else
  2702. "sbc %[hi], r5\n\t"
  2703. #endif
  2704. "movs r5, %[hi]\n\t"
  2705. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2706. "adds r3, r3, r5\n\t"
  2707. #else
  2708. "add r3, r3, r5\n\t"
  2709. #endif
  2710. /* r * d - Start */
  2711. "uxth %[hi], r3\n\t"
  2712. "uxth r4, %[d]\n\t"
  2713. #ifdef WOLFSSL_KEIL
  2714. "muls r4, %[hi], r4\n\t"
  2715. #elif defined(__clang__)
  2716. "muls r4, %[hi]\n\t"
  2717. #else
  2718. "mul r4, %[hi]\n\t"
  2719. #endif
  2720. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2721. "lsrs r6, %[d], #16\n\t"
  2722. #else
  2723. "lsr r6, %[d], #16\n\t"
  2724. #endif
  2725. #ifdef WOLFSSL_KEIL
  2726. "muls %[hi], r6, %[hi]\n\t"
  2727. #elif defined(__clang__)
  2728. "muls %[hi], r6\n\t"
  2729. #else
  2730. "mul %[hi], r6\n\t"
  2731. #endif
  2732. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2733. "lsrs r5, %[hi], #16\n\t"
  2734. #else
  2735. "lsr r5, %[hi], #16\n\t"
  2736. #endif
  2737. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2738. "lsls %[hi], %[hi], #16\n\t"
  2739. #else
  2740. "lsl %[hi], %[hi], #16\n\t"
  2741. #endif
  2742. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2743. "adds r4, r4, %[hi]\n\t"
  2744. #else
  2745. "add r4, r4, %[hi]\n\t"
  2746. #endif
  2747. #ifdef WOLFSSL_KEIL
  2748. "adcs r5, r5, r7\n\t"
  2749. #elif defined(__clang__)
  2750. "adcs r5, r7\n\t"
  2751. #else
  2752. "adc r5, r7\n\t"
  2753. #endif
  2754. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2755. "lsrs %[hi], r3, #16\n\t"
  2756. #else
  2757. "lsr %[hi], r3, #16\n\t"
  2758. #endif
  2759. #ifdef WOLFSSL_KEIL
  2760. "muls r6, %[hi], r6\n\t"
  2761. #elif defined(__clang__)
  2762. "muls r6, %[hi]\n\t"
  2763. #else
  2764. "mul r6, %[hi]\n\t"
  2765. #endif
  2766. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2767. "adds r5, r5, r6\n\t"
  2768. #else
  2769. "add r5, r5, r6\n\t"
  2770. #endif
  2771. "uxth r6, %[d]\n\t"
  2772. #ifdef WOLFSSL_KEIL
  2773. "muls %[hi], r6, %[hi]\n\t"
  2774. #elif defined(__clang__)
  2775. "muls %[hi], r6\n\t"
  2776. #else
  2777. "mul %[hi], r6\n\t"
  2778. #endif
  2779. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2780. "lsrs r6, %[hi], #16\n\t"
  2781. #else
  2782. "lsr r6, %[hi], #16\n\t"
  2783. #endif
  2784. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2785. "lsls %[hi], %[hi], #16\n\t"
  2786. #else
  2787. "lsl %[hi], %[hi], #16\n\t"
  2788. #endif
  2789. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2790. "adds r4, r4, %[hi]\n\t"
  2791. #else
  2792. "add r4, r4, %[hi]\n\t"
  2793. #endif
  2794. #ifdef WOLFSSL_KEIL
  2795. "adcs r5, r5, r6\n\t"
  2796. #elif defined(__clang__)
  2797. "adcs r5, r6\n\t"
  2798. #else
  2799. "adc r5, r6\n\t"
  2800. #endif
  2801. /* r * d - Done */
  2802. "mov %[hi], r8\n\t"
  2803. "mov r6, r9\n\t"
  2804. #ifdef WOLFSSL_KEIL
  2805. "subs r4, %[hi], r4\n\t"
  2806. #else
  2807. #ifdef __clang__
  2808. "subs r4, %[hi], r4\n\t"
  2809. #else
  2810. "sub r4, %[hi], r4\n\t"
  2811. #endif
  2812. #endif
  2813. #ifdef WOLFSSL_KEIL
  2814. "sbcs r6, r6, r5\n\t"
  2815. #elif defined(__clang__)
  2816. "sbcs r6, r5\n\t"
  2817. #else
  2818. "sbc r6, r5\n\t"
  2819. #endif
  2820. "movs r5, r6\n\t"
  2821. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2822. "adds r3, r3, r5\n\t"
  2823. #else
  2824. "add r3, r3, r5\n\t"
  2825. #endif
  2826. /* r * d - Start */
  2827. "uxth %[hi], r3\n\t"
  2828. "uxth r4, %[d]\n\t"
  2829. #ifdef WOLFSSL_KEIL
  2830. "muls r4, %[hi], r4\n\t"
  2831. #elif defined(__clang__)
  2832. "muls r4, %[hi]\n\t"
  2833. #else
  2834. "mul r4, %[hi]\n\t"
  2835. #endif
  2836. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2837. "lsrs r6, %[d], #16\n\t"
  2838. #else
  2839. "lsr r6, %[d], #16\n\t"
  2840. #endif
  2841. #ifdef WOLFSSL_KEIL
  2842. "muls %[hi], r6, %[hi]\n\t"
  2843. #elif defined(__clang__)
  2844. "muls %[hi], r6\n\t"
  2845. #else
  2846. "mul %[hi], r6\n\t"
  2847. #endif
  2848. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2849. "lsrs r5, %[hi], #16\n\t"
  2850. #else
  2851. "lsr r5, %[hi], #16\n\t"
  2852. #endif
  2853. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2854. "lsls %[hi], %[hi], #16\n\t"
  2855. #else
  2856. "lsl %[hi], %[hi], #16\n\t"
  2857. #endif
  2858. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2859. "adds r4, r4, %[hi]\n\t"
  2860. #else
  2861. "add r4, r4, %[hi]\n\t"
  2862. #endif
  2863. #ifdef WOLFSSL_KEIL
  2864. "adcs r5, r5, r7\n\t"
  2865. #elif defined(__clang__)
  2866. "adcs r5, r7\n\t"
  2867. #else
  2868. "adc r5, r7\n\t"
  2869. #endif
  2870. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2871. "lsrs %[hi], r3, #16\n\t"
  2872. #else
  2873. "lsr %[hi], r3, #16\n\t"
  2874. #endif
  2875. #ifdef WOLFSSL_KEIL
  2876. "muls r6, %[hi], r6\n\t"
  2877. #elif defined(__clang__)
  2878. "muls r6, %[hi]\n\t"
  2879. #else
  2880. "mul r6, %[hi]\n\t"
  2881. #endif
  2882. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2883. "adds r5, r5, r6\n\t"
  2884. #else
  2885. "add r5, r5, r6\n\t"
  2886. #endif
  2887. "uxth r6, %[d]\n\t"
  2888. #ifdef WOLFSSL_KEIL
  2889. "muls %[hi], r6, %[hi]\n\t"
  2890. #elif defined(__clang__)
  2891. "muls %[hi], r6\n\t"
  2892. #else
  2893. "mul %[hi], r6\n\t"
  2894. #endif
  2895. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2896. "lsrs r6, %[hi], #16\n\t"
  2897. #else
  2898. "lsr r6, %[hi], #16\n\t"
  2899. #endif
  2900. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2901. "lsls %[hi], %[hi], #16\n\t"
  2902. #else
  2903. "lsl %[hi], %[hi], #16\n\t"
  2904. #endif
  2905. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2906. "adds r4, r4, %[hi]\n\t"
  2907. #else
  2908. "add r4, r4, %[hi]\n\t"
  2909. #endif
  2910. #ifdef WOLFSSL_KEIL
  2911. "adcs r5, r5, r6\n\t"
  2912. #elif defined(__clang__)
  2913. "adcs r5, r6\n\t"
  2914. #else
  2915. "adc r5, r6\n\t"
  2916. #endif
  2917. /* r * d - Done */
  2918. "mov %[hi], r8\n\t"
  2919. "mov r6, r9\n\t"
  2920. #ifdef WOLFSSL_KEIL
  2921. "subs r4, %[hi], r4\n\t"
  2922. #else
  2923. #ifdef __clang__
  2924. "subs r4, %[hi], r4\n\t"
  2925. #else
  2926. "sub r4, %[hi], r4\n\t"
  2927. #endif
  2928. #endif
  2929. #ifdef WOLFSSL_KEIL
  2930. "sbcs r6, r6, r5\n\t"
  2931. #elif defined(__clang__)
  2932. "sbcs r6, r5\n\t"
  2933. #else
  2934. "sbc r6, r5\n\t"
  2935. #endif
  2936. "movs r5, r6\n\t"
  2937. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2938. "adds r3, r3, r5\n\t"
  2939. #else
  2940. "add r3, r3, r5\n\t"
  2941. #endif
  2942. "movs r6, %[d]\n\t"
  2943. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2944. "subs r6, r6, r4\n\t"
  2945. #else
  2946. "sub r6, r6, r4\n\t"
  2947. #endif
  2948. #ifdef WOLFSSL_KEIL
  2949. "sbcs r6, r6, r6\n\t"
  2950. #elif defined(__clang__)
  2951. "sbcs r6, r6\n\t"
  2952. #else
  2953. "sbc r6, r6\n\t"
  2954. #endif
  2955. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2956. "subs r3, r3, r6\n\t"
  2957. #else
  2958. "sub r3, r3, r6\n\t"
  2959. #endif
  2960. "movs %[hi], r3\n\t"
  2961. : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
  2962. :
  2963. : "r3", "r4", "r5", "r6", "r7", "r8", "r9"
  2964. );
  2965. return (uint32_t)(size_t)hi;
  2966. }
  2967. #define SP_ASM_DIV_WORD
  2968. #endif /* !WOLFSSL_SP_DIV_WORD_HALF */
  2969. #define SP_INT_ASM_AVAILABLE
  2970. #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
/*
 * CPU: PPC64
 *
 * Double-precision primitive operations used by the generic multi-precision
 * code. GPRs 16 and 17 are used as scratch registers; each asm statement
 * that uses them lists them in its clobber list. Carry propagation uses the
 * Power ISA carrying adds: addc (set CA), adde (add with CA), addze (add CA
 * to a register).
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[b] \n\t" \
        "mulhdu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhdu %[h], %[a], %[b] \n\t" \
        "mulld %[l], %[a], %[b] \n\t" \
        "li %[o], 0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[b] \n\t" \
        "mulhdu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld %[l], %[a], %[a] \n\t" \
        "mulhdu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" \
        "mulhdu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[a], %[a] \n\t" \
        "mulhdu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "addze %[h], %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
/* subfc sets CA for the borrow; subfe with a zero register applies it to vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" \
        "li 16, 0 \n\t" \
        "subfe %[h], 16, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
/*
 * CPU: PPC 32-bit
 *
 * Same structure as the PPC64 section but with 32-bit multiplies
 * (mullw/mulhwu). GPRs 16 and 17 are scratch and appear in the clobber
 * list of each asm statement that uses them.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[b] \n\t" \
        "mulhwu %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhwu %[h], %[a], %[b] \n\t" \
        "mullw %[l], %[a], %[b] \n\t" \
        "li %[o], 0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[b] \n\t" \
        "mulhwu 17, %[a], %[b] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw %[l], %[a], %[a] \n\t" \
        "mulhwu %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" \
        "mulhwu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        "addze %[o], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[a], %[a] \n\t" \
        "mulhwu 17, %[a], %[a] \n\t" \
        "addc %[l], %[l], 16 \n\t" \
        "adde %[h], %[h], 17 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "addze %[h], %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[l], %[a], %[l] \n\t" \
        "li 16, 0 \n\t" \
        "subfe %[h], 16, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "16", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        "addc %[l], %[l], %[a] \n\t" \
        "adde %[h], %[h], %[b] \n\t" \
        "adde %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
/*
 * CPU: MIPS 64-bit
 *
 * MIPS has no carry flag; carries are recovered with sltu (set-on-
 * less-than-unsigned) after each add: if the sum is less than an addend,
 * the add wrapped. Registers $10-$12 are scratch and $lo/$hi hold the
 * dmultu result; all are declared as clobbers where used.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "$lo", "$hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        "move %[o], $0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "$lo", "$hi" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "daddu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "daddu %[h], %[h], $11 \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Sub va from: vh | vl */
/* Borrow is detected by comparing the original low word (saved in $12)
 * against the subtraction result: old < new means the subtract wrapped.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[l] \n\t" \
        "dsubu %[l], $12, %[a] \n\t" \
        "sltu $12, $12, %[l] \n\t" \
        "dsubu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "$12" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[l], %[l], %[a] \n\t" \
        "sltu $12, %[l], %[a] \n\t" \
        "daddu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        "daddu %[h], %[h], %[b] \n\t" \
        "sltu $12, %[h], %[b] \n\t" \
        "daddu %[o], %[o], %[c] \n\t" \
        "daddu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "$12" \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
/*
 * CPU: MIPS 32-bit
 *
 * 32-bit variant of the MIPS64 section above: multu instead of dmultu and
 * addu instead of daddu. Carries are recovered with sltu after each add.
 * Registers $10-$12 are scratch; all scratch and the lo/hi result
 * registers are declared as clobbers where used.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "%lo", "%hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        "move %[o], $0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[a], %[b] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo %[l] \n\t" \
        "mfhi %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "%lo", "%hi" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "multu %[a], %[a] \n\t" \
        "mflo $10 \n\t" \
        "mfhi $11 \n\t" \
        "addu %[l], %[l], $10 \n\t" \
        "sltu $12, %[l], $10 \n\t" \
        "addu %[h], %[h], $12 \n\t" \
        "sltu $12, %[h], $12 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        "addu %[h], %[h], $11 \n\t" \
        "sltu $12, %[h], $11 \n\t" \
        "addu %[o], %[o], $12 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
  3589. /* Square va and add double size result into: vh | vl */
  3590. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  3591. __asm__ __volatile__ ( \
  3592. "multu %[a], %[a] \n\t" \
  3593. "mflo $10 \n\t" \
  3594. "mfhi $11 \n\t" \
  3595. "addu %[l], %[l], $10 \n\t" \
  3596. "sltu $12, %[l], $10 \n\t" \
  3597. "addu %[h], %[h], $11 \n\t" \
  3598. "addu %[h], %[h], $12 \n\t" \
  3599. : [l] "+r" (vl), [h] "+r" (vh) \
  3600. : [a] "r" (va) \
  3601. : "$10", "$11", "$12", "%lo", "%hi" \
  3602. )
  3603. /* Add va into: vh | vl */
  3604. #define SP_ASM_ADDC(vl, vh, va) \
  3605. __asm__ __volatile__ ( \
  3606. "addu %[l], %[l], %[a] \n\t" \
  3607. "sltu $12, %[l], %[a] \n\t" \
  3608. "addu %[h], %[h], $12 \n\t" \
  3609. : [l] "+r" (vl), [h] "+r" (vh) \
  3610. : [a] "r" (va) \
  3611. : "$12" \
  3612. )
  3613. /* Sub va from: vh | vl */
  3614. #define SP_ASM_SUBC(vl, vh, va) \
  3615. __asm__ __volatile__ ( \
  3616. "move $12, %[l] \n\t" \
  3617. "subu %[l], $12, %[a] \n\t" \
  3618. "sltu $12, $12, %[l] \n\t" \
  3619. "subu %[h], %[h], $12 \n\t" \
  3620. : [l] "+r" (vl), [h] "+r" (vh) \
  3621. : [a] "r" (va) \
  3622. : "$12" \
  3623. )
  3624. /* Add two times vc | vb | va into vo | vh | vl */
  3625. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  3626. __asm__ __volatile__ ( \
  3627. "addu %[l], %[l], %[a] \n\t" \
  3628. "sltu $12, %[l], %[a] \n\t" \
  3629. "addu %[h], %[h], $12 \n\t" \
  3630. "sltu $12, %[h], $12 \n\t" \
  3631. "addu %[o], %[o], $12 \n\t" \
  3632. "addu %[h], %[h], %[b] \n\t" \
  3633. "sltu $12, %[h], %[b] \n\t" \
  3634. "addu %[o], %[o], %[c] \n\t" \
  3635. "addu %[o], %[o], $12 \n\t" \
  3636. "addu %[l], %[l], %[a] \n\t" \
  3637. "sltu $12, %[l], %[a] \n\t" \
  3638. "addu %[h], %[h], $12 \n\t" \
  3639. "sltu $12, %[h], $12 \n\t" \
  3640. "addu %[o], %[o], $12 \n\t" \
  3641. "addu %[h], %[h], %[b] \n\t" \
  3642. "sltu $12, %[h], %[b] \n\t" \
  3643. "addu %[o], %[o], %[c] \n\t" \
  3644. "addu %[o], %[o], $12 \n\t" \
  3645. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3646. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  3647. : "$12" \
  3648. )
  3649. #define SP_INT_ASM_AVAILABLE
  3650. #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
  3651. #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
  3652. /*
  3653. * CPU: RISCV 64-bit
  3654. */
  3655. /* Multiply va by vb and store double size result in: vh | vl */
  3656. #define SP_ASM_MUL(vl, vh, va, vb) \
  3657. __asm__ __volatile__ ( \
  3658. "mul %[l], %[a], %[b] \n\t" \
  3659. "mulhu %[h], %[a], %[b] \n\t" \
  3660. : [h] "+r" (vh), [l] "+r" (vl) \
  3661. : [a] "r" (va), [b] "r" (vb) \
  3662. : "memory" \
  3663. )
  3664. /* Multiply va by vb and store double size result in: vo | vh | vl */
  3665. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  3666. __asm__ __volatile__ ( \
  3667. "mulhu %[h], %[a], %[b] \n\t" \
  3668. "mul %[l], %[a], %[b] \n\t" \
  3669. "add %[o], zero, zero \n\t" \
  3670. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  3671. : [a] "r" (va), [b] "r" (vb) \
  3672. : \
  3673. )
  3674. /* Multiply va by vb and add double size result into: vo | vh | vl */
  3675. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  3676. __asm__ __volatile__ ( \
  3677. "mul a5, %[a], %[b] \n\t" \
  3678. "mulhu a6, %[a], %[b] \n\t" \
  3679. "add %[l], %[l], a5 \n\t" \
  3680. "sltu a7, %[l], a5 \n\t" \
  3681. "add %[h], %[h], a7 \n\t" \
  3682. "sltu a7, %[h], a7 \n\t" \
  3683. "add %[o], %[o], a7 \n\t" \
  3684. "add %[h], %[h], a6 \n\t" \
  3685. "sltu a7, %[h], a6 \n\t" \
  3686. "add %[o], %[o], a7 \n\t" \
  3687. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3688. : [a] "r" (va), [b] "r" (vb) \
  3689. : "a5", "a6", "a7" \
  3690. )
  3691. /* Multiply va by vb and add double size result into: vh | vl */
  3692. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  3693. __asm__ __volatile__ ( \
  3694. "mul a5, %[a], %[b] \n\t" \
  3695. "mulhu a6, %[a], %[b] \n\t" \
  3696. "add %[l], %[l], a5 \n\t" \
  3697. "sltu a7, %[l], a5 \n\t" \
  3698. "add %[h], %[h], a6 \n\t" \
  3699. "add %[h], %[h], a7 \n\t" \
  3700. : [l] "+r" (vl), [h] "+r" (vh) \
  3701. : [a] "r" (va), [b] "r" (vb) \
  3702. : "a5", "a6", "a7" \
  3703. )
  3704. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  3705. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  3706. __asm__ __volatile__ ( \
  3707. "mul a5, %[a], %[b] \n\t" \
  3708. "mulhu a6, %[a], %[b] \n\t" \
  3709. "add %[l], %[l], a5 \n\t" \
  3710. "sltu a7, %[l], a5 \n\t" \
  3711. "add %[h], %[h], a7 \n\t" \
  3712. "sltu a7, %[h], a7 \n\t" \
  3713. "add %[o], %[o], a7 \n\t" \
  3714. "add %[h], %[h], a6 \n\t" \
  3715. "sltu a7, %[h], a6 \n\t" \
  3716. "add %[o], %[o], a7 \n\t" \
  3717. "add %[l], %[l], a5 \n\t" \
  3718. "sltu a7, %[l], a5 \n\t" \
  3719. "add %[h], %[h], a7 \n\t" \
  3720. "sltu a7, %[h], a7 \n\t" \
  3721. "add %[o], %[o], a7 \n\t" \
  3722. "add %[h], %[h], a6 \n\t" \
  3723. "sltu a7, %[h], a6 \n\t" \
  3724. "add %[o], %[o], a7 \n\t" \
  3725. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3726. : [a] "r" (va), [b] "r" (vb) \
  3727. : "a5", "a6", "a7" \
  3728. )
  3729. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  3730. * Assumes first add will not overflow vh | vl
  3731. */
  3732. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  3733. __asm__ __volatile__ ( \
  3734. "mul a5, %[a], %[b] \n\t" \
  3735. "mulhu a6, %[a], %[b] \n\t" \
  3736. "add %[l], %[l], a5 \n\t" \
  3737. "sltu a7, %[l], a5 \n\t" \
  3738. "add %[h], %[h], a6 \n\t" \
  3739. "add %[h], %[h], a7 \n\t" \
  3740. "add %[l], %[l], a5 \n\t" \
  3741. "sltu a7, %[l], a5 \n\t" \
  3742. "add %[h], %[h], a7 \n\t" \
  3743. "sltu a7, %[h], a7 \n\t" \
  3744. "add %[o], %[o], a7 \n\t" \
  3745. "add %[h], %[h], a6 \n\t" \
  3746. "sltu a7, %[h], a6 \n\t" \
  3747. "add %[o], %[o], a7 \n\t" \
  3748. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3749. : [a] "r" (va), [b] "r" (vb) \
  3750. : "a5", "a6", "a7" \
  3751. )
  3752. /* Square va and store double size result in: vh | vl */
  3753. #define SP_ASM_SQR(vl, vh, va) \
  3754. __asm__ __volatile__ ( \
  3755. "mul %[l], %[a], %[a] \n\t" \
  3756. "mulhu %[h], %[a], %[a] \n\t" \
  3757. : [h] "+r" (vh), [l] "+r" (vl) \
  3758. : [a] "r" (va) \
  3759. : "memory" \
  3760. )
  3761. /* Square va and add double size result into: vo | vh | vl */
  3762. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  3763. __asm__ __volatile__ ( \
  3764. "mul a5, %[a], %[a] \n\t" \
  3765. "mulhu a6, %[a], %[a] \n\t" \
  3766. "add %[l], %[l], a5 \n\t" \
  3767. "sltu a7, %[l], a5 \n\t" \
  3768. "add %[h], %[h], a7 \n\t" \
  3769. "sltu a7, %[h], a7 \n\t" \
  3770. "add %[o], %[o], a7 \n\t" \
  3771. "add %[h], %[h], a6 \n\t" \
  3772. "sltu a7, %[h], a6 \n\t" \
  3773. "add %[o], %[o], a7 \n\t" \
  3774. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3775. : [a] "r" (va) \
  3776. : "a5", "a6", "a7" \
  3777. )
  3778. /* Square va and add double size result into: vh | vl */
  3779. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  3780. __asm__ __volatile__ ( \
  3781. "mul a5, %[a], %[a] \n\t" \
  3782. "mulhu a6, %[a], %[a] \n\t" \
  3783. "add %[l], %[l], a5 \n\t" \
  3784. "sltu a7, %[l], a5 \n\t" \
  3785. "add %[h], %[h], a6 \n\t" \
  3786. "add %[h], %[h], a7 \n\t" \
  3787. : [l] "+r" (vl), [h] "+r" (vh) \
  3788. : [a] "r" (va) \
  3789. : "a5", "a6", "a7" \
  3790. )
  3791. /* Add va into: vh | vl */
  3792. #define SP_ASM_ADDC(vl, vh, va) \
  3793. __asm__ __volatile__ ( \
  3794. "add %[l], %[l], %[a] \n\t" \
  3795. "sltu a7, %[l], %[a] \n\t" \
  3796. "add %[h], %[h], a7 \n\t" \
  3797. : [l] "+r" (vl), [h] "+r" (vh) \
  3798. : [a] "r" (va) \
  3799. : "a7" \
  3800. )
  3801. /* Sub va from: vh | vl */
  3802. #define SP_ASM_SUBC(vl, vh, va) \
  3803. __asm__ __volatile__ ( \
  3804. "add a7, %[l], zero \n\t" \
  3805. "sub %[l], a7, %[a] \n\t" \
  3806. "sltu a7, a7, %[l] \n\t" \
  3807. "sub %[h], %[h], a7 \n\t" \
  3808. : [l] "+r" (vl), [h] "+r" (vh) \
  3809. : [a] "r" (va) \
  3810. : "a7" \
  3811. )
  3812. /* Add two times vc | vb | va into vo | vh | vl */
  3813. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  3814. __asm__ __volatile__ ( \
  3815. "add %[l], %[l], %[a] \n\t" \
  3816. "sltu a7, %[l], %[a] \n\t" \
  3817. "add %[h], %[h], a7 \n\t" \
  3818. "sltu a7, %[h], a7 \n\t" \
  3819. "add %[o], %[o], a7 \n\t" \
  3820. "add %[h], %[h], %[b] \n\t" \
  3821. "sltu a7, %[h], %[b] \n\t" \
  3822. "add %[o], %[o], %[c] \n\t" \
  3823. "add %[o], %[o], a7 \n\t" \
  3824. "add %[l], %[l], %[a] \n\t" \
  3825. "sltu a7, %[l], %[a] \n\t" \
  3826. "add %[h], %[h], a7 \n\t" \
  3827. "sltu a7, %[h], a7 \n\t" \
  3828. "add %[o], %[o], a7 \n\t" \
  3829. "add %[h], %[h], %[b] \n\t" \
  3830. "sltu a7, %[h], %[b] \n\t" \
  3831. "add %[o], %[o], %[c] \n\t" \
  3832. "add %[o], %[o], a7 \n\t" \
  3833. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3834. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  3835. : "a7" \
  3836. )
  3837. #define SP_INT_ASM_AVAILABLE
  3838. #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
  3839. #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
  3840. /*
  3841. * CPU: RISCV 32-bit
  3842. */
  3843. /* Multiply va by vb and store double size result in: vh | vl */
  3844. #define SP_ASM_MUL(vl, vh, va, vb) \
  3845. __asm__ __volatile__ ( \
  3846. "mul %[l], %[a], %[b] \n\t" \
  3847. "mulhu %[h], %[a], %[b] \n\t" \
  3848. : [h] "+r" (vh), [l] "+r" (vl) \
  3849. : [a] "r" (va), [b] "r" (vb) \
  3850. : "memory" \
  3851. )
  3852. /* Multiply va by vb and store double size result in: vo | vh | vl */
  3853. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  3854. __asm__ __volatile__ ( \
  3855. "mulhu %[h], %[a], %[b] \n\t" \
  3856. "mul %[l], %[a], %[b] \n\t" \
  3857. "add %[o], zero, zero \n\t" \
  3858. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  3859. : [a] "r" (va), [b] "r" (vb) \
  3860. : \
  3861. )
  3862. /* Multiply va by vb and add double size result into: vo | vh | vl */
  3863. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  3864. __asm__ __volatile__ ( \
  3865. "mul a5, %[a], %[b] \n\t" \
  3866. "mulhu a6, %[a], %[b] \n\t" \
  3867. "add %[l], %[l], a5 \n\t" \
  3868. "sltu a7, %[l], a5 \n\t" \
  3869. "add %[h], %[h], a7 \n\t" \
  3870. "sltu a7, %[h], a7 \n\t" \
  3871. "add %[o], %[o], a7 \n\t" \
  3872. "add %[h], %[h], a6 \n\t" \
  3873. "sltu a7, %[h], a6 \n\t" \
  3874. "add %[o], %[o], a7 \n\t" \
  3875. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3876. : [a] "r" (va), [b] "r" (vb) \
  3877. : "a5", "a6", "a7" \
  3878. )
  3879. /* Multiply va by vb and add double size result into: vh | vl */
  3880. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  3881. __asm__ __volatile__ ( \
  3882. "mul a5, %[a], %[b] \n\t" \
  3883. "mulhu a6, %[a], %[b] \n\t" \
  3884. "add %[l], %[l], a5 \n\t" \
  3885. "sltu a7, %[l], a5 \n\t" \
  3886. "add %[h], %[h], a6 \n\t" \
  3887. "add %[h], %[h], a7 \n\t" \
  3888. : [l] "+r" (vl), [h] "+r" (vh) \
  3889. : [a] "r" (va), [b] "r" (vb) \
  3890. : "a5", "a6", "a7" \
  3891. )
  3892. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  3893. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  3894. __asm__ __volatile__ ( \
  3895. "mul a5, %[a], %[b] \n\t" \
  3896. "mulhu a6, %[a], %[b] \n\t" \
  3897. "add %[l], %[l], a5 \n\t" \
  3898. "sltu a7, %[l], a5 \n\t" \
  3899. "add %[h], %[h], a7 \n\t" \
  3900. "sltu a7, %[h], a7 \n\t" \
  3901. "add %[o], %[o], a7 \n\t" \
  3902. "add %[h], %[h], a6 \n\t" \
  3903. "sltu a7, %[h], a6 \n\t" \
  3904. "add %[o], %[o], a7 \n\t" \
  3905. "add %[l], %[l], a5 \n\t" \
  3906. "sltu a7, %[l], a5 \n\t" \
  3907. "add %[h], %[h], a7 \n\t" \
  3908. "sltu a7, %[h], a7 \n\t" \
  3909. "add %[o], %[o], a7 \n\t" \
  3910. "add %[h], %[h], a6 \n\t" \
  3911. "sltu a7, %[h], a6 \n\t" \
  3912. "add %[o], %[o], a7 \n\t" \
  3913. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3914. : [a] "r" (va), [b] "r" (vb) \
  3915. : "a5", "a6", "a7" \
  3916. )
  3917. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  3918. * Assumes first add will not overflow vh | vl
  3919. */
  3920. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  3921. __asm__ __volatile__ ( \
  3922. "mul a5, %[a], %[b] \n\t" \
  3923. "mulhu a6, %[a], %[b] \n\t" \
  3924. "add %[l], %[l], a5 \n\t" \
  3925. "sltu a7, %[l], a5 \n\t" \
  3926. "add %[h], %[h], a6 \n\t" \
  3927. "add %[h], %[h], a7 \n\t" \
  3928. "add %[l], %[l], a5 \n\t" \
  3929. "sltu a7, %[l], a5 \n\t" \
  3930. "add %[h], %[h], a7 \n\t" \
  3931. "sltu a7, %[h], a7 \n\t" \
  3932. "add %[o], %[o], a7 \n\t" \
  3933. "add %[h], %[h], a6 \n\t" \
  3934. "sltu a7, %[h], a6 \n\t" \
  3935. "add %[o], %[o], a7 \n\t" \
  3936. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3937. : [a] "r" (va), [b] "r" (vb) \
  3938. : "a5", "a6", "a7" \
  3939. )
  3940. /* Square va and store double size result in: vh | vl */
  3941. #define SP_ASM_SQR(vl, vh, va) \
  3942. __asm__ __volatile__ ( \
  3943. "mul %[l], %[a], %[a] \n\t" \
  3944. "mulhu %[h], %[a], %[a] \n\t" \
  3945. : [h] "+r" (vh), [l] "+r" (vl) \
  3946. : [a] "r" (va) \
  3947. : "memory" \
  3948. )
  3949. /* Square va and add double size result into: vo | vh | vl */
  3950. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  3951. __asm__ __volatile__ ( \
  3952. "mul a5, %[a], %[a] \n\t" \
  3953. "mulhu a6, %[a], %[a] \n\t" \
  3954. "add %[l], %[l], a5 \n\t" \
  3955. "sltu a7, %[l], a5 \n\t" \
  3956. "add %[h], %[h], a7 \n\t" \
  3957. "sltu a7, %[h], a7 \n\t" \
  3958. "add %[o], %[o], a7 \n\t" \
  3959. "add %[h], %[h], a6 \n\t" \
  3960. "sltu a7, %[h], a6 \n\t" \
  3961. "add %[o], %[o], a7 \n\t" \
  3962. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  3963. : [a] "r" (va) \
  3964. : "a5", "a6", "a7" \
  3965. )
  3966. /* Square va and add double size result into: vh | vl */
  3967. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  3968. __asm__ __volatile__ ( \
  3969. "mul a5, %[a], %[a] \n\t" \
  3970. "mulhu a6, %[a], %[a] \n\t" \
  3971. "add %[l], %[l], a5 \n\t" \
  3972. "sltu a7, %[l], a5 \n\t" \
  3973. "add %[h], %[h], a6 \n\t" \
  3974. "add %[h], %[h], a7 \n\t" \
  3975. : [l] "+r" (vl), [h] "+r" (vh) \
  3976. : [a] "r" (va) \
  3977. : "a5", "a6", "a7" \
  3978. )
  3979. /* Add va into: vh | vl */
  3980. #define SP_ASM_ADDC(vl, vh, va) \
  3981. __asm__ __volatile__ ( \
  3982. "add %[l], %[l], %[a] \n\t" \
  3983. "sltu a7, %[l], %[a] \n\t" \
  3984. "add %[h], %[h], a7 \n\t" \
  3985. : [l] "+r" (vl), [h] "+r" (vh) \
  3986. : [a] "r" (va) \
  3987. : "a7" \
  3988. )
  3989. /* Sub va from: vh | vl */
  3990. #define SP_ASM_SUBC(vl, vh, va) \
  3991. __asm__ __volatile__ ( \
  3992. "add a7, %[l], zero \n\t" \
  3993. "sub %[l], a7, %[a] \n\t" \
  3994. "sltu a7, a7, %[l] \n\t" \
  3995. "sub %[h], %[h], a7 \n\t" \
  3996. : [l] "+r" (vl), [h] "+r" (vh) \
  3997. : [a] "r" (va) \
  3998. : "a7" \
  3999. )
  4000. /* Add two times vc | vb | va into vo | vh | vl */
  4001. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  4002. __asm__ __volatile__ ( \
  4003. "add %[l], %[l], %[a] \n\t" \
  4004. "sltu a7, %[l], %[a] \n\t" \
  4005. "add %[h], %[h], a7 \n\t" \
  4006. "sltu a7, %[h], a7 \n\t" \
  4007. "add %[o], %[o], a7 \n\t" \
  4008. "add %[h], %[h], %[b] \n\t" \
  4009. "sltu a7, %[h], %[b] \n\t" \
  4010. "add %[o], %[o], %[c] \n\t" \
  4011. "add %[o], %[o], a7 \n\t" \
  4012. "add %[l], %[l], %[a] \n\t" \
  4013. "sltu a7, %[l], %[a] \n\t" \
  4014. "add %[h], %[h], a7 \n\t" \
  4015. "sltu a7, %[h], a7 \n\t" \
  4016. "add %[o], %[o], a7 \n\t" \
  4017. "add %[h], %[h], %[b] \n\t" \
  4018. "sltu a7, %[h], %[b] \n\t" \
  4019. "add %[o], %[o], %[c] \n\t" \
  4020. "add %[o], %[o], a7 \n\t" \
  4021. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4022. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  4023. : "a7" \
  4024. )
  4025. #define SP_INT_ASM_AVAILABLE
  4026. #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
  4027. #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
  4028. /*
  4029. * CPU: Intel s390x
  4030. */
  4031. /* Multiply va by vb and store double size result in: vh | vl */
  4032. #define SP_ASM_MUL(vl, vh, va, vb) \
  4033. __asm__ __volatile__ ( \
  4034. "lgr %%r1, %[a] \n\t" \
  4035. "mlgr %%r0, %[b] \n\t" \
  4036. "lgr %[l], %%r1 \n\t" \
  4037. "lgr %[h], %%r0 \n\t" \
  4038. : [h] "+r" (vh), [l] "+r" (vl) \
  4039. : [a] "r" (va), [b] "r" (vb) \
  4040. : "memory", "r0", "r1" \
  4041. )
  4042. /* Multiply va by vb and store double size result in: vo | vh | vl */
  4043. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  4044. __asm__ __volatile__ ( \
  4045. "lgr %%r1, %[a] \n\t" \
  4046. "mlgr %%r0, %[b] \n\t" \
  4047. "lghi %[o], 0 \n\t" \
  4048. "lgr %[l], %%r1 \n\t" \
  4049. "lgr %[h], %%r0 \n\t" \
  4050. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  4051. : [a] "r" (va), [b] "r" (vb) \
  4052. : "r0", "r1" \
  4053. )
  4054. /* Multiply va by vb and add double size result into: vo | vh | vl */
  4055. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  4056. __asm__ __volatile__ ( \
  4057. "lghi %%r10, 0 \n\t" \
  4058. "lgr %%r1, %[a] \n\t" \
  4059. "mlgr %%r0, %[b] \n\t" \
  4060. "algr %[l], %%r1 \n\t" \
  4061. "alcgr %[h], %%r0 \n\t" \
  4062. "alcgr %[o], %%r10 \n\t" \
  4063. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4064. : [a] "r" (va), [b] "r" (vb) \
  4065. : "r0", "r1", "r10", "cc" \
  4066. )
  4067. /* Multiply va by vb and add double size result into: vh | vl */
  4068. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  4069. __asm__ __volatile__ ( \
  4070. "lgr %%r1, %[a] \n\t" \
  4071. "mlgr %%r0, %[b] \n\t" \
  4072. "algr %[l], %%r1 \n\t" \
  4073. "alcgr %[h], %%r0 \n\t" \
  4074. : [l] "+r" (vl), [h] "+r" (vh) \
  4075. : [a] "r" (va), [b] "r" (vb) \
  4076. : "r0", "r1", "cc" \
  4077. )
  4078. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  4079. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  4080. __asm__ __volatile__ ( \
  4081. "lghi %%r10, 0 \n\t" \
  4082. "lgr %%r1, %[a] \n\t" \
  4083. "mlgr %%r0, %[b] \n\t" \
  4084. "algr %[l], %%r1 \n\t" \
  4085. "alcgr %[h], %%r0 \n\t" \
  4086. "alcgr %[o], %%r10 \n\t" \
  4087. "algr %[l], %%r1 \n\t" \
  4088. "alcgr %[h], %%r0 \n\t" \
  4089. "alcgr %[o], %%r10 \n\t" \
  4090. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4091. : [a] "r" (va), [b] "r" (vb) \
  4092. : "r0", "r1", "r10", "cc" \
  4093. )
  4094. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  4095. * Assumes first add will not overflow vh | vl
  4096. */
  4097. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  4098. __asm__ __volatile__ ( \
  4099. "lghi %%r10, 0 \n\t" \
  4100. "lgr %%r1, %[a] \n\t" \
  4101. "mlgr %%r0, %[b] \n\t" \
  4102. "algr %[l], %%r1 \n\t" \
  4103. "alcgr %[h], %%r0 \n\t" \
  4104. "algr %[l], %%r1 \n\t" \
  4105. "alcgr %[h], %%r0 \n\t" \
  4106. "alcgr %[o], %%r10 \n\t" \
  4107. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4108. : [a] "r" (va), [b] "r" (vb) \
  4109. : "r0", "r1", "r10", "cc" \
  4110. )
  4111. /* Square va and store double size result in: vh | vl */
  4112. #define SP_ASM_SQR(vl, vh, va) \
  4113. __asm__ __volatile__ ( \
  4114. "lgr %%r1, %[a] \n\t" \
  4115. "mlgr %%r0, %%r1 \n\t" \
  4116. "lgr %[l], %%r1 \n\t" \
  4117. "lgr %[h], %%r0 \n\t" \
  4118. : [h] "+r" (vh), [l] "+r" (vl) \
  4119. : [a] "r" (va) \
  4120. : "memory", "r0", "r1" \
  4121. )
  4122. /* Square va and add double size result into: vo | vh | vl */
  4123. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  4124. __asm__ __volatile__ ( \
  4125. "lghi %%r10, 0 \n\t" \
  4126. "lgr %%r1, %[a] \n\t" \
  4127. "mlgr %%r0, %%r1 \n\t" \
  4128. "algr %[l], %%r1 \n\t" \
  4129. "alcgr %[h], %%r0 \n\t" \
  4130. "alcgr %[o], %%r10 \n\t" \
  4131. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4132. : [a] "r" (va) \
  4133. : "r0", "r1", "r10", "cc" \
  4134. )
  4135. /* Square va and add double size result into: vh | vl */
  4136. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  4137. __asm__ __volatile__ ( \
  4138. "lgr %%r1, %[a] \n\t" \
  4139. "mlgr %%r0, %%r1 \n\t" \
  4140. "algr %[l], %%r1 \n\t" \
  4141. "alcgr %[h], %%r0 \n\t" \
  4142. : [l] "+r" (vl), [h] "+r" (vh) \
  4143. : [a] "r" (va) \
  4144. : "r0", "r1", "cc" \
  4145. )
  4146. /* Add va into: vh | vl */
  4147. #define SP_ASM_ADDC(vl, vh, va) \
  4148. __asm__ __volatile__ ( \
  4149. "lghi %%r10, 0 \n\t" \
  4150. "algr %[l], %[a] \n\t" \
  4151. "alcgr %[h], %%r10 \n\t" \
  4152. : [l] "+r" (vl), [h] "+r" (vh) \
  4153. : [a] "r" (va) \
  4154. : "r10", "cc" \
  4155. )
  4156. /* Sub va from: vh | vl */
  4157. #define SP_ASM_SUBC(vl, vh, va) \
  4158. __asm__ __volatile__ ( \
  4159. "lghi %%r10, 0 \n\t" \
  4160. "slgr %[l], %[a] \n\t" \
  4161. "slbgr %[h], %%r10 \n\t" \
  4162. : [l] "+r" (vl), [h] "+r" (vh) \
  4163. : [a] "r" (va) \
  4164. : "r10", "cc" \
  4165. )
  4166. /* Add two times vc | vb | va into vo | vh | vl */
  4167. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  4168. __asm__ __volatile__ ( \
  4169. "algr %[l], %[a] \n\t" \
  4170. "alcgr %[h], %[b] \n\t" \
  4171. "alcgr %[o], %[c] \n\t" \
  4172. "algr %[l], %[a] \n\t" \
  4173. "alcgr %[h], %[b] \n\t" \
  4174. "alcgr %[o], %[c] \n\t" \
  4175. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  4176. : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
  4177. : "cc" \
  4178. )
  4179. #define SP_INT_ASM_AVAILABLE
  4180. #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
  4181. #ifdef SP_INT_ASM_AVAILABLE
  4182. #ifndef SP_INT_NO_ASM
  4183. #define SQR_MUL_ASM
  4184. #endif
  4185. #ifndef SP_ASM_ADDC_REG
  4186. #define SP_ASM_ADDC_REG SP_ASM_ADDC
  4187. #endif /* SP_ASM_ADDC_REG */
  4188. #endif /* SQR_MUL_ASM */
  4189. #endif /* !WOLFSSL_NO_ASM */
  4190. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  4191. !defined(NO_DSA) || !defined(NO_DH) || \
  4192. (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
  4193. (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4194. #ifndef WC_NO_CACHE_RESISTANT
  4195. /* Mask of address for constant time operations. */
  4196. const size_t sp_off_on_addr[2] =
  4197. {
  4198. (size_t) 0,
  4199. (size_t)-1
  4200. };
  4201. #endif
  4202. #endif
  4203. #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
  4204. #ifdef __cplusplus
  4205. extern "C" {
  4206. #endif
  4207. /* Modular exponentiation implementations using Single Precision. */
  4208. WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
  4209. sp_int* res);
  4210. WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
  4211. sp_int* res);
  4212. WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
  4213. sp_int* res);
  4214. WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
  4215. sp_int* res);
  4216. WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
  4217. sp_int* res);
  4218. #ifdef __cplusplus
  4219. } /* extern "C" */
  4220. #endif
  4221. #endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
  4222. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  4223. static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp);
  4224. #endif
  4225. /* Set the multi-precision number to zero.
  4226. *
  4227. * Assumes a is not NULL.
  4228. *
  4229. * @param [out] a SP integer to set to zero.
  4230. */
  4231. static void _sp_zero(sp_int* a)
  4232. {
  4233. a->used = 0;
  4234. a->dp[0] = 0;
  4235. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4236. a->sign = MP_ZPOS;
  4237. #endif
  4238. }
  4239. /* Initialize the multi-precision number to be zero.
  4240. *
  4241. * @param [out] a SP integer.
  4242. *
  4243. * @return MP_OKAY on success.
  4244. * @return MP_VAL when a is NULL.
  4245. */
  4246. int sp_init(sp_int* a)
  4247. {
  4248. int err = MP_OKAY;
  4249. if (a == NULL) {
  4250. err = MP_VAL;
  4251. }
  4252. if (err == MP_OKAY) {
  4253. #ifdef HAVE_WOLF_BIGINT
  4254. wc_bigint_init(&a->raw);
  4255. #endif
  4256. _sp_zero(a);
  4257. a->size = SP_INT_DIGITS;
  4258. }
  4259. return err;
  4260. }
  4261. /* Initialize the multi-precision number to be zero and have a maximum size.
  4262. *
  4263. * @param [out] a SP integer.
  4264. * @param [in] size Number of words to say are available.
  4265. *
  4266. * @return MP_OKAY on success.
  4267. * @return MP_VAL when a is NULL.
  4268. */
  4269. int sp_init_size(sp_int* a, int size)
  4270. {
  4271. int err = sp_init(a);
  4272. if (err == MP_OKAY) {
  4273. a->size = size;
  4274. }
  4275. return err;
  4276. }
  4277. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  4278. /* Initialize up to six multi-precision numbers to be zero.
  4279. *
  4280. * @param [out] n1 SP integer.
  4281. * @param [out] n2 SP integer.
  4282. * @param [out] n3 SP integer.
  4283. * @param [out] n4 SP integer.
  4284. * @param [out] n5 SP integer.
  4285. * @param [out] n6 SP integer.
  4286. *
  4287. * @return MP_OKAY on success.
  4288. */
  4289. int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
  4290. sp_int* n6)
  4291. {
  4292. if (n1 != NULL) {
  4293. #ifdef HAVE_WOLF_BIGINT
  4294. wc_bigint_init(&n1->raw);
  4295. #endif
  4296. _sp_zero(n1);
  4297. n1->dp[0] = 0;
  4298. n1->size = SP_INT_DIGITS;
  4299. #ifdef HAVE_WOLF_BIGINT
  4300. wc_bigint_init(&n1->raw);
  4301. #endif
  4302. }
  4303. if (n2 != NULL) {
  4304. #ifdef HAVE_WOLF_BIGINT
  4305. wc_bigint_init(&n2->raw);
  4306. #endif
  4307. _sp_zero(n2);
  4308. n2->dp[0] = 0;
  4309. n2->size = SP_INT_DIGITS;
  4310. #ifdef HAVE_WOLF_BIGINT
  4311. wc_bigint_init(&n2->raw);
  4312. #endif
  4313. }
  4314. if (n3 != NULL) {
  4315. #ifdef HAVE_WOLF_BIGINT
  4316. wc_bigint_init(&n3->raw);
  4317. #endif
  4318. _sp_zero(n3);
  4319. n3->dp[0] = 0;
  4320. n3->size = SP_INT_DIGITS;
  4321. #ifdef HAVE_WOLF_BIGINT
  4322. wc_bigint_init(&n3->raw);
  4323. #endif
  4324. }
  4325. if (n4 != NULL) {
  4326. #ifdef HAVE_WOLF_BIGINT
  4327. wc_bigint_init(&n4->raw);
  4328. #endif
  4329. _sp_zero(n4);
  4330. n4->dp[0] = 0;
  4331. n4->size = SP_INT_DIGITS;
  4332. #ifdef HAVE_WOLF_BIGINT
  4333. wc_bigint_init(&n4->raw);
  4334. #endif
  4335. }
  4336. if (n5 != NULL) {
  4337. #ifdef HAVE_WOLF_BIGINT
  4338. wc_bigint_init(&n5->raw);
  4339. #endif
  4340. _sp_zero(n5);
  4341. n5->dp[0] = 0;
  4342. n5->size = SP_INT_DIGITS;
  4343. #ifdef HAVE_WOLF_BIGINT
  4344. wc_bigint_init(&n5->raw);
  4345. #endif
  4346. }
  4347. if (n6 != NULL) {
  4348. #ifdef HAVE_WOLF_BIGINT
  4349. wc_bigint_init(&n6->raw);
  4350. #endif
  4351. _sp_zero(n6);
  4352. n6->dp[0] = 0;
  4353. n6->size = SP_INT_DIGITS;
  4354. #ifdef HAVE_WOLF_BIGINT
  4355. wc_bigint_init(&n6->raw);
  4356. #endif
  4357. }
  4358. return MP_OKAY;
  4359. }
  4360. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
  4361. /* Free the memory allocated in the multi-precision number.
  4362. *
  4363. * @param [in] a SP integer.
  4364. */
  4365. void sp_free(sp_int* a)
  4366. {
  4367. if (a != NULL) {
  4368. #ifdef HAVE_WOLF_BIGINT
  4369. wc_bigint_free(&a->raw);
  4370. #endif
  4371. }
  4372. }
  4373. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  4374. /* Grow multi-precision number to be able to hold l digits.
  4375. * This function does nothing as the number of digits is fixed.
  4376. *
  4377. * @param [in,out] a SP integer.
  4378. * @param [in] l Number of digits to grow to.
  4379. *
  4380. * @return MP_OKAY on success
  4381. * @return MP_MEM if the number of digits requested is more than available.
  4382. */
  4383. int sp_grow(sp_int* a, int l)
  4384. {
  4385. int err = MP_OKAY;
  4386. if (a == NULL) {
  4387. err = MP_VAL;
  4388. }
  4389. if ((err == MP_OKAY) && (l > a->size)) {
  4390. err = MP_MEM;
  4391. }
  4392. if (err == MP_OKAY) {
  4393. int i;
  4394. for (i = a->used; i < l; i++) {
  4395. a->dp[i] = 0;
  4396. }
  4397. }
  4398. return err;
  4399. }
  4400. #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
  4401. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(HAVE_ECC)
  4402. /* Set the multi-precision number to zero.
  4403. *
  4404. * @param [out] a SP integer to set to zero.
  4405. */
  4406. void sp_zero(sp_int* a)
  4407. {
  4408. if (a != NULL) {
  4409. _sp_zero(a);
  4410. }
  4411. }
  4412. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  4413. /* Clear the data from the multi-precision number and set to zero.
  4414. *
  4415. * @param [out] a SP integer.
  4416. */
  4417. void sp_clear(sp_int* a)
  4418. {
  4419. if (a != NULL) {
  4420. int i;
  4421. for (i = 0; i < a->used; i++) {
  4422. a->dp[i] = 0;
  4423. }
  4424. _sp_zero(a);
  4425. sp_free(a);
  4426. }
  4427. }
  4428. #if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
  4429. /* Ensure the data in the multi-precision number is zeroed.
  4430. *
  4431. * Use when security sensitive data needs to be wiped.
  4432. *
  4433. * @param [in] a SP integer.
  4434. */
  4435. void sp_forcezero(sp_int* a)
  4436. {
  4437. if (a != NULL) {
  4438. /* Ensure all data zeroized - data not zeroed when used decreases. */
  4439. ForceZero(a->dp, a->used * sizeof(sp_int_digit));
  4440. _sp_zero(a);
  4441. #ifdef HAVE_WOLF_BIGINT
  4442. wc_bigint_zero(&a->raw);
  4443. #endif
  4444. sp_free(a);
  4445. }
  4446. }
  4447. #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
  4448. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4449. !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
  4450. /* Copy value of multi-precision number a into r.
  4451. *
  4452. * @param [in] a SP integer - source.
  4453. * @param [out] r SP integer - destination.
  4454. *
  4455. * @return MP_OKAY on success.
  4456. */
  4457. int sp_copy(const sp_int* a, sp_int* r)
  4458. {
  4459. int err = MP_OKAY;
  4460. if ((a == NULL) || (r == NULL)) {
  4461. err = MP_VAL;
  4462. }
  4463. else if (a != r) {
  4464. XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
  4465. if (a->used == 0)
  4466. r->dp[0] = 0;
  4467. r->used = a->used;
  4468. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4469. r->sign = a->sign;
  4470. #endif
  4471. }
  4472. return err;
  4473. }
  4474. #endif
  4475. #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
  4476. /* Initializes r and copies in value from a.
  4477. *
  4478. * @param [out] r SP integer - destination.
  4479. * @param [in] a SP integer - source.
  4480. *
  4481. * @return MP_OKAY on success.
  4482. * @return MP_VAL when a or r is NULL.
  4483. */
  4484. int sp_init_copy(sp_int* r, sp_int* a)
  4485. {
  4486. int err;
  4487. err = sp_init(r);
  4488. if (err == MP_OKAY) {
  4489. err = sp_copy(a, r);
  4490. }
  4491. return err;
  4492. }
  4493. #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
  4494. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4495. !defined(NO_DH) || !defined(NO_DSA)
/* Exchange the values in a and b.
 *
 * Swaps by copying whole sp_int structures through a temporary, so it may
 * allocate when dynamic memory is enabled.
 *
 * @param [in,out] a SP integer to swap.
 * @param [in,out] b SP integer to swap.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or b is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_exch(sp_int* a, sp_int* b)
{
    int err = MP_OKAY;
    DECL_SP_INT(t, (a != NULL) ? a->used : 1);

    if ((a == NULL) || (b == NULL)) {
        err = MP_VAL;
    }
    /* Each number must have room for the other's digits. */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
        err = MP_VAL;
    }

    ALLOC_SP_INT(t, a->used, err, NULL);
    if (err == MP_OKAY) {
        /* 'size' describes each sp_int's own buffer and must not travel
         * with the copied contents - save and restore it. */
        int asize = a->size;
        int bsize = b->size;

        XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
        XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
        XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
        a->size = asize;
        b->size = bsize;
    }

    FREE_SP_INT(t, NULL);
    return err;
}
  4528. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  4529. * !NO_DSA */
  4530. #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
  4531. !defined(WC_NO_CACHE_RESISTANT)
  4532. int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m)
  4533. {
  4534. int i;
  4535. int err = MP_OKAY;
  4536. sp_int_digit mask = (sp_int_digit)0 - m;
  4537. DECL_SP_INT(t, c);
  4538. ALLOC_SP_INT(t, c, err, NULL);
  4539. if (err == MP_OKAY) {
  4540. t->used = (int)((a->used ^ b->used) & mask);
  4541. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4542. t->sign = (int)((a->sign ^ b->sign) & mask);
  4543. #endif
  4544. for (i = 0; i < c; i++) {
  4545. t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
  4546. }
  4547. a->used ^= t->used;
  4548. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4549. a->sign ^= t->sign;
  4550. #endif
  4551. for (i = 0; i < c; i++) {
  4552. a->dp[i] ^= t->dp[i];
  4553. }
  4554. b->used ^= t->used;
  4555. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4556. b->sign ^= b->sign;
  4557. #endif
  4558. for (i = 0; i < c; i++) {
  4559. b->dp[i] ^= t->dp[i];
  4560. }
  4561. }
  4562. FREE_SP_INT(t, NULL);
  4563. return err;
  4564. }
  4565. #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
  4566. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4567. /* Calculate the absolute value of the multi-precision number.
  4568. *
  4569. * @param [in] a SP integer to calculate absolute value of.
  4570. * @param [out] r SP integer to hold result.
  4571. *
  4572. * @return MP_OKAY on success.
  4573. * @return MP_VAL when a or r is NULL.
  4574. */
  4575. int sp_abs(sp_int* a, sp_int* r)
  4576. {
  4577. int err;
  4578. err = sp_copy(a, r);
  4579. if (r != NULL) {
  4580. r->sign = MP_ZPOS;
  4581. }
  4582. return err;
  4583. }
  4584. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  4585. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4586. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  4587. /* Compare absolute value of two multi-precision numbers.
  4588. *
  4589. * @param [in] a SP integer.
  4590. * @param [in] b SP integer.
  4591. *
  4592. * @return MP_GT when a is greater than b.
  4593. * @return MP_LT when a is less than b.
  4594. * @return MP_EQ when a is equals b.
  4595. */
  4596. static int _sp_cmp_abs(sp_int* a, sp_int* b)
  4597. {
  4598. int ret = MP_EQ;
  4599. if (a->used > b->used) {
  4600. ret = MP_GT;
  4601. }
  4602. else if (a->used < b->used) {
  4603. ret = MP_LT;
  4604. }
  4605. else {
  4606. int i;
  4607. for (i = a->used - 1; i >= 0; i--) {
  4608. if (a->dp[i] > b->dp[i]) {
  4609. ret = MP_GT;
  4610. break;
  4611. }
  4612. else if (a->dp[i] < b->dp[i]) {
  4613. ret = MP_LT;
  4614. break;
  4615. }
  4616. }
  4617. }
  4618. return ret;
  4619. }
  4620. #endif
  4621. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  4622. /* Compare absolute value of two multi-precision numbers.
  4623. *
  4624. * @param [in] a SP integer.
  4625. * @param [in] b SP integer.
  4626. *
  4627. * @return MP_GT when a is greater than b.
  4628. * @return MP_LT when a is less than b.
  4629. * @return MP_EQ when a is equals b.
  4630. */
  4631. int sp_cmp_mag(sp_int* a, sp_int* b)
  4632. {
  4633. int ret;
  4634. if (a == b) {
  4635. ret = MP_EQ;
  4636. }
  4637. else if (a == NULL) {
  4638. ret = MP_LT;
  4639. }
  4640. else if (b == NULL) {
  4641. ret = MP_GT;
  4642. }
  4643. else
  4644. {
  4645. ret = _sp_cmp_abs(a, b);
  4646. }
  4647. return ret;
  4648. }
  4649. #endif
  4650. #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
  4651. defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
  4652. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Compare two multi-precision numbers, taking sign into account.
 *
 * Assumes a and b are not NULL.
 *
 * @param [in] a SP integer.
 * @param [in] b SP integer.
 *
 * @return MP_GT when a is greater than b.
 * @return MP_LT when a is less than b.
 * @return MP_EQ when a equals b.
 */
static int _sp_cmp(sp_int* a, sp_int* b)
{
    int ret;

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Same sign: magnitude comparison decides (inverted when negative). */
    if (a->sign == b->sign) {
#endif
        ret = _sp_cmp_abs(a, b);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (a->sign == MP_NEG) {
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
             * Swapping MP_GT and MP_LT results.
             */
            ret = -ret;
        }
    }
    else if (a->sign > b->sign) {
        /* a negative, b positive (MP_NEG > MP_ZPOS). */
        ret = MP_LT;
    }
    else /* (a->sign < b->sign) */ {
        ret = MP_GT;
    }
#endif

    return ret;
}
  4688. #endif
  4689. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4690. !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
  4691. defined(WOLFSSL_SP_MATH_ALL)
  4692. /* Compare two multi-precision numbers.
  4693. *
  4694. * Pointers are compared such that NULL is less than not NULL.
  4695. *
  4696. * @param [in] a SP integer.
  4697. * @param [in] a SP integer.
  4698. *
  4699. * @return MP_GT when a is greater than b.
  4700. * @return MP_LT when a is less than b.
  4701. * @return MP_EQ when a is equals b.
  4702. */
  4703. int sp_cmp(sp_int* a, sp_int* b)
  4704. {
  4705. int ret;
  4706. if (a == b) {
  4707. ret = MP_EQ;
  4708. }
  4709. else if (a == NULL) {
  4710. ret = MP_LT;
  4711. }
  4712. else if (b == NULL) {
  4713. ret = MP_GT;
  4714. }
  4715. else
  4716. {
  4717. ret = _sp_cmp(a, b);
  4718. }
  4719. return ret;
  4720. }
  4721. #endif
  4722. /*************************
  4723. * Bit check/set functions
  4724. *************************/
  4725. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (defined(WOLFSSL_SP_MATH_ALL) && \
  4726. defined(HAVE_ECC))
  4727. /* Check if a bit is set
  4728. *
  4729. * When a is NULL, result is 0.
  4730. *
  4731. * @param [in] a SP integer.
  4732. * @param [in] b Bit position to check.
  4733. *
  4734. * @return 0 when bit is not set.
  4735. * @return 1 when bit is set.
  4736. */
  4737. int sp_is_bit_set(sp_int* a, unsigned int b)
  4738. {
  4739. int ret = 0;
  4740. int i = (int)(b >> SP_WORD_SHIFT);
  4741. int s = (int)(b & SP_WORD_MASK);
  4742. if ((a != NULL) && (i < a->used)) {
  4743. ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
  4744. }
  4745. return ret;
  4746. }
  4747. #endif /* WOLFSSL_RSA_VERIFY_ONLY */
/* Count the number of bits in the multi-precision number.
 *
 * When a is NULL, the result is 0.
 *
 * @param [in] a SP integer.
 *
 * @return The number of bits in the number.
 */
int sp_count_bits(const sp_int* a)
{
    int r = 0;

    if (a != NULL) {
        /* Find the highest non-zero digit ('used' may over-count). */
        r = a->used - 1;
        while ((r >= 0) && (a->dp[r] == 0)) {
            r--;
        }
        if (r < 0) {
            /* Value is zero: zero bits. */
            r = 0;
        }
        else {
            sp_int_digit d;

            d = a->dp[r];
            /* Bits contributed by the full digits below the top one. */
            r *= SP_WORD_SIZE;
            if (d > SP_HALF_MAX) {
                /* Top bit is in the upper half: count down from full width. */
                r += SP_WORD_SIZE;
                while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
                    r--;
                    d <<= 1;
                }
            }
            else {
                /* Top bit is in the lower half: count up from zero. */
                while (d != 0) {
                    r++;
                    d >>= 1;
                }
            }
        }
    }

    return r;
}
  4788. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4789. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  4790. (defined(HAVE_ECC) && defined(FP_ECC)) || \
  4791. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Number of entries in array of number of least significant zero bits. */
#define SP_LNZ_CNT 16
/* Number of bits the array checks. */
#define SP_LNZ_BITS 4
/* Mask to apply to check with array. */
#define SP_LNZ_MASK 0xf
/* Number of least significant zero bits in first SP_LNZ_CNT numbers.
 * Entry 0 is 4 meaning "all SP_LNZ_BITS bits zero, keep scanning". */
static const int sp_lnz[SP_LNZ_CNT] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
/* Count the number of least significant zero bits.
 *
 * When a is NULL or zero, the result is 0.
 *
 * @param [in] a SP integer to use.
 *
 * @return Number of least significant zero bits.
 */
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
static
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
int sp_cnt_lsb(sp_int* a)
{
    int bc = 0;

    if ((a != NULL) && (!sp_iszero(a))) {
        int i;
        int j;
        int cnt = 0;

        /* Skip whole zero digits, counting SP_WORD_SIZE bits for each. */
        for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
        }
        /* Scan the first non-zero digit one nibble at a time. */
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
            bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
            if (bc != 4) {
                /* Nibble has a set bit: zero digits + nibbles + table value. */
                bc += cnt + j;
                break;
            }
        }
    }

    return bc;
}
  4832. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
  4833. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  4834. (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
/* Determine if the most significant byte of the encoded multi-precision number
 * has the top bit set.
 *
 * When a is NULL, result is 0.
 *
 * @param [in] a SP integer.
 *
 * @return 1 when the top bit of top byte is set.
 * @return 0 when the top bit of top byte is not set.
 */
int sp_leading_bit(sp_int* a)
{
    int bit = 0;

    if ((a != NULL) && (a->used > 0)) {
        sp_int_digit d = a->dp[a->used - 1];

#if SP_WORD_SIZE > 8
        /* Shift down to the most significant non-zero byte. */
        while (d > (sp_int_digit)0xff) {
            d >>= 8;
        }
#endif
        /* Top bit of that byte. */
        bit = (int)(d >> 7);
    }

    return bit;
}
  4859. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  4860. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  4861. defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
  4862. !defined(NO_RSA)
  4863. /* Set a bit of a: a |= 1 << i
  4864. * The field 'used' is updated in a.
  4865. *
  4866. * @param [in,out] a SP integer to set bit into.
  4867. * @param [in] i Index of bit to set.
  4868. *
  4869. * @return MP_OKAY on success.
  4870. * @return MP_VAL when a is NULL or index is too large.
  4871. */
  4872. int sp_set_bit(sp_int* a, int i)
  4873. {
  4874. int err = MP_OKAY;
  4875. int w = (int)(i >> SP_WORD_SHIFT);
  4876. if ((a == NULL) || (w >= a->size)) {
  4877. err = MP_VAL;
  4878. }
  4879. else {
  4880. int s = (int)(i & (SP_WORD_SIZE - 1));
  4881. int j;
  4882. for (j = a->used; j <= w; j++) {
  4883. a->dp[j] = 0;
  4884. }
  4885. a->dp[w] |= (sp_int_digit)1 << s;
  4886. if (a->used <= w) {
  4887. a->used = w + 1;
  4888. }
  4889. }
  4890. return err;
  4891. }
  4892. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  4893. * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
  4894. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4895. defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
  4896. /* Exponentiate 2 to the power of e: a = 2^e
  4897. * This is done by setting the 'e'th bit.
  4898. *
  4899. * @param [out] a SP integer to hold result.
  4900. * @param [in] e Exponent.
  4901. *
  4902. * @return MP_OKAY on success.
  4903. * @return MP_VAL when a is NULL or 2^exponent is too large.
  4904. */
  4905. int sp_2expt(sp_int* a, int e)
  4906. {
  4907. int err = MP_OKAY;
  4908. if (a == NULL) {
  4909. err = MP_VAL;
  4910. }
  4911. if (err == MP_OKAY) {
  4912. _sp_zero(a);
  4913. err = sp_set_bit(a, e);
  4914. }
  4915. return err;
  4916. }
  4917. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  4918. * WOLFSSL_KEY_GEN || !NO_DH */
  4919. /**********************
  4920. * Digit/Long functions
  4921. **********************/
/* Set the multi-precision number to be the value of the digit.
 *
 * @param [out] a SP integer to become number.
 * @param [in]  d Digit to be set.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a is NULL.
 */
int sp_set(sp_int* a, sp_int_digit d)
{
    int err = MP_OKAY;

    if (a == NULL) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* gcc-11 reports out-of-bounds array access if the byte array backing
         * the sp_int* is smaller than sizeof(sp_int), as occurs when
         * WOLFSSL_SP_SMALL.
         */
        PRAGMA_GCC_DIAG_PUSH;
        PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"");
        a->dp[0] = d;
        /* 'used' is 0 when the value is zero, 1 otherwise. */
        a->used = d > 0;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        a->sign = MP_ZPOS;
#endif
        PRAGMA_GCC_DIAG_POP;
    }

    return err;
}
  4952. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
  4953. /* Set a number into the multi-precision number.
  4954. *
  4955. * Number may be larger than the size of a digit.
  4956. *
  4957. * @param [out] a SP integer to set.
  4958. * @param [in] n Long value to set.
  4959. *
  4960. * @return MP_OKAY on success.
  4961. * @return MP_VAL when a is NULL.
  4962. */
  4963. int sp_set_int(sp_int* a, unsigned long n)
  4964. {
  4965. int err = MP_OKAY;
  4966. if (a == NULL) {
  4967. err = MP_VAL;
  4968. }
  4969. if (err == MP_OKAY) {
  4970. #if SP_WORD_SIZE < SP_ULONG_BITS
  4971. if (n <= (sp_int_digit)SP_DIGIT_MAX) {
  4972. #endif
  4973. a->dp[0] = (sp_int_digit)n;
  4974. a->used = (n != 0);
  4975. #if SP_WORD_SIZE < SP_ULONG_BITS
  4976. }
  4977. else {
  4978. int i;
  4979. for (i = 0; n > 0; i++,n >>= SP_WORD_SIZE) {
  4980. a->dp[i] = (sp_int_digit)n;
  4981. }
  4982. a->used = i;
  4983. }
  4984. #endif
  4985. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4986. a->sign = MP_ZPOS;
  4987. #endif
  4988. }
  4989. return err;
  4990. }
  4991. #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
  4992. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  4993. (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_DH))
/* Compare a one digit number with a multi-precision number.
 *
 * When a is NULL, MP_LT is returned.
 *
 * @param [in] a SP integer to compare.
 * @param [in] d Digit to compare with.
 *
 * @return MP_GT when a is greater than d.
 * @return MP_LT when a is less than d.
 * @return MP_EQ when a equals d.
 */
int sp_cmp_d(sp_int* a, sp_int_digit d)
{
    int ret = MP_EQ;

    if (a == NULL) {
        ret = MP_LT;
    }
    else
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Any negative value is less than an (unsigned) digit. */
    if (a->sign == MP_NEG) {
        ret = MP_LT;
    }
    else
#endif
    {
        /* special case for zero*/
        if (a->used == 0) {
            if (d == 0) {
                ret = MP_EQ;
            }
            else {
                ret = MP_LT;
            }
        }
        else if (a->used > 1) {
            /* More than one digit in use: greater than any single digit. */
            ret = MP_GT;
        }
        else {
            /* One digit each: compare directly. */
            if (a->dp[0] > d) {
                ret = MP_GT;
            }
            else if (a->dp[0] < d) {
                ret = MP_LT;
            }
        }
    }

    return ret;
}
  5042. #endif
/* Feature-selection macros: each names the set of build configurations that
 * requires one of the optional digit/radix/inverse functions below. */
#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
    !defined(NO_DSA) || \
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(OPENSSL_EXTRA)
/* sp_add_d() is needed. */
#define WOLFSSL_SP_ADD_D
#endif
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
/* sp_sub_d() is needed. */
#define WOLFSSL_SP_SUB_D
#endif
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* Reading numbers from decimal strings is needed. */
#define WOLFSSL_SP_READ_RADIX_10
#endif
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
    !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Modular inversion is needed. */
#define WOLFSSL_SP_INVMOD
#endif
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Constant-time Montgomery modular inversion is needed. */
#define WOLFSSL_SP_INVMOD_MONT_CT
#endif
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Prime generation is needed. */
#define WOLFSSL_SP_PRIME_GEN
#endif
  5070. #if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
  5071. defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
/* Add a one digit number to the multi-precision number.
 *
 * @param [in]  a SP integer be added to.
 * @param [in]  d Digit to add.
 * @param [out] r SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for fixed size dp array.
 */
static int _sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;
    int i = 0;
    sp_int_digit t;

    r->used = a->used;
    if (a->used == 0) {
        /* 0 + d: one digit used only when d is non-zero. */
        r->used = d > 0;
    }
    /* NOTE(review): dp[0] is read even when a->used == 0 - relies on digit 0
     * of a zero-valued sp_int being 0. */
    t = a->dp[0] + d;
    if (t < a->dp[0]) {
        /* Addition wrapped: propagate the carry up the digits. */
        for (++i; i < a->used; i++) {
            r->dp[i] = a->dp[i] + 1;
            if (r->dp[i] != 0) {
                /* No wrap here, carry absorbed. */
                break;
            }
        }
        if (i == a->used) {
            /* Carry out of the top digit: grow by one digit if there is
             * room in the result. */
            if (i < r->size) {
                r->used++;
                r->dp[i] = 1;
            }
            else {
                err = MP_VAL;
            }
        }
    }
    if (err == MP_OKAY) {
        r->dp[0] = t;
        if (r != a) {
            /* Copy remaining digits not touched by carry propagation. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
    }

    return err;
}
  5118. #endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
  5119. * defined(WOLFSSL_SP_READ_RADIX_10) */
  5120. #if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
  5121. defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
  5122. defined(WOLFSSL_SP_INVMOD_MONT_CT) || defined(WOLFSSL_SP_PRIME_GEN)
/* Sub a one digit number from the multi-precision number.
 *
 * Cannot fail - callers must ensure a >= d (no underflow handling here).
 *
 * @param [in]  a SP integer be subtracted from.
 * @param [in]  d Digit to subtract.
 * @param [out] r SP integer to store result in.
 */
static void _sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
{
    int i = 0;
    sp_int_digit t;

    r->used = a->used;
    if (a->used == 0) {
        /* a is zero: result forced to zero. */
        r->dp[0] = 0;
    }
    else {
        t = a->dp[0] - d;
        if (t > a->dp[0]) {
            /* Subtraction wrapped: propagate the borrow up the digits. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] - 1;
                if (r->dp[i] != SP_DIGIT_MAX) {
                    /* No wrap here, borrow absorbed. */
                    break;
                }
            }
        }
        r->dp[0] = t;
        if (r != a) {
            /* Copy remaining digits not touched by borrow propagation. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
        /* Top digit(s) may now be zero. */
        sp_clamp(r);
    }
}
  5157. #endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
  5158. * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
  5159. * WOLFSSL_SP_PRIME_GEN */
  5160. #ifdef WOLFSSL_SP_ADD_D
/* Add a one digit number to the multi-precision number.
 *
 * @param [in]  a SP integer be added to.
 * @param [in]  d Digit to add.
 * @param [out] r SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for fixed size dp array.
 */
int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    else
    {
#ifndef WOLFSSL_SP_INT_NEGATIVE
        /* Positive only so just use internal function. */
        err = _sp_add_d(a, d, r);
#else
        if (a->sign == MP_ZPOS) {
            /* Positive so use internal function. */
            r->sign = MP_ZPOS;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] > d)) {
            /* Negative value with magnitude bigger than digit so subtract
             * digit from magnitude; result stays negative. */
            r->sign = MP_NEG;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Negative value with magnitude smaller than or equal to digit. */
            r->sign = MP_ZPOS;
            /* Subtract negative value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a digit equal to or greater than zero. */
            r->used = ((r->dp[0] == 0) ? 0 : 1);
        }
#endif
    }

    return err;
}
  5205. #endif /* WOLFSSL_SP_ADD_D */
  5206. #ifdef WOLFSSL_SP_SUB_D
/* Sub a one digit number from the multi-precision number.
 *
 * @param [in]  a SP integer be subtracted from.
 * @param [in]  d Digit to subtract.
 * @param [out] r SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or r is NULL.
 */
int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    else {
#ifndef WOLFSSL_SP_INT_NEGATIVE
        /* Positive only so just use internal function. */
        _sp_sub_d(a, d, r);
#else
        if (a->sign == MP_NEG) {
            /* Subtracting from negative grows the magnitude: internal add. */
            r->sign = MP_NEG;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] >= d)) {
            /* Positive number greater than or equal to digit so subtract
             * digit from magnitude. */
            r->sign = MP_ZPOS;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Positive value smaller than digit: result becomes negative. */
            r->sign = MP_NEG;
            /* Subtract positive value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a single digit greater than zero. */
            r->used = 1;
        }
#endif
    }

    return err;
}
  5250. #endif /* WOLFSSL_SP_SUB_D */
  5251. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5252. defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
  5253. !defined(NO_DH) || defined(HAVE_ECC) || \
  5254. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5255. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  5256. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
/* Multiply a by digit n and put result into r shifting up o digits.
 * r = (a * n) << (o * SP_WORD_SIZE)
 *
 * @param [in]  a SP integer to be multiplied.
 * @param [in]  n Number (SP digit) to multiply by.
 * @param [out] r SP integer result.
 * @param [in]  o Number of digits to move result up by.
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for sp_int.
 */
static int _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
{
    int err = MP_OKAY;
    int i;
#ifndef SQR_MUL_ASM
    /* Running product including carry. */
    sp_int_word t = 0;
#else
    /* Low and high halves of the product/carry. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

#ifdef WOLFSSL_SP_SMALL
    /* Zero the result digits below the shifted output. */
    for (i = 0; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    /* Don't use the offset. Only when doing small code size div. */
    (void)o;
#endif

    for (i = 0; i < a->used; i++, o++) {
#ifndef SQR_MUL_ASM
        t += (sp_int_word)a->dp[i] * n;
        r->dp[o] = (sp_int_digit)t;
        /* Keep the carry for the next digit. */
        t >>= SP_WORD_SIZE;
#else
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], n);
        r->dp[o] = l;
        /* High half becomes the carry into the next digit. */
        l = h;
        h = 0;
#endif
    }

    /* Write out any final carry digit. */
#ifndef SQR_MUL_ASM
    if (t > 0)
#else
    if (l > 0)
#endif
    {
        if (o == r->size) {
            /* No room for the carry digit. */
            err = MP_VAL;
        }
        else {
#ifndef SQR_MUL_ASM
            r->dp[o++] = (sp_int_digit)t;
#else
            r->dp[o++] = l;
#endif
        }
    }

    r->used = o;
    sp_clamp(r);

    return err;
}
  5318. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5319. * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
  5320. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5321. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
/* Multiply a by digit d and put result into r. r = a * d
 *
 * @param [in]  a SP integer to multiply.
 * @param [in]  d Digit to multiply by.
 * @param [out] r SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or r is NULL, or result does not fit in r.
 */
int sp_mul_d(sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Result can need one more digit than a has. */
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        err = _sp_mul_d(a, d, r, 0);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (d == 0) {
            /* Multiplying by zero gives a (positive) zero. */
            r->sign = MP_ZPOS;
        }
        else {
            /* Sign of result follows a (d is unsigned). */
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  5353. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5354. * (WOLFSSL_KEY_GEN && !NO_RSA) */
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
/* sp_div_d() is needed. */
#define WOLFSSL_SP_DIV_D
#endif
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    !defined(NO_DH) || \
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* sp_mod_d() is needed. */
#define WOLFSSL_SP_MOD_D
#endif
  5367. #if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  5368. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5369. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  5370. defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
  5371. #ifndef SP_ASM_DIV_WORD
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Lower digit of the dividend.
 * @param [in] d  SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
#ifdef WOLFSSL_SP_DIV_WORD_HALF
    /* Build the quotient from half-digit estimates, avoiding a full
     * double-width divide. */
    sp_int_digit r;

    if (hi != 0) {
        /* Top half of the divisor: dividing by it over-estimates partial
         * quotients, which are then corrected downwards. */
        sp_int_digit divsz = d >> SP_HALF_SIZE;
        sp_int_digit r2;
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
        sp_int_word trial;

        /* Estimate the top half of the quotient. */
        r = hi / divsz;
        if (r > SP_HALF_MAX) {
            r = SP_HALF_MAX;
        }
        r <<= SP_HALF_SIZE;
        /* Correct the over-estimate until trial product fits. */
        trial = r * (sp_int_word)d;
        while (trial > w) {
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
            trial -= (sp_int_word)d << SP_HALF_SIZE;
        }
        w -= trial;

        /* Estimate the bottom half of the quotient and correct. */
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divsz;
        trial = r2 * (sp_int_word)d;
        while (trial > w) {
            r2--;
            trial -= d;
        }
        w -= trial;
        r += r2;

        /* Final correction using what remains of the dividend. */
        r2 = ((sp_int_digit)w) / d;
        r += r2;
    }
    else {
        /* High digit zero: a plain single-digit divide suffices. */
        r = lo / d;
    }

    return r;
#else
    /* Double-width divide done directly by the compiler/CPU. */
    sp_int_word w;
    sp_int_digit r;

    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
    w /= d;
    r = (sp_int_digit)w;

    return r;
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
}
  5424. #endif /* !SP_ASM_DIV_WORD */
  5425. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  5426. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  5427. #if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
  5428. !defined(WOLFSSL_SP_SMALL)
/* Reciprocal constants - approximately 2^SP_WORD_SIZE / 3 and
 * 2^SP_WORD_SIZE / 10 - used to divide by multiplying. */
#if SP_WORD_SIZE == 64
#define SP_DIV_3_CONST 0x5555555555555555L
#define SP_DIV_10_CONST 0x1999999999999999L
#elif SP_WORD_SIZE == 32
#define SP_DIV_3_CONST 0x55555555
#define SP_DIV_10_CONST 0x19999999
#elif SP_WORD_SIZE == 16
#define SP_DIV_3_CONST 0x5555
#define SP_DIV_10_CONST 0x1999
#elif SP_WORD_SIZE == 8
#define SP_DIV_3_CONST 0x55
#define SP_DIV_10_CONST 0x19
#endif
/* Divide by 3: r = a / 3 and rem = a % 3
 *
 * Uses reciprocal multiplication per digit, then fixes up the partial
 * remainder (which can be 0..5) with small lookup tables.
 *
 * @param [in]  a   SP integer to be divided.
 * @param [out] r   SP integer that is the quotient. May be NULL.
 * @param [out] rem SP integer that is the remainder. May be NULL.
 */
static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t;
#endif
    sp_int_digit tr = 0;
    /* Quotient correction and remainder for partial remainders 0..5. */
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };

    if (r == NULL) {
        /* Remainder-only path: no quotient digits stored. */
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Partial value: carried remainder above the next digit. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Multiply by the reciprocal to estimate the quotient digit. */
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
            t = SP_DIV_3_CONST;
            SP_ASM_MUL(l, tt, a->dp[i], t);
            tt += tr * SP_DIV_3_CONST;
            tr = a->dp[i] - (tt * 3);
#endif
            /* Reduce the partial remainder (0..5) to 0..2. */
            tr = sp_rem6[tr];
        }
        *rem = tr;
    }
    else {
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
            t = SP_DIV_3_CONST;
            SP_ASM_MUL(l, tt, a->dp[i], t);
            tt += tr * SP_DIV_3_CONST;
            tr = a->dp[i] - (tt * 3);
#endif
            /* Fix up the estimated quotient digit and reduce remainder. */
            tt += sp_r6[tr];
            tr = sp_rem6[tr];
            r->dp[i] = tt;
        }
        r->used = a->used;
        sp_clamp(r);
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
/* Divide by 10: r = a / 10 and rem = a % 10
 *
 * Avoids a hardware divide by multiplying each digit by the fixed-point
 * reciprocal SP_DIV_10_CONST, then correcting the small remainder estimate
 * with a cheap divide/modulo by 10.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. Must not be NULL when r
 *                     is NULL; may be NULL otherwise.
 */
static void _sp_div_10(sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t;
#endif
    sp_int_digit tr = 0;

    if (r == NULL) {
        /* Remainder only - quotient digits are not stored. */
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder carried from higher digit with this digit. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Estimate the quotient digit via the reciprocal multiply. */
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            t = SP_DIV_10_CONST;
            SP_ASM_MUL(l, tt, a->dp[i], t);
            tt += tr * SP_DIV_10_CONST;
            tr = a->dp[i] - (tt * 10);
#endif
            /* Reduce the estimate to the true remainder. */
            tr = tr % 10;
        }
        *rem = tr;
    }
    else {
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            t = SP_DIV_10_CONST;
            SP_ASM_MUL(l, tt, a->dp[i], t);
            tt += tr * SP_DIV_10_CONST;
            tr = a->dp[i] - (tt * 10);
#endif
            /* Fold the estimate's overflow back into the quotient digit. */
            tt += tr / 10;
            tr = tr % 10;
            r->dp[i] = tt;
        }
        r->used = a->used;
        sp_clamp(r);
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  5558. #endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
  5559. #if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
/* Divide by small number: r = a / d and rem = a % d
 *
 * Multiplies by m = SP_DIGIT_MAX / d (a fixed-point reciprocal of d) to
 * estimate each quotient digit, then corrects the estimate with a divide
 * and modulo by d of the small remainder.
 *
 * NOTE(review): visible callers (sp_div_d, sp_mod_d) only use this for
 * d <= SP_HALF_MAX - confirm that bound before widening its use.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by. Must not be 0 (callers check).
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. Must not be NULL when r
 *                     is NULL; may be NULL otherwise.
 */
static void _sp_div_small(sp_int* a, sp_int_digit d, sp_int* r,
                          sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
#endif
    sp_int_digit tr = 0;
    sp_int_digit m;

    if (r == NULL) {
        /* Reciprocal of d as a fixed-point fraction of a digit. */
        m = SP_DIGIT_MAX / d;
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder carried from higher digit with this digit. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Estimate the quotient digit. */
            tt = (t * m) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - tt * d);
#else
            SP_ASM_MUL(l, tt, a->dp[i], m);
            tt += tr * m;
            tr = a->dp[i] - (tt * d);
#endif
            /* Reduce the estimate to the true remainder. */
            tr = tr % d;
        }
        *rem = tr;
    }
    else {
        m = SP_DIGIT_MAX / d;
        for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            tt = (t * m) >> SP_WORD_SIZE;
            tr = (sp_int_digit)(t - tt * d);
#else
            SP_ASM_MUL(l, tt, a->dp[i], m);
            tt += tr * m;
            tr = a->dp[i] - (tt * d);
#endif
            /* Fold the estimate's overflow back into the quotient digit. */
            tt += tr / d;
            tr = tr % d;
            r->dp[i] = tt;
        }
        r->used = a->used;
        sp_clamp(r);
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  5619. #endif
  5620. #ifdef WOLFSSL_SP_DIV_D
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 *   r = a / d; rem = a % d
 *
 * Dispatches to the fastest applicable routine: reciprocal multiply for
 * d == 3 and d == 10 (unless WOLFSSL_SP_SMALL), _sp_div_small() for
 * d <= SP_HALF_MAX, and a general sp_div_word() long division otherwise.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    Digit to divide by.
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL or d is 0.
 */
int sp_div_d(sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
{
    int err = MP_OKAY;

    if ((a == NULL) || (d == 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#if !defined(WOLFSSL_SP_SMALL)
        if (d == 3) {
            _sp_div_3(a, r, rem);
        }
        else if (d == 10) {
            _sp_div_10(a, r, rem);
        }
        else
#endif
        if (d <= SP_HALF_MAX) {
            _sp_div_small(a, d, r, rem);
        }
        else
        {
            /* General case: long division, most significant digit first. */
            int i;
#ifndef SQR_MUL_ASM
            sp_int_word w = 0;
#else
            sp_int_digit l;
            sp_int_digit h = 0;
#endif
            sp_int_digit t;

            for (i = a->used - 1; i >= 0; i--) {
#ifndef SQR_MUL_ASM
                /* Divide carried remainder and digit; keep the new
                 * remainder in w for the next iteration. */
                t = sp_div_word((sp_int_digit)w, a->dp[i], d);
                w = (w << SP_WORD_SIZE) | a->dp[i];
                w -= (sp_int_word)t * d;
#else
                l = a->dp[i];
                t = sp_div_word(h, l, d);
                h = l - t * d;
#endif
                if (r != NULL) {
                    r->dp[i] = t;
                }
            }
            if (r != NULL) {
                r->used = a->used;
                sp_clamp(r);
            }

            if (rem != NULL) {
#ifndef SQR_MUL_ASM
                *rem = (sp_int_digit)w;
#else
                *rem = h;
#endif
            }
        }

#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Quotient keeps the sign of the dividend. */
        if (r != NULL) {
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  5696. #endif /* WOLFSSL_SP_DIV_D */
  5697. #ifdef WOLFSSL_SP_MOD_D
  5698. /* Calculate a modulo the digit d into r: r = a mod d
  5699. *
  5700. * @param [in] a SP integer to reduce.
  5701. * @param [in] d Digit to that is the modulus.
  5702. * @param [out] r Digit that is the result..
  5703. *
  5704. * @return MP_OKAY on success.
  5705. * @return MP_VAL when a is NULL or d is 0.
  5706. */
  5707. #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
  5708. !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
  5709. static
  5710. #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
  5711. int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
  5712. {
  5713. int err = MP_OKAY;
  5714. if ((a == NULL) || (r == NULL) || (d == 0)) {
  5715. err = MP_VAL;
  5716. }
  5717. #if 0
  5718. sp_print(a, "a");
  5719. sp_print_digit(d, "m");
  5720. #endif
  5721. if (err == MP_OKAY) {
  5722. /* Check whether d is a power of 2. */
  5723. if ((d & (d - 1)) == 0) {
  5724. if (a->used == 0) {
  5725. *r = 0;
  5726. }
  5727. else {
  5728. *r = a->dp[0] & (d - 1);
  5729. }
  5730. }
  5731. #if !defined(WOLFSSL_SP_SMALL)
  5732. else if (d == 3) {
  5733. _sp_div_3(a, NULL, r);
  5734. }
  5735. else if (d == 10) {
  5736. _sp_div_10(a, NULL, r);
  5737. }
  5738. #endif
  5739. else if (d <= SP_HALF_MAX) {
  5740. _sp_div_small(a, d, NULL, r);
  5741. }
  5742. else {
  5743. int i;
  5744. #ifndef SQR_MUL_ASM
  5745. sp_int_word w = 0;
  5746. #else
  5747. sp_int_digit l;
  5748. sp_int_digit h = 0;
  5749. #endif
  5750. sp_int_digit t;
  5751. for (i = a->used - 1; i >= 0; i--) {
  5752. #ifndef SQR_MUL_ASM
  5753. t = sp_div_word((sp_int_digit)w, a->dp[i], d);
  5754. w = (w << SP_WORD_SIZE) | a->dp[i];
  5755. w -= (sp_int_word)t * d;
  5756. #else
  5757. l = a->dp[i];
  5758. t = sp_div_word(h, l, d);
  5759. h = l - t * d;
  5760. #endif
  5761. }
  5762. #ifndef SQR_MUL_ASM
  5763. *r = (sp_int_digit)w;
  5764. #else
  5765. *r = h;
  5766. #endif
  5767. }
  5768. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5769. if (a->sign == MP_NEG) {
  5770. *r = d - *r;
  5771. }
  5772. #endif
  5773. }
  5774. #if 0
  5775. sp_print_digit(*r, "rmod");
  5776. #endif
  5777. return err;
  5778. }
  5779. #endif /* WOLFSSL_SP_MOD_D */
  5780. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
 *
 * r = a / 2 (mod m) - constant time (a < m and positive)
 *
 * When a is odd, a + m is even and congruent to a (mod m), so halving it
 * is exact. The add of m is selected by a mask so the same digit
 * operations execute whether a is odd or even.
 *
 * @param  [in]   a  SP integer to divide.
 * @param  [in]   m  SP integer that is modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL.
 */
int sp_div_2_mod_ct(sp_int* a, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* One extra digit needed for the carry out of a + m. */
    if ((err == MP_OKAY) && (r->size < m->used + 1)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#ifndef SQR_MUL_ASM
        sp_int_word w = 0;
#else
        sp_int_digit l = 0;
        sp_int_digit h = 0;
        sp_int_digit t;
#endif
        sp_int_digit mask;
        int i;

#if 0
        sp_print(a, "a");
        sp_print(m, "m");
#endif

        /* All ones when a is odd (m is added), all zeros when even. */
        mask = (sp_int_digit)0 - (a->dp[0] & 1);
        for (i = 0; i < m->used; i++) {
            /* Values past a->used are not initialized - mask them out. */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);

#ifndef SQR_MUL_ASM
            w += m->dp[i] & mask;
            w += a->dp[i] & mask_a;
            r->dp[i] = (sp_int_digit)w;
            w >>= DIGIT_BIT;
#else
            t = m->dp[i] & mask;
            SP_ASM_ADDC(l, h, t);
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC(l, h, t);
            r->dp[i] = l;
            l = h;
            h = 0;
#endif
        }
        /* Final carry out of the (possible) a + m. */
#ifndef SQR_MUL_ASM
        r->dp[i] = (sp_int_digit)w;
#else
        r->dp[i] = l;
#endif
        r->used = i + 1;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif
        sp_clamp(r);
        /* Halve the now-even value. */
        sp_div_2(r, r);

#if 0
        sp_print(r, "rd2");
#endif
    }

    return err;
}
  5850. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  5851. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  5852. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5853. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  5854. /* Divides a by 2 and stores in r: r = a >> 1
  5855. *
  5856. * @param [in] a SP integer to divide.
  5857. * @param [out] r SP integer to hold result.
  5858. *
  5859. * @return MP_OKAY on success.
  5860. * @return MP_VAL when a or r is NULL.
  5861. */
  5862. #if !(defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  5863. static
  5864. #endif
  5865. int sp_div_2(sp_int* a, sp_int* r)
  5866. {
  5867. int err = MP_OKAY;
  5868. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  5869. /* Only when a public API. */
  5870. if ((a == NULL) || (r == NULL)) {
  5871. err = MP_VAL;
  5872. }
  5873. #endif
  5874. if (err == MP_OKAY) {
  5875. int i;
  5876. r->used = a->used;
  5877. for (i = 0; i < a->used - 1; i++) {
  5878. r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
  5879. }
  5880. r->dp[i] = a->dp[i] >> 1;
  5881. r->used = i + 1;
  5882. sp_clamp(r);
  5883. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5884. r->sign = a->sign;
  5885. #endif
  5886. }
  5887. return err;
  5888. }
  5889. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
  5890. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  5891. /************************
  5892. * Add/Subtract Functions
  5893. ************************/
  5894. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
 *
 * Digit-wise addition with carry propagation. When SP_MATH_NEED_ADD_OFF is
 * not defined the offset is ignored and this is a plain r = a + b.
 *
 * @param  [in]   a  SP integer to add to.
 * @param  [in]   b  SP integer to add.
 * @param  [out]  r  SP integer to store result in. Assumed large enough for
 *                   the result - callers check sizes.
 * @param  [in]   o  Number of digits to offset b.
 *
 * @return  MP_OKAY on success.
 */
static int _sp_add_off(sp_int* a, sp_int* b, sp_int* r, int o)
{
    int i;
    int j;
#ifndef SQR_MUL_ASM
    sp_int_word t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit t = 0;
#endif

#ifdef SP_MATH_NEED_ADD_OFF
    /* Low 'o' digits come straight from a, zero-padded if a is short. */
    for (i = 0; (i < o) && (i < a->used); i++) {
        r->dp[i] = a->dp[i];
    }
    for (; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    i = 0;
    (void)o;
#endif

    /* Add overlapping digits of a and b with carry. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* b exhausted - propagate carry through remaining digits of a. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a exhausted - propagate carry through remaining digits of b. */
    for (; j < b->used; i++, j++) {
#ifndef SQR_MUL_ASM
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    r->used = i;
    /* A final carry becomes a new top digit. */
#ifndef SQR_MUL_ASM
    if (t != 0) {
        r->dp[i] = (sp_int_digit)t;
        r->used++;
    }
#else
    if (l != 0) {
        r->dp[i] = l;
        r->used++;
    }
#endif
    sp_clamp(r);

    return MP_OKAY;
}
  5983. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  5984. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
  5985. !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  5986. !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
 * a must be greater than b.
 *
 * Digit-wise subtraction with borrow propagation. Because a >= b the final
 * borrow is always zero, so no sign handling is needed here.
 *
 * @param  [in]   a  SP integer to subtract from.
 * @param  [in]   b  SP integer to subtract.
 * @param  [out]  r  SP integer to store result in.
 * @param  [in]   o  Number of digits to offset b.
 *
 * @return  MP_OKAY on success.
 */
static int _sp_sub_off(sp_int* a, sp_int* b, sp_int* r, int o)
{
    int i;
    int j;
#ifndef SQR_MUL_ASM
    sp_int_sword t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit t = 0;
#endif

    /* Low 'o' digits of a are below b's offset and pass through. */
    for (i = 0; (i < o) && (i < a->used); i++) {
        r->dp[i] = a->dp[i];
    }
    /* Subtract overlapping digits with borrow. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        t -= b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        /* Signed shift carries the borrow (-1 or 0) to the next digit. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[j];
        SP_ASM_SUBC(l, h, t);
        r->dp[i] = l;
        l = h;
        /* Sign-extend the borrow into the next digit pair. */
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
    }
    /* b exhausted - propagate borrow through remaining digits of a. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
    }
    r->used = i;
    sp_clamp(r);

    return MP_OKAY;
}
  6044. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
  6045. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6046. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
  6047. /* Add b to a into r: r = a + b
  6048. *
  6049. * @param [in] a SP integer to add to.
  6050. * @param [in] b SP integer to add.
  6051. * @param [out] r SP integer to store result in.
  6052. *
  6053. * @return MP_OKAY on success.
  6054. * @return MP_VAL when a, b, or r is NULL.
  6055. */
  6056. int sp_add(sp_int* a, sp_int* b, sp_int* r)
  6057. {
  6058. int err = MP_OKAY;
  6059. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  6060. err = MP_VAL;
  6061. }
  6062. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  6063. err = MP_VAL;
  6064. }
  6065. if (err == MP_OKAY) {
  6066. #ifndef WOLFSSL_SP_INT_NEGATIVE
  6067. err = _sp_add_off(a, b, r, 0);
  6068. #else
  6069. if (a->sign == b->sign) {
  6070. r->sign = a->sign;
  6071. err = _sp_add_off(a, b, r, 0);
  6072. }
  6073. else if (_sp_cmp_abs(a, b) != MP_LT) {
  6074. err = _sp_sub_off(a, b, r, 0);
  6075. if (sp_iszero(r)) {
  6076. r->sign = MP_ZPOS;
  6077. }
  6078. else {
  6079. r->sign = a->sign;
  6080. }
  6081. }
  6082. else {
  6083. err = _sp_sub_off(b, a, r, 0);
  6084. if (sp_iszero(r)) {
  6085. r->sign = MP_ZPOS;
  6086. }
  6087. else {
  6088. r->sign = b->sign;
  6089. }
  6090. }
  6091. #endif
  6092. }
  6093. return err;
  6094. }
  6095. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  6096. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6097. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  6098. /* Subtract b from a into r: r = a - b
  6099. *
  6100. * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
  6101. *
  6102. * @param [in] a SP integer to subtract from.
  6103. * @param [in] b SP integer to subtract.
  6104. * @param [out] r SP integer to store result in.
  6105. *
  6106. * @return MP_OKAY on success.
  6107. * @return MP_VAL when a, b, or r is NULL.
  6108. */
  6109. int sp_sub(sp_int* a, sp_int* b, sp_int* r)
  6110. {
  6111. int err = MP_OKAY;
  6112. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  6113. err = MP_VAL;
  6114. }
  6115. else {
  6116. #ifndef WOLFSSL_SP_INT_NEGATIVE
  6117. err = _sp_sub_off(a, b, r, 0);
  6118. #else
  6119. if (a->sign != b->sign) {
  6120. r->sign = a->sign;
  6121. err = _sp_add_off(a, b, r, 0);
  6122. }
  6123. else if (_sp_cmp_abs(a, b) != MP_LT) {
  6124. err = _sp_sub_off(a, b, r, 0);
  6125. if (sp_iszero(r)) {
  6126. r->sign = MP_ZPOS;
  6127. }
  6128. else {
  6129. r->sign = a->sign;
  6130. }
  6131. }
  6132. else {
  6133. err = _sp_sub_off(b, a, r, 0);
  6134. if (sp_iszero(r)) {
  6135. r->sign = MP_ZPOS;
  6136. }
  6137. else {
  6138. r->sign = 1 - a->sign;
  6139. }
  6140. }
  6141. #endif
  6142. }
  6143. return err;
  6144. }
  6145. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6146. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
  6147. /****************************
  6148. * Add/Subtract mod functions
  6149. ****************************/
  6150. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6151. (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
  6152. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
/* Add two value and reduce: r = (a + b) % m
 *
 * @param  [in]   a  SP integer to add.
 * @param  [in]   b  SP integer to add with.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b, m or r is NULL.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Sum can be one digit longer than the larger operand. NULL operands
     * only affect the temporary's size here; they error out below. */
    int used = ((a == NULL) || (b == NULL)) ? 1 :
               ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

    ALLOC_SP_INT_SIZE(t, used, err, NULL);
#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
    }
#endif
    /* Add into the temporary, then reduce into r. */
    if (err == MP_OKAY) {
        err = sp_add(a, b, t);
    }
    if (err == MP_OKAY) {
        err = sp_mod(t, m, r);
    }
#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rma");
    }
#endif

    FREE_SP_INT(t, NULL);
    return err;
}
  6195. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
  6196. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  6197. #if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  6198. defined(HAVE_ECC))
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * @param  [in]   a  SP integer to subtract from
 * @param  [in]   b  SP integer to subtract.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b, m or r is NULL.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Without negative-number support, keep every intermediate
     * non-negative: reduce a and b below m first, and add m to a before
     * subtracting when a < b. */
    int err = MP_OKAY;
    /* Size the temporaries for the largest possible intermediate. */
    int used = ((a == NULL) || (b == NULL) || (m == NULL)) ? 1 :
               ((a->used >= m->used) ?
               ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
               ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
    DECL_SP_INT_ARRAY(t, used, 2);

    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
    }
#endif

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        /* Reduce a below m if needed. */
        if (_sp_cmp(a, m) != MP_LT) {
            err = sp_mod(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Reduce b below m if needed. */
        if (_sp_cmp(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            b = t[1];
        }
    }
    if (err == MP_OKAY) {
        /* a - b would underflow when a < b; bump a by m first. */
        if (_sp_cmp(a, b) == MP_LT) {
            err = sp_add(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        err = sp_sub(a, b, r);
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rms");
    }
#endif

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
#else /* WOLFSSL_SP_INT_NEGATIVE */
    /* With negative-number support, subtract then let sp_mod normalize. */
    int err = MP_OKAY;
    int used = ((a == NULL) || (b == NULL)) ? 1 :
               ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
    }
#endif

    ALLOC_SP_INT_SIZE(t, used, err, NULL);
    if (err == MP_OKAY) {
        err = sp_sub(a, b, t);
    }
    if (err == MP_OKAY) {
        err = sp_mod(t, m, r);
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rms");
    }
#endif

    FREE_SP_INT(t, NULL);
    return err;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
}
  6290. #endif /* WOLFSSL_SP_MATH_ALL */
  6291. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Add two value and reduce: r = (a + b) % m
 *
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * @param  [in]   a  SP integer to add.
 * @param  [in]   b  SP integer to add with.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 */
int sp_addmod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    sp_int_sword w;    /* running sum with carry */
    sp_int_sword s;    /* running (sum - m) used only for its final sign */
#else
    sp_int_digit wl;
    sp_int_digit wh;
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit t;
#endif
    sp_int_digit mask;
    int i;

    if (r->size < m->used) {
        err = MP_VAL;
    }
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* Add a to b into r. Do the subtract of modulus but don't store result.
         * When subtract result is negative, the overflow will be negative.
         * Only need to subtract mod when result is positive - overflow is
         * positive.
         */
#ifndef SQR_MUL_ASM
        w = 0;
        s = 0;
#else
        wl = 0;
        wh = 0;
        sl = 0;
        sh = 0;
#endif
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized. */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);

#ifndef SQR_MUL_ASM
            w += a->dp[i] & mask_a;
            w += b->dp[i] & mask_b;
            r->dp[i] = (sp_int_digit)w;
            s += (sp_int_digit)w;
            s -= m->dp[i];
            s >>= DIGIT_BIT;
            w >>= DIGIT_BIT;
#else
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC(wl, wh, t);
            t = b->dp[i] & mask_b;
            SP_ASM_ADDC(wl, wh, t);
            r->dp[i] = wl;
            SP_ASM_ADDC(sl, sh, wl);
            t = m->dp[i];
            SP_ASM_SUBC(sl, sh, t);
            sl = sh;
            /* Sign-extend the borrow of the trial subtraction. */
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE-1));
            wl = wh;
            wh = 0;
#endif
        }
#ifndef SQR_MUL_ASM
        s += (sp_int_digit)w;
        /* s will be positive when subtracting modulus is needed. */
        mask = (sp_int_digit)0 - (s >= 0);
#else
        SP_ASM_ADDC(sl, sh, wl);
        /* s will be positive when subtracting modulus is needed. */
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
#endif

        /* Constant time, conditionally, subtract modulus from sum. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        wl = 0;
        wh = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            w += r->dp[i];
            w -= m->dp[i] & mask;
            r->dp[i] = (sp_int_digit)w;
            w >>= DIGIT_BIT;
#else
            t = r->dp[i];
            SP_ASM_ADDC(wl, wh, t);
            t = m->dp[i] & mask;
            SP_ASM_SUBC(wl, wh, t);
            r->dp[i] = wl;
            wl = wh;
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        sp_clamp(r);
#if 0
        sp_print(r, "rma");
#endif
    }

    return err;
}
  6419. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6420. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * @param  [in]   a  SP integer to subtract from
 * @param  [in]   b  SP integer to subtract.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 */
int sp_submod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    sp_int_sword w;
#else
    sp_int_digit l;
    sp_int_digit h;
    sp_int_digit t;
#endif
    sp_int_digit mask;
    int i;

    if (r->size < m->used + 1) {
        err = MP_VAL;
    }
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* In constant time, subtract b from a putting result in r. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
        h = 0;
#endif
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized. */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);

#ifndef SQR_MUL_ASM
            w += a->dp[i] & mask_a;
            w -= b->dp[i] & mask_b;
            r->dp[i] = (sp_int_digit)w;
            /* Signed shift carries the borrow to the next digit. */
            w >>= DIGIT_BIT;
#else
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC(l, h, t);
            t = b->dp[i] & mask_b;
            SP_ASM_SUBC(l, h, t);
            r->dp[i] = l;
            l = h;
            h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
        }
        /* When w is negative then we need to add modulus to make result
         * positive. */
#ifndef SQR_MUL_ASM
        mask = (sp_int_digit)0 - (w < 0);
#else
        mask = h;
#endif

        /* Constant time, conditionally, add modulus to difference. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
        h = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            w += r->dp[i];
            w += m->dp[i] & mask;
            r->dp[i] = (sp_int_digit)w;
            w >>= DIGIT_BIT;
#else
            t = r->dp[i];
            SP_ASM_ADDC(l, h, t);
            t = m->dp[i] & mask;
            SP_ASM_ADDC(l, h, t);
            r->dp[i] = l;
            l = h;
            h = 0;
#endif
        }
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        sp_clamp(r);
#if 0
        sp_print(r, "rms");
#endif
    }

    return err;
}
  6527. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6528. /********************
  6529. * Shifting functoins
  6530. ********************/
  6531. #if !defined(NO_DH) || defined(HAVE_ECC) || (defined(WC_RSA_BLINDING) && \
  6532. !defined(WOLFSSL_RSA_VERIFY_ONLY))
  6533. /* Left shift the multi-precision number by a number of digits.
  6534. *
  6535. * @param [in,out] a SP integer to shift.
  6536. * @param [in] s Number of digits to shift.
  6537. *
  6538. * @return MP_OKAY on success.
  6539. * @return MP_VAL when a is NULL or the result is too big to fit in an SP.
  6540. */
  6541. int sp_lshd(sp_int* a, int s)
  6542. {
  6543. int err = MP_OKAY;
  6544. if (a == NULL) {
  6545. err = MP_VAL;
  6546. }
  6547. if ((err == MP_OKAY) && (a->used + s > a->size)) {
  6548. err = MP_VAL;
  6549. }
  6550. if (err == MP_OKAY) {
  6551. XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
  6552. a->used += s;
  6553. XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
  6554. sp_clamp(a);
  6555. }
  6556. return err;
  6557. }
  6558. #endif
  6559. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6560. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6561. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Left shift the multi-precision number by n bits.
 * Bits may be larger than the word size.
 *
 * @param  [in,out]  a  SP integer to shift.
 * @param  [in]      n  Number of bits to shift left.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when the result is too big to fit in the SP integer.
 */
static int sp_lshb(sp_int* a, int n)
{
    int err = MP_OKAY;

    if (a->used != 0) {
        /* Whole-digit part of the shift. */
        int s = n >> SP_WORD_SHIFT;
        int i;

        if (a->used + s >= a->size) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            /* Remaining bit part of the shift. */
            n &= SP_WORD_MASK;
            if (n != 0) {
                sp_int_digit v;

                /* Bits shifted out of the current top digit. */
                v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
                /* Work from the top down so no digit is overwritten
                 * before it has been read. */
                a->dp[a->used - 1 + s] = a->dp[a->used - 1] << n;
                for (i = a->used - 2; i >= 0; i--) {
                    a->dp[i + 1 + s] |= a->dp[i] >> (SP_WORD_SIZE - n);
                    a->dp[i + s] = a->dp[i] << n;
                }
                /* Overflow bits become a new top digit. */
                if (v != 0) {
                    a->dp[a->used + s] = v;
                    a->used++;
                }
            }
            else if (s > 0) {
                /* Pure digit shift, top down. */
                for (i = a->used - 1; i >= 0; i--) {
                    a->dp[i + s] = a->dp[i];
                }
            }
            a->used += s;
            /* Zero the vacated low digits. */
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
        }
    }

    return err;
}
  6605. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6606. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6607. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6608. !defined(NO_DH) || defined(HAVE_ECC) || \
  6609. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  6610. /* Shift a right by n digits into r: r = a >> (n * SP_WORD_SIZE)
  6611. *
  6612. * @param [in] a SP integer to shift.
  6613. * @param [in] n Number of digits to shift.
  6614. * @param [out] r SP integer to store result in.
  6615. */
  6616. void sp_rshd(sp_int* a, int c)
  6617. {
  6618. if (a != NULL) {
  6619. int i;
  6620. int j;
  6621. if (c >= a->used) {
  6622. _sp_zero(a);
  6623. }
  6624. else {
  6625. for (i = c, j = 0; i < a->used; i++, j++) {
  6626. a->dp[j] = a->dp[i];
  6627. }
  6628. a->used -= c;
  6629. }
  6630. }
  6631. }
  6632. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  6633. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6634. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6635. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6636. defined(WOLFSSL_HAVE_SP_DH)
/* Shift a right by n bits into r: r = a >> n
 * Bits may be larger than the word size.
 *
 * @param [in]  a  SP integer to shift.
 * @param [in]  n  Number of bits to shift.
 * @param [out] r  SP integer to store result in. May alias a.
 */
void sp_rshb(sp_int* a, int n, sp_int* r)
{
    /* Number of whole digits shifted out of the bottom. */
    int i = n >> SP_WORD_SHIFT;

    if (i >= a->used) {
        /* All digits shifted out - result is zero. */
        _sp_zero(r);
    }
    else {
        int j;

        /* Number of bits to shift within each digit. */
        n &= SP_WORD_SIZE - 1;
        if (n == 0) {
            /* Whole-digit shift only: copy digits down. */
            for (j = 0; i < a->used; i++, j++)
                r->dp[j] = a->dp[i];
            r->used = j;
        }
        else if (n > 0) {
            /* Each result digit combines bits of two adjacent digits. */
            for (j = 0; i < a->used-1; i++, j++)
                r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
            /* Top digit has no digit above it to pull bits from. */
            r->dp[j] = a->dp[i] >> n;
            r->used = j + 1;
            sp_clamp(r);
        }
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (sp_iszero(r)) {
            /* Zero is always stored as positive. */
            r->sign = MP_ZPOS;
        }
        else {
            r->sign = a->sign;
        }
#endif
    }
}
  6674. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6675. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
  6676. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6677. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6678. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Divide a by d and return the quotient in r and the remainder in rem.
 * r = a / d; rem = a % d
 *
 * School-book long division on digits (cf. Knuth TAOCP Vol. 2, Algorithm D):
 * the divisor is first shifted left so its top bit is set, each quotient
 * digit is estimated from the top two dividend digits and corrected by
 * comparing a trial product against the running dividend.
 *
 * @param [in]  a    SP integer to be divided.
 * @param [in]  d    SP integer to divide by.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  SP integer that is the remainder. May be NULL.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
#ifndef WOLFSSL_SP_MATH_ALL
static
#endif
int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
{
    int err = MP_OKAY;
    int ret;
    int done = 0;
    int i;
    int s = 0;
    sp_int_digit dt;       /* Top digit of (normalized) divisor. */
    sp_int_digit t;        /* Current quotient digit estimate. */
    sp_int* sa = NULL;     /* Shifted dividend / running remainder. */
    sp_int* sd = NULL;     /* Shifted divisor. */
    sp_int* tr = NULL;     /* Quotient being built. */
    sp_int* trial = NULL;  /* Trial product: d * quotient digit. */
#ifdef WOLFSSL_SP_INT_NEGATIVE
    int aSign = MP_ZPOS;
    int dSign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4);

    /* Validate parameters: at least one of quotient/remainder wanted. */
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
        err = MP_VAL;
    }
    /* Division by zero is invalid. */
    if ((err == MP_OKAY) && sp_iszero(d)) {
        err = MP_VAL;
    }
    /* Quotient needs at most a->used - d->used + 2 digits. */
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
        err = MP_VAL;
    }
    /* Remainder is bounded by the smaller of a and d (plus one digit). */
    if ((err == MP_OKAY) && (rem != NULL)) {
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
            err = MP_VAL;
        }
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
            err = MP_VAL;
        }
    }
    /* May need to shift number being divided left into a new word. */
    if ((err == MP_OKAY) && (a->used == SP_INT_DIGITS)) {
        int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
        if ((bits != SP_WORD_SIZE) &&
            (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
            err = MP_VAL;
        }
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(d, "b");
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Remember signs; the division below works on magnitudes. */
        aSign = a->sign;
        dSign = d->sign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Handle simple cases without the full algorithm. */
        ret = _sp_cmp_abs(a, d);
        if (ret == MP_LT) {
            /* |a| < |d|: quotient 0, remainder a. */
            if (rem != NULL) {
                sp_copy(a, rem);
            }
            if (r != NULL) {
                sp_set(r, 0);
            }
            done = 1;
        }
        else if (ret == MP_EQ) {
            /* |a| == |d|: quotient +/-1, remainder 0. */
            if (rem != NULL) {
                sp_set(rem, 0);
            }
            if (r != NULL) {
                sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
            }
            done = 1;
        }
        else if (sp_count_bits(a) == sp_count_bits(d)) {
            /* a is greater than d but same bit length - quotient is 1. */
            if (rem != NULL) {
                _sp_sub_off(a, d, rem, 0);
#ifdef WOLFSSL_SP_INT_NEGATIVE
                rem->sign = aSign;
#endif
            }
            if (r != NULL) {
                sp_set(r, 1);
#ifdef WOLFSSL_SP_INT_NEGATIVE
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
            }
            done = 1;
        }
    }

    /* Allocate temporaries for the full algorithm. */
    if ((!done) && (err == MP_OKAY)) {
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        int cnt = 4;
        /* Reuse caller's rem/r buffers as temporaries where possible to
         * reduce the number of allocations. */
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
            cnt--;
        }
        if ((r != NULL) && (r != d)) {
            cnt--;
        }
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, a->used + 1, cnt, err, NULL);
#else
        ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL);
#endif
    }
    if ((!done) && (err == MP_OKAY)) {
        sd = td[0];
        trial = td[1];
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        i = 2;
        /* Same reuse conditions as the allocation count above. */
        sa = ((rem != NULL) && (rem != d) && (rem->size > a->used)) ? rem :
            td[i++];
        tr = ((r != NULL) && (r != d)) ? r : td[i];
#else
        sa = td[2];
        tr = td[3];
#endif

        sp_init_size(sd, d->used + 1);
        sp_init_size(trial, a->used + 1);
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        if (sa != rem) {
            sp_init_size(sa, a->used + 1);
        }
        if (tr != r) {
            sp_init_size(tr, a->used - d->used + 2);
        }
#else
        sp_init_size(sa, a->used + 1);
        sp_init_size(tr, a->used - d->used + 2);
#endif

        /* Normalize: shift both a and d left so the divisor's top bit is
         * set - required for good single-digit quotient estimates. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
        sp_copy(a, sa);
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                sp_copy(d, sd);
                /* Work with the shifted divisor from here on. */
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
#ifdef WOLFSSL_SP_SMALL
        int c;
#else
        int j;
        int o;                 /* Offset of current quotient digit. */
    #ifndef SQR_MUL_ASM
        sp_int_sword sw;       /* Signed word for borrow propagation. */
    #else
        sp_int_digit sl;
        sp_int_digit sh;
        sp_int_digit st;
    #endif
#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Magnitude-only division below. */
        sa->sign = MP_ZPOS;
        sd->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

        /* Zero the quotient digits that will be accumulated into. */
        tr->used = sa->used - d->used + 1;
        sp_clear(tr);
        tr->used = sa->used - d->used + 1;
        /* Top digit of normalized divisor used for estimation. */
        dt = d->dp[d->used-1];

        /* If the top digits of sa are >= the divisor, subtract the
         * divisor once at the top offset first (quotient digit 1). */
        for (i = d->used - 1; i > 0; i--) {
            if (sa->dp[sa->used - d->used + i] != d->dp[i]) {
                break;
            }
        }
        if (sa->dp[sa->used - d->used + i] >= d->dp[i]) {
            i = sa->used;
            _sp_sub_off(sa, d, sa, sa->used - d->used);
            /* Keep the same used so that 0 zeros will be put in. */
            sa->used = i;
            if (r != NULL) {
                tr->dp[sa->used - d->used] = 1;
            }
        }
        /* Produce one quotient digit per iteration, top down. */
        for (i = sa->used - 1; i >= d->used; i--) {
            /* Estimate quotient digit from top two dividend digits
             * divided by the divisor's top digit. */
            if (sa->dp[i] == dt) {
                t = SP_DIGIT_MAX;
            }
            else {
                t = sp_div_word(sa->dp[i], sa->dp[i-1], dt);
            }
#ifdef WOLFSSL_SP_SMALL
            /* Correct the estimate: decrement t while d * t > sa. */
            do {
                err = _sp_mul_d(d, t, trial, i - d->used);
                if (err != MP_OKAY) {
                    break;
                }
                c = _sp_cmp_abs(trial, sa);
                if (c == MP_GT) {
                    t--;
                }
            }
            while (c == MP_GT);
            if (err != MP_OKAY) {
                break;
            }
            /* Subtract the trial product and accumulate the digit,
             * propagating any carry into the next quotient digit. */
            _sp_sub_off(sa, trial, sa, 0);
            tr->dp[i - d->used] += t;
            if (tr->dp[i - d->used] < t) {
                tr->dp[i + 1 - d->used]++;
            }
#else
            o = i - d->used;
            /* Correct the estimate: compute trial = d * t and compare
             * against sa at offset o, decrementing t while too big. */
            do {
    #ifndef SQR_MUL_ASM
                sp_int_word tw = 0;
    #else
                sp_int_digit tl = 0;
                sp_int_digit th = 0;
    #endif
                /* trial = d * t (d->used + 1 digits). */
                for (j = 0; j < d->used; j++) {
    #ifndef SQR_MUL_ASM
                    tw += (sp_int_word)d->dp[j] * t;
                    trial->dp[j] = (sp_int_digit)tw;
                    tw >>= SP_WORD_SIZE;
    #else
                    SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
                    trial->dp[j] = tl;
                    tl = th;
                    th = 0;
    #endif
                }
    #ifndef SQR_MUL_ASM
                trial->dp[j] = (sp_int_digit)tw;
    #else
                trial->dp[j] = tl;
    #endif

                /* Compare trial with sa at offset o from the top digit
                 * down to the first differing digit. */
                for (j = d->used; j > 0; j--) {
                    if (trial->dp[j] != sa->dp[j + o]) {
                        break;
                    }
                }
                if (trial->dp[j] > sa->dp[j + o]) {
                    t--;
                }
            }
            while (trial->dp[j] > sa->dp[j + o]);

            /* sa -= trial at offset o, propagating the borrow. */
    #ifndef SQR_MUL_ASM
            sw = 0;
    #else
            sl = 0;
            sh = 0;
    #endif
            for (j = 0; j <= d->used; j++) {
    #ifndef SQR_MUL_ASM
                sw += sa->dp[j + o];
                sw -= trial->dp[j];
                sa->dp[j + o] = (sp_int_digit)sw;
                /* Arithmetic shift keeps the borrow as -1 or 0. */
                sw >>= SP_WORD_SIZE;
    #else
                st = sa->dp[j + o];
                SP_ASM_ADDC(sl, sh, st);
                st = trial->dp[j];
                SP_ASM_SUBC(sl, sh, st);
                sa->dp[j + o] = sl;
                sl = sh;
                /* Sign-extend the borrow into the next digit. */
                sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
    #endif
            }
            tr->dp[o] = t;
#endif /* WOLFSSL_SP_SMALL */
        }
        /* What remains in sa is the (normalized) remainder. */
        sa->used = i + 1;

        if ((err == MP_OKAY) && (rem != NULL)) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            sa->sign = (sa->used == 0) ? MP_ZPOS : aSign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
            /* Undo the normalization shift. */
            if (s != SP_WORD_SIZE) {
                sp_rshb(sa, s, sa);
            }
            sp_copy(sa, rem);
            sp_clamp(rem);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (sp_iszero(rem)) {
                rem->sign = MP_ZPOS;
            }
#endif
        }
        if ((err == MP_OKAY) && (r != NULL)) {
            sp_copy(tr, r);
            sp_clamp(r);
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (sp_iszero(r)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

#if 0
    if (err == MP_OKAY) {
        if (rem != NULL) {
            sp_print(rem, "rdr");
        }
        if (r != NULL) {
            sp_print(r, "rdw");
        }
    }
#endif

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
  7008. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  7009. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7010. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7011. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7012. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  7013. #ifndef FREESCALE_LTC_TFM
/* Calculate the remainder of dividing a by m: r = a mod m.
 *
 * When negative numbers are supported, the result is adjusted to have the
 * same sign as the modulus m (mathematical modulo rather than truncated
 * remainder).
 *
 * @param [in]  a  SP integer to reduce.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL or m is 0.
 */
int sp_mod(sp_int* a, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* No negatives: the division remainder is the answer. */
    if (err == MP_OKAY) {
        err = sp_div(a, m, NULL, r);
    }
#else
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
    if (err == MP_OKAY) {
        sp_init_size(t, a->used + 1);
        err = sp_div(a, m, NULL, t);
    }
    if (err == MP_OKAY) {
        /* When the remainder's sign differs from m's, add m once so the
         * result takes the sign of the modulus. */
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
            err = sp_add(t, m, r);
        }
        else {
            err = sp_copy(t, r);
        }
    }
    FREE_SP_INT(t, NULL);
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    return err;
}
  7054. #endif /* !FREESCALE_LTC_TFM */
  7055. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  7056. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7057. /* START SP_MUL implementations. */
  7058. /* This code is generated.
  7059. * To generate:
  7060. * cd scripts/sp/sp_int
  7061. * ./gen.sh
  7062. * File sp_mul.c contains code.
  7063. */
  7064. #ifdef SQR_MUL_ASM
/* Multiply a by b into r where a and b have same no. digits. r = a * b
 *
 * Optimised code for when number of digits in a and b are the same.
 * Comba (column-wise) multiplication: each iteration of the outer loops
 * accumulates one column of digit products into the (l, h, o) triple.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_nxn(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized to the exact product length. */
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l, h, o;
        sp_int_digit* dp;

        /* Column 0: single product; note the macro's first argument
         * receives the low half (t[0] = h below). */
        h = 0;
        l = 0;
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Lower columns 1 .. a->used-1: column k sums a[0..k]*b[k..0]. */
        for (k = 1; k <= a->used - 1; k++) {
            j = k;
            dp = a->dp;
            for (; j >= 0; dp++, j--) {
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
            }
            /* Emit column digit; shift the accumulator down one digit. */
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Upper columns a->used .. 2*(a->used-1). */
        for (; k <= (a->used - 1) * 2; k++) {
            i = k - (b->used - 1);
            dp = &b->dp[b->used - 1];
            for (; i < a->used; i++, dp--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry digit. */
        t[k] = l;
        r->used = k + 1;
        /* Result was built in t so that r may alias a or b. */
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
/* Multiply a by b into r. r = a * b
 *
 * Comba (column-wise) multiplication for operands with possibly different
 * digit counts, using the assembly carry macros.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized to the exact product length. */
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;

        /* Column 0: single product; note the macro's first argument
         * receives the low half (t[0] = h below). */
        h = 0;
        l = 0;
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Lower columns: start from a->dp[0] while b's index runs down. */
        for (k = 1; k <= b->used - 1; k++) {
            i = 0;
            j = k;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            /* Emit column digit; shift accumulator down one digit. */
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Upper columns: start from b's top digit. */
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
            j = b->used - 1;
            i = k - j;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry digit. */
        t[k] = l;
        r->used = k + 1;
        /* Result was built in t so that r may alias a or b. */
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  7214. #else
/* Multiply a by b into r. r = a * b
 *
 * Portable C comba multiplication using a double-width word to accumulate
 * each column of digit products.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized to the exact product length. */
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;  /* Double-width digit product. */
        sp_int_word l;  /* Low part of column accumulator. */
        sp_int_word h;  /* High part of column accumulator. */
#ifdef SP_WORD_OVERFLOW
        /* Extra carry word for platforms where the column sum can
         * overflow the double-width accumulator. */
        sp_int_word o;
#endif

        /* Column 0: single product. */
        w = (sp_int_word)a->dp[0] * b->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        /* Columns 1 .. (a->used-1)+(b->used-1). */
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
            i = k - (b->used - 1);
            /* Branch-free clamp of i to 0 when negative: the mask is all
             * ones for i >= 0 and zero for i < 0. */
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
            j = k - i;
            /* Sum all products a[i] * b[j] with i + j == k. */
            for (; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * b->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Emit column digit; shift accumulator down one digit. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        /* Final carry digit. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        /* Result was built in t so that r may alias a or b. */
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  7295. #endif
  7296. #ifndef WOLFSSL_SP_SMALL
  7297. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  7298. #if SP_WORD_SIZE == 64
  7299. #ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Long-hand implementation for 4-digit operands: all 16 digit products
 * are computed up front, then summed column by column with w[0] reused as
 * the running accumulator/carry.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[16];
#endif
    sp_int_digit* da = a->dp;
    sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All pairwise digit products, grouped by column (i + j). */
        w[0] = (sp_int_word)da[0] * db[0];
        w[1] = (sp_int_word)da[0] * db[1];
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];

        /* Digit 0: low half of w[0]; assignment truncates to a digit. */
        r->dp[0] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 1: carry + low halves of column-1 products. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 2: carry + high halves of column-1 + lows of column 2. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 3: carry + highs of column 2 + lows of column 3. */
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 4: carry + highs of column 3 + lows of column 4. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 5: carry + highs of column 4 + lows of column 5. */
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 6: carry + highs of column 5 + low of w[15]. */
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 7: carry + high of w[15]. */
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = w[0];
        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  7415. #else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 4-digit operands: columns 0-3 are staged in t
 * (so r may alias a or b) and columns 4-7 are written to r directly.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return  MP_OKAY on success.
 */
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[4];

    /* Column 0: single product; note the macro's first argument receives
     * the low half (t[0] = h below). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: safe to write to r - inputs a->dp[0..3]/b->dp[0..3] for
     * later columns are no longer read from these positions. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: final product digit and carry. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Copy the staged low digits into place. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);
    return MP_OKAY;
}
  7477. #endif /* SQR_MUL_ASM */
  7478. #endif /* SP_WORD_SIZE == 64 */
  7479. #if SP_WORD_SIZE == 64
  7480. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 6-digit operands: columns 0-5 are staged in t
 * (so r may alias a or b) and columns 6-11 are written to r directly.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply.
 * @param [out] r  SP integer result.
 *
 * @return  MP_OKAY on success.
 */
static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[6];

    /* Column 0: single product; note the macro's first argument receives
     * the low half (t[0] = h below). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: safe to write to r from here on. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Columns 10 and 11: final product digit and carry. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Copy the staged low digits into place. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);
    return MP_OKAY;
}
  7578. #endif /* SQR_MUL_ASM */
  7579. #endif /* SP_WORD_SIZE == 64 */
  7580. #if SP_WORD_SIZE == 32
  7581. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba (column-wise, product-scanning) implementation, fully unrolled for
 * operands of exactly 8 digits.  Result digit k is the sum of all partial
 * products a->dp[i] * b->dp[j] with i + j == k.  The running column sum is
 * held in the triple-digit accumulator l:h:o (low word, high word, overflow
 * word) updated by the SP_ASM_* macros.
 *
 * NOTE(review): callers are presumed to guarantee 8 valid digits in each
 * operand (shorter values zero-padded) — confirm against call sites.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.  May alias a or b: the low half of
 *                   the product is staged in t[] and copied in only after
 *                   the last read of a->dp/b->dp.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;  /* Low word of the current column sum. */
    sp_int_digit h = 0;  /* High word of the current column sum. */
    sp_int_digit o = 0;  /* Overflow word: carries beyond h. */
    sp_int_digit t[8];   /* Staging for result digits 0..7 (aliasing). */

    /* Column 0: one product.  SP_ASM_MUL's first output is the low word,
     * so h gets the low digit (stored to t[0]) and l gets the high digit,
     * which seeds column 1. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: first add cannot carry into o (SP_ASM_MUL_ADD_NO since
     * h is 0); later adds in a column track overflow in o. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    /* Shift accumulator down one digit for the next column. */
    l = h;
    h = o;
    o = 0;
    /* Columns 2..7: low half of the product, staged into t[]. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 8..14: high half of the product.  These digits of r are not
     * read as inputs again, so they are written directly to r->dp[]. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Column 14: single product; two digits accumulated in l:h cannot
     * overflow, so no o tracking needed. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* a and b are no longer read: safe to copy the staged low half in. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    /* Drop leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  7723. #endif /* SQR_MUL_ASM */
  7724. #endif /* SP_WORD_SIZE == 32 */
  7725. #if SP_WORD_SIZE == 32
  7726. #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Comba (column-wise, product-scanning) implementation, fully unrolled for
 * operands of exactly 12 digits.  Result digit k is the sum of all partial
 * products a->dp[i] * b->dp[j] with i + j == k.  The running column sum is
 * held in the triple-digit accumulator l:h:o (low word, high word, overflow
 * word) updated by the SP_ASM_* macros.
 *
 * NOTE(review): callers are presumed to guarantee 12 valid digits in each
 * operand (shorter values zero-padded) — confirm against call sites.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.  May alias a or b: the low half of
 *                   the product is staged in t[] and copied in only after
 *                   the last read of a->dp/b->dp.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;  /* Low word of the current column sum. */
    sp_int_digit h = 0;  /* High word of the current column sum. */
    sp_int_digit o = 0;  /* Overflow word: carries beyond h. */
    sp_int_digit t[12];  /* Staging for result digits 0..11 (aliasing). */

    /* Column 0: one product.  SP_ASM_MUL's first output is the low word,
     * so h gets the low digit (stored to t[0]) and l gets the high digit,
     * which seeds column 1. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: first add cannot carry into o (SP_ASM_MUL_ADD_NO since
     * h is 0); later adds in a column track overflow in o. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    /* Shift accumulator down one digit for the next column. */
    l = h;
    h = o;
    o = 0;
    /* Columns 2..11: low half of the product, staged into t[]. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 12..22: high half of the product.  These digits of r are not
     * read as inputs again, so they are written directly to r->dp[]. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Column 22: single product; two digits accumulated in l:h cannot
     * overflow, so no o tracking needed. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* a and b are no longer read: safe to copy the staged low half in. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    /* Drop leading zero digits. */
    sp_clamp(r);
    return MP_OKAY;
}
  7980. #endif /* SQR_MUL_ASM */
  7981. #endif /* SP_WORD_SIZE == 32 */
  7982. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  7983. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  7984. #if SP_INT_DIGITS >= 32
/* Multiply a by b and store in r: r = a * b
 *
 * Comba (column-wise, product-scanning) implementation, fully unrolled for
 * operands of exactly 16 digits.  Result digit k is the sum of all partial
 * products a->dp[i] * b->dp[j] with i + j == k.  The running column sum is
 * held in the triple-digit accumulator l:h:o (low word, high word, overflow
 * word) updated by the SP_ASM_* macros.
 *
 * Unlike the smaller unrolled variants, the staging buffer for the low 16
 * result digits is heap allocated when WOLFSSL_SMALL_STACK is enabled,
 * which is why this function can fail with MP_MEM.
 *
 * NOTE(review): callers are presumed to guarantee 16 valid digits in each
 * operand (shorter values zero-padded) — confirm against call sites.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.  May alias a or b: the low half of
 *                   the product is staged in t[] and copied in only after
 *                   the last read of a->dp/b->dp.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_int_digit l = 0;  /* Low word of the current column sum. */
    sp_int_digit h = 0;  /* High word of the current column sum. */
    sp_int_digit o = 0;  /* Overflow word: carries beyond h. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    sp_int_digit t[16];  /* Staging for result digits 0..15 (aliasing). */
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Small-stack build: staging buffer comes from the heap. */
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column 0: one product.  SP_ASM_MUL's first output is the low
         * word, so h gets the low digit (stored to t[0]) and l gets the
         * high digit, which seeds column 1. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        /* Column 1: first add cannot carry into o (SP_ASM_MUL_ADD_NO
         * since h is 0); later adds in a column track overflow in o. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
        t[1] = l;
        /* Shift accumulator down one digit for the next column. */
        l = h;
        h = o;
        o = 0;
        /* Columns 2..15: low half of the product, staged into t[]. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Columns 16..30: high half of the product.  These digits of r
         * are not read as inputs again, so they are written directly to
         * r->dp[]. */
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Column 30: single product; two digits accumulated in l:h cannot
         * overflow, so no o tracking needed. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;
        /* a and b are no longer read: safe to copy the staged low half in. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        /* Drop leading zero digits. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8401. #endif /* SP_INT_DIGITS >= 32 */
  8402. #if SP_INT_DIGITS >= 48
/* Multiply a by b and store in r: r = a * b
 *
 * Comba (column-wise) implementation, fully unrolled for 24-digit operands.
 * Uses a 24-digit scratch buffer so the low half of the product is computed
 * before r is written, allowing r to alias a or b.
 *
 * Assumes a->used <= 24 and b->used <= 24, and that r has capacity for the
 * full 48-digit product (caller-enforced; see SP_INT_DIGITS guard above).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
  8414. static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r)
  8415. {
  8416. int err = MP_OKAY;
  8417. sp_int_digit l = 0;
  8418. sp_int_digit h = 0;
  8419. sp_int_digit o = 0;
  8420. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8421. sp_int_digit* t = NULL;
  8422. #else
  8423. sp_int_digit t[24];
  8424. #endif
  8425. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  8426. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  8427. DYNAMIC_TYPE_BIGINT);
  8428. if (t == NULL) {
  8429. err = MP_MEM;
  8430. }
  8431. #endif
  8432. if (err == MP_OKAY) {
  8433. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  8434. t[0] = h;
  8435. h = 0;
  8436. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  8437. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  8438. t[1] = l;
  8439. l = h;
  8440. h = o;
  8441. o = 0;
  8442. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  8443. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  8444. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  8445. t[2] = l;
  8446. l = h;
  8447. h = o;
  8448. o = 0;
  8449. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  8450. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  8451. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  8452. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  8453. t[3] = l;
  8454. l = h;
  8455. h = o;
  8456. o = 0;
  8457. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  8458. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  8459. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  8460. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  8461. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  8462. t[4] = l;
  8463. l = h;
  8464. h = o;
  8465. o = 0;
  8466. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  8467. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  8468. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  8469. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  8470. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  8471. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  8472. t[5] = l;
  8473. l = h;
  8474. h = o;
  8475. o = 0;
  8476. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  8477. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  8478. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  8479. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  8480. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  8481. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  8482. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  8483. t[6] = l;
  8484. l = h;
  8485. h = o;
  8486. o = 0;
  8487. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  8488. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  8489. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  8490. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  8491. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  8492. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  8493. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  8494. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  8495. t[7] = l;
  8496. l = h;
  8497. h = o;
  8498. o = 0;
  8499. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  8500. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  8501. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  8502. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  8503. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  8504. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  8505. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  8506. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  8507. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  8508. t[8] = l;
  8509. l = h;
  8510. h = o;
  8511. o = 0;
  8512. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  8513. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  8514. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  8515. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  8516. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  8517. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  8518. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  8519. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  8520. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  8521. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  8522. t[9] = l;
  8523. l = h;
  8524. h = o;
  8525. o = 0;
  8526. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  8527. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  8528. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  8529. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  8530. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  8531. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  8532. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  8533. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  8534. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  8535. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  8536. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  8537. t[10] = l;
  8538. l = h;
  8539. h = o;
  8540. o = 0;
  8541. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  8542. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  8543. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  8544. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  8545. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  8546. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  8547. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  8548. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  8549. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  8550. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  8551. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  8552. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  8553. t[11] = l;
  8554. l = h;
  8555. h = o;
  8556. o = 0;
  8557. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  8558. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  8559. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  8560. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  8561. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  8562. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  8563. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  8564. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  8565. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  8566. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  8567. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  8568. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  8569. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  8570. t[12] = l;
  8571. l = h;
  8572. h = o;
  8573. o = 0;
  8574. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  8575. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  8576. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  8577. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  8578. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  8579. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  8580. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  8581. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  8582. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  8583. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  8584. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  8585. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  8586. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  8587. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  8588. t[13] = l;
  8589. l = h;
  8590. h = o;
  8591. o = 0;
  8592. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  8593. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  8594. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  8595. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  8596. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  8597. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  8598. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  8599. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  8600. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  8601. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  8602. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  8603. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  8604. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  8605. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  8606. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  8607. t[14] = l;
  8608. l = h;
  8609. h = o;
  8610. o = 0;
  8611. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  8612. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  8613. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  8614. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  8615. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  8616. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  8617. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  8618. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  8619. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  8620. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  8621. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  8622. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  8623. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  8624. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  8625. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  8626. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  8627. t[15] = l;
  8628. l = h;
  8629. h = o;
  8630. o = 0;
  8631. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
  8632. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  8633. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  8634. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  8635. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  8636. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  8637. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  8638. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  8639. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  8640. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  8641. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  8642. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  8643. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  8644. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  8645. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  8646. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  8647. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
  8648. t[16] = l;
  8649. l = h;
  8650. h = o;
  8651. o = 0;
  8652. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
  8653. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
  8654. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  8655. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  8656. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  8657. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  8658. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  8659. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  8660. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  8661. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  8662. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  8663. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  8664. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  8665. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  8666. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  8667. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  8668. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
  8669. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
  8670. t[17] = l;
  8671. l = h;
  8672. h = o;
  8673. o = 0;
  8674. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
  8675. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
  8676. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
  8677. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  8678. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  8679. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  8680. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  8681. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  8682. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  8683. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  8684. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  8685. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  8686. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  8687. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  8688. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  8689. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  8690. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
  8691. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
  8692. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
  8693. t[18] = l;
  8694. l = h;
  8695. h = o;
  8696. o = 0;
  8697. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
  8698. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
  8699. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
  8700. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
  8701. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  8702. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  8703. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  8704. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  8705. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  8706. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  8707. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  8708. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  8709. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  8710. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  8711. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  8712. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  8713. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
  8714. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
  8715. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
  8716. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
  8717. t[19] = l;
  8718. l = h;
  8719. h = o;
  8720. o = 0;
  8721. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
  8722. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
  8723. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
  8724. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
  8725. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
  8726. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  8727. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  8728. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  8729. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  8730. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  8731. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  8732. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  8733. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  8734. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  8735. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  8736. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  8737. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
  8738. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
  8739. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
  8740. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
  8741. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
  8742. t[20] = l;
  8743. l = h;
  8744. h = o;
  8745. o = 0;
  8746. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
  8747. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
  8748. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
  8749. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
  8750. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
  8751. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
  8752. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  8753. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  8754. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  8755. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  8756. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  8757. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  8758. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  8759. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  8760. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  8761. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  8762. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
  8763. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
  8764. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
  8765. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
  8766. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
  8767. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
  8768. t[21] = l;
  8769. l = h;
  8770. h = o;
  8771. o = 0;
  8772. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
  8773. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
  8774. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
  8775. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
  8776. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
  8777. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
  8778. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
  8779. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  8780. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  8781. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  8782. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  8783. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  8784. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  8785. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  8786. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  8787. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  8788. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
  8789. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
  8790. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
  8791. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
  8792. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
  8793. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
  8794. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
  8795. t[22] = l;
  8796. l = h;
  8797. h = o;
  8798. o = 0;
  8799. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
  8800. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
  8801. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
  8802. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
  8803. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
  8804. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
  8805. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
  8806. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
  8807. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  8808. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  8809. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  8810. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  8811. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  8812. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  8813. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  8814. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  8815. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
  8816. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
  8817. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
  8818. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
  8819. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
  8820. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
  8821. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
  8822. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
  8823. t[23] = l;
  8824. l = h;
  8825. h = o;
  8826. o = 0;
  8827. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
  8828. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
  8829. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
  8830. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
  8831. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
  8832. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
  8833. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
  8834. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
  8835. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  8836. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  8837. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  8838. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  8839. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  8840. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  8841. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  8842. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
  8843. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
  8844. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
  8845. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
  8846. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
  8847. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
  8848. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
  8849. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
  8850. r->dp[24] = l;
  8851. l = h;
  8852. h = o;
  8853. o = 0;
  8854. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
  8855. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
  8856. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
  8857. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
  8858. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
  8859. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
  8860. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
  8861. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
  8862. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  8863. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  8864. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  8865. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  8866. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  8867. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  8868. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
  8869. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
  8870. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
  8871. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
  8872. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
  8873. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
  8874. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
  8875. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
  8876. r->dp[25] = l;
  8877. l = h;
  8878. h = o;
  8879. o = 0;
  8880. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
  8881. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
  8882. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
  8883. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
  8884. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
  8885. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
  8886. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
  8887. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
  8888. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  8889. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  8890. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  8891. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  8892. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  8893. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
  8894. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
  8895. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
  8896. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
  8897. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
  8898. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
  8899. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
  8900. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
  8901. r->dp[26] = l;
  8902. l = h;
  8903. h = o;
  8904. o = 0;
  8905. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
  8906. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
  8907. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
  8908. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
  8909. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
  8910. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
  8911. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
  8912. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
  8913. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  8914. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  8915. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  8916. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  8917. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
  8918. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
  8919. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
  8920. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
  8921. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
  8922. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
  8923. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
  8924. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
  8925. r->dp[27] = l;
  8926. l = h;
  8927. h = o;
  8928. o = 0;
  8929. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
  8930. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
  8931. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
  8932. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
  8933. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
  8934. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
  8935. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
  8936. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
  8937. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  8938. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  8939. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  8940. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
  8941. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
  8942. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
  8943. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
  8944. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
  8945. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
  8946. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
  8947. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
  8948. r->dp[28] = l;
  8949. l = h;
  8950. h = o;
  8951. o = 0;
  8952. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
  8953. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
  8954. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
  8955. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
  8956. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
  8957. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
  8958. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
  8959. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
  8960. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  8961. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  8962. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
  8963. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
  8964. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
  8965. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
  8966. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
  8967. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
  8968. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
  8969. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
  8970. r->dp[29] = l;
  8971. l = h;
  8972. h = o;
  8973. o = 0;
  8974. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
  8975. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
  8976. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
  8977. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
  8978. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
  8979. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
  8980. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
  8981. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
  8982. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
  8983. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
  8984. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
  8985. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
  8986. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
  8987. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
  8988. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
  8989. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
  8990. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
  8991. r->dp[30] = l;
  8992. l = h;
  8993. h = o;
  8994. o = 0;
  8995. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
  8996. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
  8997. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
  8998. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
  8999. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
  9000. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
  9001. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
  9002. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
  9003. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
  9004. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
  9005. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
  9006. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
  9007. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
  9008. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
  9009. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
  9010. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
  9011. r->dp[31] = l;
  9012. l = h;
  9013. h = o;
  9014. o = 0;
  9015. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
  9016. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
  9017. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
  9018. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
  9019. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
  9020. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
  9021. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
  9022. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
  9023. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
  9024. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
  9025. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
  9026. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
  9027. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
  9028. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
  9029. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
  9030. r->dp[32] = l;
  9031. l = h;
  9032. h = o;
  9033. o = 0;
  9034. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
  9035. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
  9036. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
  9037. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
  9038. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
  9039. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
  9040. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
  9041. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
  9042. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
  9043. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
  9044. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
  9045. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
  9046. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
  9047. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
  9048. r->dp[33] = l;
  9049. l = h;
  9050. h = o;
  9051. o = 0;
  9052. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
  9053. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
  9054. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
  9055. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
  9056. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
  9057. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
  9058. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
  9059. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
  9060. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
  9061. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
  9062. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
  9063. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
  9064. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
  9065. r->dp[34] = l;
  9066. l = h;
  9067. h = o;
  9068. o = 0;
  9069. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
  9070. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
  9071. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
  9072. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
  9073. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
  9074. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
  9075. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
  9076. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
  9077. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
  9078. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
  9079. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
  9080. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
  9081. r->dp[35] = l;
  9082. l = h;
  9083. h = o;
  9084. o = 0;
  9085. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
  9086. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
  9087. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
  9088. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
  9089. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
  9090. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
  9091. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
  9092. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
  9093. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
  9094. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
  9095. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
  9096. r->dp[36] = l;
  9097. l = h;
  9098. h = o;
  9099. o = 0;
  9100. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
  9101. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
  9102. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
  9103. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
  9104. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
  9105. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
  9106. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
  9107. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
  9108. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
  9109. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
  9110. r->dp[37] = l;
  9111. l = h;
  9112. h = o;
  9113. o = 0;
  9114. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
  9115. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
  9116. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
  9117. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
  9118. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
  9119. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
  9120. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
  9121. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
  9122. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
  9123. r->dp[38] = l;
  9124. l = h;
  9125. h = o;
  9126. o = 0;
  9127. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
  9128. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
  9129. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
  9130. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
  9131. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
  9132. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
  9133. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
  9134. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
  9135. r->dp[39] = l;
  9136. l = h;
  9137. h = o;
  9138. o = 0;
  9139. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
  9140. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
  9141. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
  9142. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
  9143. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
  9144. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
  9145. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
  9146. r->dp[40] = l;
  9147. l = h;
  9148. h = o;
  9149. o = 0;
  9150. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
  9151. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
  9152. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
  9153. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
  9154. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
  9155. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
  9156. r->dp[41] = l;
  9157. l = h;
  9158. h = o;
  9159. o = 0;
  9160. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
  9161. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
  9162. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
  9163. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
  9164. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
  9165. r->dp[42] = l;
  9166. l = h;
  9167. h = o;
  9168. o = 0;
  9169. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
  9170. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
  9171. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
  9172. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
  9173. r->dp[43] = l;
  9174. l = h;
  9175. h = o;
  9176. o = 0;
  9177. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
  9178. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
  9179. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
  9180. r->dp[44] = l;
  9181. l = h;
  9182. h = o;
  9183. o = 0;
  9184. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
  9185. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
  9186. r->dp[45] = l;
  9187. l = h;
  9188. h = o;
  9189. SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
  9190. r->dp[46] = l;
  9191. r->dp[47] = h;
  9192. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  9193. r->used = 48;
  9194. sp_clamp(r);
  9195. }
  9196. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9197. if (t != NULL) {
  9198. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  9199. }
  9200. #endif
  9201. return err;
  9202. }
  9203. #endif /* SP_INT_DIGITS >= 48 */
  9204. #if SP_INT_DIGITS >= 64
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Splits the 32-digit operands into 16-digit halves (a = a1*R + a0,
 * b = b1*R + b0, where R = 2^(16 * SP_WORD_SIZE)) and builds the product
 * from three half-size multiplications:
 *   r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0
 * where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1) * (b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_32(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;   /* Low digit of the double-digit accumulator. */
    sp_int_digit h;   /* High digit (carry) of the accumulator. */
    sp_int* a1;       /* High half of a; later overwritten with a0 + a1. */
    sp_int* b1;       /* High half of b; later overwritten with b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the 16-digit sum a0 + a1. */
    sp_int_digit cb;  /* Carry out of the 16-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 16, 2);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;
        /* Copy out the top halves of the operands. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
        b1->used = 16;
        /* z2 = a1 * b1 */
        err = _sp_mul_16(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1, in place in a1; ca holds the carry out. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1, in place in b1; cb holds the carry out. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_16(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_16(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 16 */
        /* The sums a0+a1 and b0+b1 were truncated to 16 digits; the full
         * middle product also needs ca*(b0+b1) and cb*(a0+a1) added at
         * digit 16 and ca*cb at digit 32. */
        z1->dp[32] = ca & cb;
        l = 0;
        if (ca) {
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        l = 0;
        if (cb) {
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into the top digit. */
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just store. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  9363. #endif /* SP_INT_DIGITS >= 64 */
  9364. #if SP_INT_DIGITS >= 96
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Splits the 48-digit operands into 24-digit halves (a = a1*R + a0,
 * b = b1*R + b0, where R = 2^(24 * SP_WORD_SIZE)) and builds the product
 * from three half-size multiplications:
 *   r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0
 * where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1) * (b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_48(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;   /* Low digit of the double-digit accumulator. */
    sp_int_digit h;   /* High digit (carry) of the accumulator. */
    sp_int* a1;       /* High half of a; later overwritten with a0 + a1. */
    sp_int* b1;       /* High half of b; later overwritten with b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the 24-digit sum a0 + a1. */
    sp_int_digit cb;  /* Carry out of the 24-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 24, 2);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;
        /* Copy out the top halves of the operands. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
        b1->used = 24;
        /* z2 = a1 * b1 */
        err = _sp_mul_24(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1, in place in a1; ca holds the carry out. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1, in place in b1; cb holds the carry out. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_24(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_24(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 24 */
        /* The sums a0+a1 and b0+b1 were truncated to 24 digits; the full
         * middle product also needs ca*(b0+b1) and cb*(a0+a1) added at
         * digit 24 and ca*cb at digit 48. */
        z1->dp[48] = ca & cb;
        l = 0;
        if (ca) {
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        l = 0;
        if (cb) {
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into the top digit. */
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just store. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  9523. #endif /* SP_INT_DIGITS >= 96 */
  9524. #if SP_INT_DIGITS >= 128
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Splits the 64-digit operands into 32-digit halves (a = a1*R + a0,
 * b = b1*R + b0, where R = 2^(32 * SP_WORD_SIZE)) and builds the product
 * from three half-size multiplications:
 *   r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0
 * where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1) * (b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_64(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;   /* Low digit of the double-digit accumulator. */
    sp_int_digit h;   /* High digit (carry) of the accumulator. */
    sp_int* a1;       /* High half of a; later overwritten with a0 + a1. */
    sp_int* b1;       /* High half of b; later overwritten with b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the 32-digit sum a0 + a1. */
    sp_int_digit cb;  /* Carry out of the 32-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 32, 2);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;
        /* Copy out the top halves of the operands. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
        b1->used = 32;
        /* z2 = a1 * b1 */
        err = _sp_mul_32(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1, in place in a1; ca holds the carry out. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1, in place in b1; cb holds the carry out. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_32(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_32(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 32 */
        /* The sums a0+a1 and b0+b1 were truncated to 32 digits; the full
         * middle product also needs ca*(b0+b1) and cb*(a0+a1) added at
         * digit 32 and ca*cb at digit 64. */
        z1->dp[64] = ca & cb;
        l = 0;
        if (ca) {
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        l = 0;
        if (cb) {
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into the top digit. */
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just store. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  9683. #endif /* SP_INT_DIGITS >= 128 */
  9684. #if SP_INT_DIGITS >= 192
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba implementation.
 *
 * Splits the 96-digit operands into 48-digit halves (a = a1*R + a0,
 * b = b1*R + b0, where R = 2^(48 * SP_WORD_SIZE)) and builds the product
 * from three half-size multiplications:
 *   r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0
 * where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1) * (b0 + b1).
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_96(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;   /* Low digit of the double-digit accumulator. */
    sp_int_digit h;   /* High digit (carry) of the accumulator. */
    sp_int* a1;       /* High half of a; later overwritten with a0 + a1. */
    sp_int* b1;       /* High half of b; later overwritten with b0 + b1. */
    sp_int* z0;       /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;       /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;       /* z2 = a1 * b1. */
    sp_int_digit ca;  /* Carry out of the 48-digit sum a0 + a1. */
    sp_int_digit cb;  /* Carry out of the 48-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 48, 2);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        z0 = r;
        /* Copy out the top halves of the operands. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
        b1->used = 48;
        /* z2 = a1 * b1 */
        err = _sp_mul_48(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1, in place in a1; ca holds the carry out. */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1, in place in b1; cb holds the carry out. */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;
        /* z0 = a0 * b0 */
        err = _sp_mul_48(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_48(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 */
        /* r += (z1 - z0 - z2) << 48 */
        /* The sums a0+a1 and b0+b1 were truncated to 48 digits; the full
         * middle product also needs ca*(b0+b1) and cb*(a0+a1) added at
         * digit 48 and ca*cb at digit 96. */
        z1->dp[96] = ca & cb;
        l = 0;
        if (ca) {
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        l = 0;
        if (cb) {
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow into the top digit. */
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just store. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  9843. #endif /* SP_INT_DIGITS >= 192 */
  9844. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  9845. #endif /* !WOLFSSL_SP_SMALL */
/* Multiply a by b and store in r: r = a * b
 *
 * Dispatches to a fixed-size comba or Karatsuba implementation when both
 * operands have a matching supported digit count; otherwise falls back to
 * the generic n*n or n*m routines.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b or r is NULL; or the result will be too big for
 *          fixed data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_mul(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    int sign = MP_ZPOS;
#endif

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Result is negative exactly when the operand signs differ. */
        sign = a->sign ^ b->sign;
#endif
        if ((a->used == 0) || (b->used == 0)) {
            _sp_zero(r);
        }
        else
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if SP_WORD_SIZE == 64
        if ((a->used == 4) && (b->used == 4)) {
            err = _sp_mul_4(a, b, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if SP_WORD_SIZE == 64
#ifdef SQR_MUL_ASM
        if ((a->used == 6) && (b->used == 6)) {
            err = _sp_mul_6(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if SP_WORD_SIZE == 32
#ifdef SQR_MUL_ASM
        if ((a->used == 8) && (b->used == 8)) {
            err = _sp_mul_8(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if SP_WORD_SIZE == 32
#ifdef SQR_MUL_ASM
        if ((a->used == 12) && (b->used == 12)) {
            err = _sp_mul_12(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 32
        if ((a->used == 16) && (b->used == 16)) {
            err = _sp_mul_16(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#if SP_INT_DIGITS >= 48
        if ((a->used == 24) && (b->used == 24)) {
            err = _sp_mul_24(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if ((a->used == 32) && (b->used == 32)) {
            err = _sp_mul_32(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if ((a->used == 48) && (b->used == 48)) {
            err = _sp_mul_48(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if ((a->used == 64) && (b->used == 64)) {
            err = _sp_mul_64(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if ((a->used == 96) && (b->used == 96)) {
            err = _sp_mul_96(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
#ifdef SQR_MUL_ASM
        /* Generic same-size multiply. */
        if (a->used == b->used) {
            err = _sp_mul_nxn(a, b, r);
        }
        else
#endif
        {
            /* Generic different-size multiply. */
            err = _sp_mul(a, b, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* Zero result is always positive. */
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rmul");
    }
#endif

    return err;
}
  9978. /* END SP_MUL implementations. */
  9979. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  9980. defined(WOLFCRYPT_HAVE_ECCSI) || \
  9981. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Multiply a by b mod m and store in r: r = (a * b) mod m
 *
 * Multiplies into a temporary when r aliases m (sp_mod reads m) or r is
 * too small to hold the full product, then reduces modulo m.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
 *          fixed data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* The intermediate product needs a->used + b->used digits. */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Use a temporary when r aliases the modulus or cannot hold the
         * full (unreduced) product. */
        if ((r == m) || (r->size < a->used + b->used)) {
            /* NULL guard keeps the size expression safe for the macro even
             * though err is already set in that case. */
            DECL_SP_INT(t, ((a == NULL) || (b == NULL)) ? 1 :
                a->used + b->used);
            ALLOC_SP_INT(t, a->used + b->used, err, NULL);
            if (err == MP_OKAY) {
                err = sp_init_size(t, a->used + b->used);
            }
            if (err == MP_OKAY) {
                err = sp_mul(a, b, t);
            }
            if (err == MP_OKAY) {
                err = sp_mod(t, m, r);
            }
            FREE_SP_INT(t, NULL);
        }
        else {
            /* Multiply into r and reduce in place. */
            err = sp_mul(a, b, r);
            if (err == MP_OKAY) {
                err = sp_mod(r, m, r);
            }
        }
    }

    return err;
}
  10028. #endif
  10029. #ifdef WOLFSSL_SP_INVMOD
/* Calculates the multiplicative inverse in the field.
 *
 * Binary extended Euclidean algorithm. An even modulus is handled by first
 * inverting m modulo a and then reconstructing:
 *   a^-1 mod m = m + ((1 - m*(m^-1 mod a)) / a)
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result. r cannot be m.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
 *          m is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;  /* Shrinking value paired with cofactor b. */
    sp_int* v = NULL;  /* Shrinking value paired with cofactor c. */
    sp_int* b = NULL;  /* Cofactor for u. */
    sp_int* mm;        /* Modulus used in the loop: m, or a when m is even. */
    int evenMod = 0;   /* Set when even-modulus post-processing is needed. */
    DECL_SP_INT_ARRAY(t, (m == NULL) ? 1 : (m->used + 1), 3);
    DECL_SP_INT(c, (m == NULL) ? 1 : (2 * m->used + 1));

    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
        err = MP_VAL;
    }
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        u = t[0];
        v = t[1];
        b = t[2];
        /* c allocated separately and larger for even mod case. */

        /* Reduce a below m; the reduced value lives in r from here on. */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            err = sp_mod(a, m, r);
            a = r;
        }
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        /* Make 'a' positive */
        err = sp_add(m, a, r);
        a = r;
    }
#endif

    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
        err = MP_VAL;
    }
    /* r*2*x != n*2*y + 1 for integer x,y */
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
        err = MP_VAL;
    }
    /* 1*1 = 0*m + 1 */
    if ((err == MP_OKAY) && sp_isone(a)) {
        sp_set(r, 1);
    }
    else if (err != MP_OKAY) {
        /* Fall through to cleanup with the error code. */
    }
    else {
        sp_init_size(u, m->used + 1);
        sp_init_size(v, m->used + 1);
        sp_init_size(b, m->used + 1);
        sp_init_size(c, 2 * m->used + 1);

        if (sp_iseven(m)) {
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
            mm = a;
            sp_copy(a, u);
            sp_mod(m, a, v);
            /* v == 0 when a divides m evenly - no inverse. */
            if (sp_iszero(v)) {
                /* Force u to no inverse answer. */
                sp_set(u, 0);
            }
            evenMod = 1;
        }
        else {
            mm = m;
            sp_copy(m, u);
            sp_copy(a, v);
        }
        _sp_zero(b);
        sp_set(c, 1);

        /* Binary extended GCD: halve even values (keeping the matching
         * cofactor integral by adding mm first when it is odd), otherwise
         * subtract the smaller pair from the larger. */
        while (!sp_isone(v) && !sp_iszero(u)) {
            if (sp_iseven(u)) {
                sp_div_2(u, u);
                if (sp_isodd(b)) {
                    _sp_add_off(b, mm, b, 0);
                }
                sp_div_2(b, b);
            }
            else if (sp_iseven(v)) {
                sp_div_2(v, v);
                if (sp_isodd(c)) {
                    _sp_add_off(c, mm, c, 0);
                }
                sp_div_2(c, c);
            }
            else if (_sp_cmp(u, v) != MP_LT) {
                _sp_sub_off(u, v, u, 0);
                /* Keep cofactor non-negative by adding mm before subtract. */
                if (_sp_cmp(b, c) == MP_LT) {
                    _sp_add_off(b, mm, b, 0);
                }
                _sp_sub_off(b, c, b, 0);
            }
            else {
                _sp_sub_off(v, u, v, 0);
                if (_sp_cmp(c, b) == MP_LT) {
                    _sp_add_off(c, mm, c, 0);
                }
                _sp_sub_off(c, b, c, 0);
            }
        }
        if (sp_iszero(u)) {
            /* gcd(a, m) != 1 - no inverse exists. */
            err = MP_VAL;
        }
        else if (evenMod) {
            /* Finish operation.
             * a^-1 mod m = m + ((1 - m*c) / a)
             * => a^-1 mod m = m - ((m*c - 1) / a)
             */
            err = sp_mul(c, m, c);
            if (err == MP_OKAY) {
                _sp_sub_d(c, 1, c);
                err = sp_div(c, a, c, NULL);
            }
            if (err == MP_OKAY) {
                sp_sub(m, c, r);
            }
        }
        else {
            err = sp_copy(c, r);
        }
    }

    FREE_SP_INT(c, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10174. #endif /* WOLFSSL_SP_INVMOD */
  10175. #ifdef WOLFSSL_SP_INVMOD_MONT_CT
  10176. #define CT_INV_MOD_PRE_CNT 8
/* Calculates the multiplicative inverse in the field - constant time.
 *
 * Modulus (m) must be a prime and greater than 2.
 *
 * Computes a^(m-2) mod m (Fermat's little theorem) with a sliding window
 * over runs of set exponent bits: pre[i] holds a^(2^(i+1)-1) in Montgomery
 * form so a run of j ones is applied with one multiply by pre[j-1].
 *
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
 * @param  [in]   m   SP integer this is the modulus.
 * @param  [out]  r   SP integer to hold result.
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp)
{
    int err = MP_OKAY;
    int i;          /* Bit index into the exponent e = m - 2. */
    int j = 0;      /* Length of the current run of set bits. */
    sp_int* t = NULL;   /* Working accumulator. */
    sp_int* e = NULL;   /* Exponent: m - 2. */
    /* Table of CT_INV_MOD_PRE_CNT precomputed powers plus t and e. */
    DECL_SP_INT_ARRAY(pre, (m == NULL) ? 1 : m->used * 2 + 1,
        CT_INV_MOD_PRE_CNT + 2);

    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
            (m->used == 1 && m->dp[0] < 3))) {
        err = MP_VAL;
    }

    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
    if (err == MP_OKAY) {
        t = pre[CT_INV_MOD_PRE_CNT + 0];
        e = pre[CT_INV_MOD_PRE_CNT + 1];
        sp_init_size(t, m->used * 2 + 1);
        sp_init_size(e, m->used * 2 + 1);
        /* Precompute pre[i] = pre[i-1]^2 * a = a^(2^(i+1)-1), Montgomery
         * reducing after each square and each multiply. */
        sp_init_size(pre[0], m->used * 2 + 1);
        err = sp_copy(a, pre[0]);
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
            sp_init_size(pre[i], m->used * 2 + 1);
            err = sp_sqr(pre[i-1], pre[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
            if (err == MP_OKAY) {
                err = sp_mul(pre[i], a, pre[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
        }
    }
    if (err == MP_OKAY) {
        /* e = m - 2 (the inversion exponent). */
        _sp_sub_d(m, 2, e);
        /* Count the run of set bits at the top of e (capped at the table
         * size) and seed t with the matching precomputed power. */
        for (i = sp_count_bits(e)-1, j = 0; i >= 0; i--, j++) {
            if ((!sp_is_bit_set(e, i)) || (j == CT_INV_MOD_PRE_CNT)) {
                break;
            }
        }
        err = sp_copy(pre[j-1], t);
        /* Square-and-multiply over the remaining bits, deferring multiplies
         * until a run of ones ends or reaches the table size. */
        for (j = 0; (err == MP_OKAY) && (i >= 0); i--) {
            int set = sp_is_bit_set(e, i);

            if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) {
                err = sp_mul(t, pre[j-1], t);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t, m, mp);
                }
                j = 0;
            }
            if (err == MP_OKAY) {
                err = sp_sqr(t, t);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t, m, mp);
                }
            }
            j += set;
        }
    }
    if (err == MP_OKAY) {
        /* Apply any run of ones still pending at the end of the exponent. */
        if (j > 0) {
            err = sp_mul(t, pre[j-1], r);
            if (err == MP_OKAY) {
                err = _sp_mont_red(r, m, mp);
            }
        }
        else {
            err = sp_copy(t, r);
        }
    }

    FREE_SP_INT_ARRAY(pre, NULL);
    return err;
}
  10269. #endif /* WOLFSSL_SP_INVMOD_MONT_CT */
  10270. /**************************
  10271. * Exponentiation functions
  10272. **************************/
  10273. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  10274. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time.
 * Is constant time and can be cache attack resistant.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits of the exponent to process. May be
 *                      greater than the count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int j;
    int y;              /* Current exponent bit. */
    int seenTopBit = 0; /* Becomes 1 once the first set bit is processed. */
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        sp_init_size(t[0], 2 * m->used + 1);
        sp_init_size(t[1], 2 * m->used + 1);
#ifndef WC_NO_CACHE_RESISTANT
        sp_init_size(t[2], 2 * m->used + 1);
#endif

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Base reduced to zero: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                sp_set(r, 0);
                done = 1;
            }
        }
        else {
            err = sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* t[0] is dummy value and t[1] is result */
        err = sp_copy(t[0], t[1]);

        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* Square real result if seen the top bit. */
            err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]);
            if (err == MP_OKAY) {
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                j = y & seenTopBit;
                seenTopBit |= y;
                /* Multiply real result if bit is set and seen the top bit. */
                err = sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* Square real result if seen the top bit.
             * Operand selected by masked address arithmetic so the memory
             * access pattern does not reveal which of t[0]/t[1] is used. */
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
                    t[2]);
            err = sp_sqrmod(t[2], m, t[2]);
            sp_copy(t[2],
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
            if (err == MP_OKAY) {
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                j = y & seenTopBit;
                seenTopBit |= y;
                /* Multiply real result if bit is set and seen the top bit. */
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
                        t[2]);
                err = sp_mulmod(t[2], b, m, t[2]);
                sp_copy(t[2],
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
#endif
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10370. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  10371. * WOLFSSL_HAVE_SP_DH */
  10372. #if defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  10373. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))
  10374. #ifndef WC_NO_HARDEN
  10375. #if !defined(WC_NO_CACHE_RESISTANT)
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time with base in montgomery form.
 * Is constant time and cache attack resistant.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits of the exponent to process. May be
 *                      greater than the count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
    sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int j;
    int y;              /* Current exponent bit. */
    int seenTopBit = 0; /* Becomes 1 once the first set bit is processed. */
    sp_int_digit mp;    /* Montgomery multiplier: bottom digit of inv(-m). */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        sp_init_size(t[0], m->used * 2 + 1);
        sp_init_size(t[1], m->used * 2 + 1);
        sp_init_size(t[2], m->used * 2 + 1);
        sp_init_size(t[3], m->used * 2 + 1);

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Base reduced to zero: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                sp_set(r, 0);
                done = 1;
            }
        }
        else {
            err = sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        err = sp_mont_setup(m, &mp);
        if (err == MP_OKAY) {
            err = sp_mont_norm(t[1], m);
        }
        if (err == MP_OKAY) {
            /* Convert to montgomery form. */
            err = sp_mulmod(t[0], t[1], m, t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] is fake working value and t[1] is real working value. */
            sp_copy(t[0], t[1]);
            /* Montgomery form of base to multiply by. */
            sp_copy(t[0], t[2]);
        }

        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* Square real working value if seen the top bit.
             * Operand selected by masked address arithmetic so the memory
             * access pattern does not reveal which of t[0]/t[1] is used. */
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
                    t[3]);
            err = sp_sqr(t[3], t[3]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp);
            }
            sp_copy(t[3],
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
            if (err == MP_OKAY) {
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                j = y & seenTopBit;
                seenTopBit |= y;
                /* Multiply real value if bit is set and seen the top bit. */
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
                        t[3]);
                err = sp_mul(t[3], t[2], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp);
                }
                sp_copy(t[3],
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
        }
        if (err == MP_OKAY) {
            /* Convert from montgomery form. */
            err = _sp_mont_red(t[1], m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10475. #else
  10476. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  10477. #define SP_ALLOC
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Creates a window of precalculated exponents with base in montgomery form.
 * Is constant time but NOT cache attack resistant.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits of the exponent to process. May be
 *                      greater than the count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
    sp_int* r)
{
    int i;
    int j;
    int c;              /* Unprocessed bits remaining in current digit n. */
    int y;              /* Current window value (table index). */
    int winBits;
    int preCnt;
    int err = MP_OKAY;
    int done = 0;
    sp_int_digit mp;    /* Montgomery multiplier: bottom digit of inv(-m). */
    sp_int_digit n;     /* Current exponent digit, bits shifted to the top. */
    sp_int_digit mask;  /* Mask of winBits one bits. */
    sp_int* tr = NULL;  /* Temporary result. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);

    /* Window size tuned to exponent bit length. */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* Table entry for every winBits-bit value: t[i] = b^i (Montgomery). */
    preCnt = 1 << winBits;
    mask = preCnt - 1;

    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
    if (err == MP_OKAY) {
        tr = t[preCnt];

        for (i = 0; i < preCnt; i++) {
            sp_init_size(t[i], m->used * 2 + 1);
        }
        sp_init_size(tr, m->used * 2 + 1);

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Base reduced to zero: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                sp_set(r, 0);
                done = 1;
            }
        }
        else {
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        err = sp_mont_setup(m, &mp);
        if (err == MP_OKAY) {
            /* Norm value is 1 in montgomery form. */
            err = sp_mont_norm(t[0], m);
        }
        if (err == MP_OKAY) {
            /* Convert base to montgomery form. */
            err = sp_mulmod(t[1], t[0], m, t[1]);
        }

        /* Pre-calculate values: even entries by squaring t[i/2], odd
         * entries by multiplying t[i-1] by the base. */
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
            if ((i & 1) == 0) {
                err = sp_sqr(t[i/2], t[i]);
            }
            else {
                err = sp_mul(t[i-1], t[1], t[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        if (err == MP_OKAY) {
            /* Leading (possibly partial) window from the top of the
             * exponent. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            c = bits & (SP_WORD_SIZE - 1);
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            c -= bits % winBits;
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
            /* Copy window number for top bits. */
            sp_copy(t[y], tr);

            for (; (i >= 0) || (c >= winBits); ) {
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n <<= winBits;
                    c = SP_WORD_SIZE - winBits;
                }
                else if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }

                /* Square for number of bits in window. */
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }
                /* Multiply by window number for next set of bits. */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }
        }

        if (err == MP_OKAY) {
            /* Convert from montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(tr, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10630. #undef SP_ALLOC
  10631. #endif /* !WC_NO_CACHE_RESISTANT */
  10632. #endif /* !WC_NO_HARDEN */
/* Window size for base-2 exponentiation, chosen per digit width so a digit
 * holds a whole number of windows where possible. */
#if SP_WORD_SIZE <= 16
#define EXP2_WINSIZE 2
#elif SP_WORD_SIZE <= 32
#define EXP2_WINSIZE 3
#elif SP_WORD_SIZE <= 64
#define EXP2_WINSIZE 4
#elif SP_WORD_SIZE <= 128
#define EXP2_WINSIZE 5
#endif

/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
 * Is constant time and cache attack resistant.
 *
 * Multiplying by a power of 2 is a shift (sp_mul_2d), so no window table of
 * base powers is needed.
 *
 * @param  [in]   e       SP integer that is the exponent.
 * @param  [in]   digits  Number of digits of the exponent to process. May be
 *                        greater than the count of digits in e.
 * @param  [in]   m       SP integer that is the modulus.
 * @param  [out]  r       SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r)
{
    int i = 0;
    int j;
    int c = 0;          /* Unprocessed bits remaining in current digit n. */
    int y;              /* Current window value. */
    int err = MP_OKAY;
    sp_int* t = NULL;   /* m << EXP2_WINSIZE: padding added before sp_mod so
                         * the reduction takes the same time each round. */
    sp_int* tr = NULL;  /* Temporary result. */
    sp_int_digit mp = 0, n = 0;
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);

#if 0
    sp_print_int(2, "a");
    sp_print(e, "b");
    sp_print(m, "m");
#endif

    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        t = d[0];
        tr = d[1];

        sp_init_size(t, m->used * 2 + 1);
        sp_init_size(tr, m->used * 2 + 1);

        /* Montgomery reduction is only used for multi-digit moduli;
         * single-digit moduli use plain sp_mod. */
        if (m->used > 1) {
            err = sp_mont_setup(m, &mp);
            if (err == MP_OKAY) {
                /* Norm value is 1 in montgomery form. */
                err = sp_mont_norm(tr, m);
            }
            if (err == MP_OKAY) {
                err = sp_mul_2d(m, 1 << EXP2_WINSIZE, t);
            }
        }
        else {
            err = sp_set(tr, 1);
        }

        if (err == MP_OKAY) {
            /* Leading (possibly partial) window from the top of the
             * exponent. */
            i = digits - 1;
            n = e->dp[i--];
            c = SP_WORD_SIZE;
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
            /* Digit width is not a multiple of the window size. */
            c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
            if (c != SP_WORD_SIZE) {
                y = (int)(n >> c);
                n <<= SP_WORD_SIZE - c;
            }
            else
#endif
            {
                y = 0;
            }
            /* Multiply montgomery representation of 1 by 2 ^ top */
            err = sp_mul_2d(tr, y, tr);
        }
        if ((err == MP_OKAY) && (m->used > 1)) {
            /* Add padding so the following mod runs in uniform time. */
            err = sp_add(tr, t, tr);
        }
        if (err == MP_OKAY) {
            err = sp_mod(tr, m, tr);
        }
        if (err == MP_OKAY) {
            for (; (i >= 0) || (c >= EXP2_WINSIZE); ) {
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
                    n <<= EXP2_WINSIZE;
                    c = SP_WORD_SIZE - EXP2_WINSIZE;
                }
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
                else if (c < EXP2_WINSIZE) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
                    n = e->dp[i--];
                    c = EXP2_WINSIZE - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
#endif
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) &
                              ((1 << EXP2_WINSIZE) - 1));
                    n <<= EXP2_WINSIZE;
                    c -= EXP2_WINSIZE;
                }

                /* Square for number of bits in window. */
                for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err != MP_OKAY) {
                        break;
                    }
                    if (m->used > 1) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                    else {
                        err = sp_mod(tr, m, tr);
                    }
                }

                if (err == MP_OKAY) {
                    /* then multiply by 2^y */
                    err = sp_mul_2d(tr, y, tr);
                }
                if ((err == MP_OKAY) && (m->used > 1)) {
                    /* Add in value to make mod operation take same time */
                    err = sp_add(tr, t, tr);
                }
                if (err == MP_OKAY) {
                    err = sp_mod(tr, m, tr);
                }
                if (err != MP_OKAY) {
                    break;
                }
            }
        }

        if ((err == MP_OKAY) && (m->used > 1)) {
            /* Convert from montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if (err == MP_OKAY) {
        err = sp_copy(tr, r);
    }

#if 0
    sp_print(r, "rme");
#endif

    FREE_SP_INT_ARRAY(d, NULL);
    return err;
}
  10785. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  10786. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  10787. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  10788. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  10789. *
  10790. * @param [in] b SP integer that is the base.
  10791. * @param [in] e SP integer that is the exponent.
  10792. * @param [in] bits Number of bits in base to use. May be greater than
  10793. * count of bits in b.
  10794. * @param [in] m SP integer that is the modulus.
  10795. * @param [out] r SP integer to hold result.
  10796. *
  10797. * @return MP_OKAY on success.
  10798. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  10799. * @return MP_MEM when dynamic memory allocation fails.
  10800. */
  10801. int sp_exptmod_ex(sp_int* b, sp_int* e, int digits, sp_int* m, sp_int* r)
  10802. {
  10803. int err = MP_OKAY;
  10804. int done = 0;
  10805. int mBits = sp_count_bits(m);
  10806. int bBits = sp_count_bits(b);
  10807. int eBits = sp_count_bits(e);
  10808. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  10809. err = MP_VAL;
  10810. }
  10811. #if 0
  10812. if (err == MP_OKAY) {
  10813. sp_print(b, "a");
  10814. sp_print(e, "b");
  10815. sp_print(m, "m");
  10816. }
  10817. #endif
  10818. /* Check for invalid modulus. */
  10819. if ((err == MP_OKAY) && sp_iszero(m)) {
  10820. err = MP_VAL;
  10821. }
  10822. #ifdef WOLFSSL_SP_INT_NEGATIVE
  10823. /* Check for unsupported negative values of exponent and modulus. */
  10824. if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
  10825. err = MP_VAL;
  10826. }
  10827. #endif
  10828. /* Check for degenerate cases. */
  10829. if ((err == MP_OKAY) && sp_isone(m)) {
  10830. sp_set(r, 0);
  10831. done = 1;
  10832. }
  10833. if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
  10834. sp_set(r, 1);
  10835. done = 1;
  10836. }
  10837. /* Check whether base needs to be reduced. */
  10838. if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
  10839. if ((r == e) || (r == m)) {
  10840. err = MP_VAL;
  10841. }
  10842. if (err == MP_OKAY) {
  10843. err = sp_mod(b, m, r);
  10844. }
  10845. if (err == MP_OKAY) {
  10846. b = r;
  10847. }
  10848. }
  10849. /* Check for degenerate case of base. */
  10850. if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
  10851. sp_set(r, 0);
  10852. done = 1;
  10853. }
  10854. /* Ensure SP integers have space for intermediate values. */
  10855. if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
  10856. err = MP_VAL;
  10857. }
  10858. if ((!done) && (err == MP_OKAY)) {
  10859. /* Use code optimized for specific sizes if possible */
  10860. #if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
  10861. (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
  10862. #ifndef WOLFSSL_SP_NO_2048
  10863. if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
  10864. (eBits <= 1024)) {
  10865. err = sp_ModExp_1024(b, e, m, r);
  10866. done = 1;
  10867. }
  10868. else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
  10869. (eBits <= 2048)) {
  10870. err = sp_ModExp_2048(b, e, m, r);
  10871. done = 1;
  10872. }
  10873. else
  10874. #endif
  10875. #ifndef WOLFSSL_SP_NO_3072
  10876. if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
  10877. (eBits <= 1536)) {
  10878. err = sp_ModExp_1536(b, e, m, r);
  10879. done = 1;
  10880. }
  10881. else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
  10882. (eBits <= 3072)) {
  10883. err = sp_ModExp_3072(b, e, m, r);
  10884. done = 1;
  10885. }
  10886. else
  10887. #endif
  10888. #ifdef WOLFSSL_SP_4096
  10889. if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
  10890. (eBits <= 4096)) {
  10891. err = sp_ModExp_4096(b, e, m, r);
  10892. done = 1;
  10893. }
  10894. else
  10895. #endif
  10896. #endif
  10897. {
  10898. }
  10899. }
  10900. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH)
  10901. #if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
  10902. defined(NO_DH)
  10903. if ((!done) && (err == MP_OKAY))
  10904. err = sp_exptmod_nct(b, e, m, r);
  10905. }
  10906. #else
  10907. #if defined(WOLFSSL_SP_MATH_ALL)
  10908. if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
  10909. mp_isodd(m)) {
  10910. /* Use the generic base 2 implementation. */
  10911. err = _sp_exptmod_base_2(e, digits, m, r);
  10912. }
  10913. else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
  10914. #ifndef WC_NO_HARDEN
  10915. err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
  10916. #else
  10917. err = sp_exptmod_nct(b, e, m, r);
  10918. #endif
  10919. }
  10920. else
  10921. #endif /* WOLFSSL_SP_MATH_ALL */
  10922. if ((!done) && (err == MP_OKAY)) {
  10923. /* Otherwise use the generic implementation. */
  10924. err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
  10925. }
  10926. #endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
  10927. #else
  10928. if ((!done) && (err == MP_OKAY)) {
  10929. err = MP_VAL;
  10930. }
  10931. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  10932. (void)mBits;
  10933. (void)bBits;
  10934. (void)eBits;
  10935. (void)digits;
  10936. #if 0
  10937. if (err == MP_OKAY) {
  10938. sp_print(r, "rme");
  10939. }
  10940. #endif
  10941. return err;
  10942. }
  10943. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  10944. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  10945. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  10946. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  10947. *
  10948. * @param [in] b SP integer that is the base.
  10949. * @param [in] e SP integer that is the exponent.
  10950. * @param [in] m SP integer that is the modulus.
  10951. * @param [out] r SP integer to hold result.
  10952. *
  10953. * @return MP_OKAY on success.
  10954. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  10955. * @return MP_MEM when dynamic memory allocation fails.
  10956. */
  10957. int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
  10958. {
  10959. int err = MP_OKAY;
  10960. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  10961. err = MP_VAL;
  10962. }
  10963. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  10964. if (err == MP_OKAY) {
  10965. err = sp_exptmod_ex(b, e, e->used, m, r);
  10966. }
  10967. RESTORE_VECTOR_REGISTERS();
  10968. return err;
  10969. }
  10970. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  10971. * WOLFSSL_HAVE_SP_DH */
  10972. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  10973. #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
  10974. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  10975. #define SP_ALLOC
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Creates a window of precalculated exponents with base in montgomery form.
 * Sliding window and is NOT constant time.
 *
 * Window table t[i] holds b^(2^(winBits-1) + i) in Montgomery form, i.e.
 * every winBits-bit exponent value whose top bit is set. Windows are only
 * started on a set bit, so the top bit of each window is always 1 and the
 * table is indexed by the remaining winBits-1 bits.
 *
 * @param  [in]   b  SP integer that is the base.
 * @param  [in]   e  SP integer that is the exponent.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
{
    int i = 0;
    int j = 0;
    int c = 0;          /* Unprocessed bits remaining in current digit n. */
    int y = 0;          /* Current window value / current bit. */
    int bits;
    int winBits;
    int preCnt;
    int err = MP_OKAY;
    int done = 0;
    sp_int* tr = NULL;  /* Temporary result. */
    sp_int* bm = NULL;  /* Base reduced mod m, in Montgomery form. */
    sp_int_digit mask;  /* Mask of winBits-1 one bits. */
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);

    bits = sp_count_bits(e);

    /* Window size tuned to exponent bit length. */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* Table entries only for window values with the top bit set. */
    preCnt = 1 << (winBits - 1);
    mask = preCnt - 1;

    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize window numbers and temporary result. */
        tr = t[preCnt + 0];
        bm = t[preCnt + 1];

        for (i = 0; i < preCnt; i++) {
            sp_init_size(t[i], m->used * 2 + 1);
        }
        sp_init_size(tr, m->used * 2 + 1);
        sp_init_size(bm, m->used * 2 + 1);

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, bm);
            /* Base reduced to zero: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(bm)) {
                sp_set(r, 0);
                done = 1;
            }
        }
        else {
            err = sp_copy(b, bm);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        sp_int_digit mp;    /* Montgomery multiplier. */
        sp_int_digit n;     /* Current exponent digit, bits at the top. */

        err = sp_mont_setup(m, &mp);
        if (err == MP_OKAY) {
            /* Norm value is 1 in montgomery form. */
            err = sp_mont_norm(t[0], m);
        }
        if (err == MP_OKAY) {
            /* Convert base to montgomery form. */
            err = sp_mulmod(bm, t[0], m, bm);
        }
        if (err == MP_OKAY) {
            err = sp_copy(bm, t[0]);
        }
        /* t[0] = bm ^ (2 ^ (winBits - 1)): smallest top-bit-set window. */
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
        }
        /* Remaining entries: t[i] = t[i-1] * bm. */
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
            err = sp_mul(t[i-1], bm, t[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        if (err == MP_OKAY) {
            /* Find the top bit. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            c = bits % SP_WORD_SIZE;
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Put top bit at highest offset in digit. */
            n <<= SP_WORD_SIZE - c;

            if (bits >= winBits) {
                /* Top bit set. Copy from window. */
                if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }
                err = sp_copy(t[y], tr);
            }
            else {
                /* 1 in Montgomery form. */
                err = sp_mont_norm(tr, m);
            }

            while (err == MP_OKAY) {
                /* Square until we find bit that is 1 or there's less than a
                 * window of bits left.
                 */
                while (err == MP_OKAY && ((i >= 0) || (c >= winBits))) {
                    sp_int_digit n2 = n;
                    int c2 = c;
                    int i2 = i;

                    /* Make sure n2 has bits from the right digit. */
                    if (c2 == 0) {
                        n2 = e->dp[i2--];
                        c2 = SP_WORD_SIZE;
                    }
                    /* Mask off the next bit. */
                    y = (int)((n2 >> (SP_WORD_SIZE - 1)) & 1);
                    if (y == 1) {
                        break;
                    }

                    /* Square and update position. */
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                    n = n2 << 1;
                    c = c2 - 1;
                    i = i2;
                }

                if (err == MP_OKAY) {
                    /* Check we have enough bits left for a window. */
                    if ((i < 0) && (c < winBits)) {
                        break;
                    }

                    if (c == 0) {
                        /* Bits up to end of digit */
                        n = e->dp[i--];
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c = SP_WORD_SIZE - winBits;
                    }
                    else if (c < winBits) {
                        /* Bits to end of digit and part of next */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n = e->dp[i--];
                        c = winBits - c;
                        y |= (int)(n >> (SP_WORD_SIZE - c));
                        n <<= c;
                        c = SP_WORD_SIZE - c;
                    }
                    else {
                        /* Bits from middle of digit */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c -= winBits;
                    }
                    /* Drop the (known set) top bit to index the table. */
                    y &= mask;
                }

                /* Square for number of bits in window. */
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }
                /* Multiply by window number for next set of bits. */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }

            if ((err == MP_OKAY) && (c > 0)) {
                /* Handle remaining bits one at a time.
                 * Window values have top bit set and can't be used. */
                n = e->dp[0];
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
                    /* Always square. */
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                    /* Multiply in base only when the bit is set. */
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
                        err = sp_mul(tr, bm, tr);
                        if (err == MP_OKAY) {
                            err = _sp_mont_red(tr, m, mp);
                        }
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* Convert from montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(tr, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11205. #undef SP_ALLOC
  11206. #else
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Non-constant time implementation.
 *
 * Left-to-right binary square-and-multiply in Montgomery form: one
 * squaring per exponent bit, plus a multiply for each set bit.
 *
 * @param  [in]   b  SP integer that is the base.
 * @param  [in]   e  SP integer that is the exponent.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int y = 0;
    int bits = sp_count_bits(e);
    sp_int_digit mp;
    /* t[0] is the working accumulator, t[1] the Montgomery form of the
     * base. Each needs room for a double-width multiplication result. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);

    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        sp_init_size(t[0], m->used * 2 + 1);
        sp_init_size(t[1], m->used * 2 + 1);

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Base reduced to zero: result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                sp_set(r, 0);
                done = 1;
            }
        }
        else {
            err = sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        err = sp_mont_setup(m, &mp);
        if (err == MP_OKAY) {
            /* t[1] = Montgomery normalization value (R mod m). */
            err = sp_mont_norm(t[1], m);
        }
        if (err == MP_OKAY) {
            /* Convert to Montgomery form. */
            err = sp_mulmod(t[0], t[1], m, t[0]);
        }
        if (err == MP_OKAY) {
            /* Montgomery form of base to multiply by. */
            sp_copy(t[0], t[1]);
        }

        /* Top bit of e is always 1 and already in the accumulator, so
         * process the remaining bits from the second-highest down. */
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
            /* Square per bit. */
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
            if (err == MP_OKAY) {
                /* Multiply in the base when the exponent bit is set. */
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                if (y != 0) {
                    err = sp_mul(t[0], t[1], t[0]);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t[0], m, mp);
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* Convert from Montgomery form. */
            err = _sp_mont_red(t[0], m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11284. #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Non-constant time implementation.
 *
 * Validates arguments, handles the trivial cases (m == 1, e == 0, b == 0)
 * and dispatches to the appropriate internal implementation.
 *
 * @param  [in]   b  SP integer that is the base.
 * @param  [in]   e  SP integer that is the exponent.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    if (err != MP_OKAY) {
    }
    /* Handle special cases. */
    else if (sp_iszero(m)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif
    /* Any value mod 1 is 0. */
    else if (sp_isone(m)) {
        sp_set(r, 0);
    }
    /* b ^ 0 = 1. */
    else if (sp_iszero(e)) {
        sp_set(r, 1);
    }
    /* 0 ^ e = 0 (e is non-zero here). */
    else if (sp_iszero(b)) {
        sp_set(r, 0);
    }
    /* Ensure SP integers have space for intermediate values. */
    else if (m->used * 2 >= r->size) {
        err = MP_VAL;
    }
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
    /* Montgomery reduction needs an odd modulus; fall back to the generic
     * implementation when m is even. */
    else if (mp_iseven(m)) {
        err = _sp_exptmod_ex(b, e, e->used * SP_WORD_SIZE, m, r);
    }
#endif
    else {
        err = _sp_exptmod_nct(b, e, m, r);
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif

    return err;
}
  11349. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  11350. /***************
  11351. * 2^e functions
  11352. ***************/
  11353. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  11354. /* Divide by 2^e: r = a >> e and rem = bits shifted out
  11355. *
  11356. * @param [in] a SP integer to divide.
  11357. * @param [in] e Exponent bits (dividing by 2^e).
  11358. * @param [in] m SP integer that is the modulus.
  11359. * @param [out] r SP integer to hold result.
  11360. * @param [out] rem SP integer to hold remainder.
  11361. *
  11362. * @return MP_OKAY on success.
  11363. * @return MP_VAL when a is NULL.
  11364. */
  11365. int sp_div_2d(sp_int* a, int e, sp_int* r, sp_int* rem)
  11366. {
  11367. int err = MP_OKAY;
  11368. if (a == NULL) {
  11369. err = MP_VAL;
  11370. }
  11371. if (err == MP_OKAY) {
  11372. int remBits = sp_count_bits(a) - e;
  11373. if (remBits <= 0) {
  11374. /* Shifting down by more bits than in number. */
  11375. _sp_zero(r);
  11376. sp_copy(a, rem);
  11377. }
  11378. else {
  11379. if (rem != NULL) {
  11380. /* Copy a in to remainder. */
  11381. err = sp_copy(a, rem);
  11382. }
  11383. /* Shift a down by into result. */
  11384. sp_rshb(a, e, r);
  11385. if (rem != NULL) {
  11386. /* Set used and mask off top digit of remainder. */
  11387. rem->used = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
  11388. e &= SP_WORD_MASK;
  11389. if (e > 0) {
  11390. rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
  11391. }
  11392. sp_clamp(rem);
  11393. #ifdef WOLFSSL_SP_INT_NEGATIVE
  11394. rem->sign = MP_ZPOS;
  11395. #endif
  11396. }
  11397. }
  11398. }
  11399. return err;
  11400. }
  11401. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  11402. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* The bottom e bits: r = a & ((1 << e) - 1)
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL.
 */
int sp_mod_2d(sp_int* a, int e, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Number of digits needed to hold e bits. */
        int digits = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;

        if (a != r) {
            /* NOTE(review): only 'digits' digits are copied yet used is set
             * to a->used; appears to rely on the masking branch below (or on
             * a->used <= digits) - TODO confirm against callers. */
            XMEMCPY(r->dp, a->dp, digits * sizeof(sp_int_digit));
            r->used = a->used;
#ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = a->sign;
#endif
        }
        /* Masking is only needed when the value has at least as many digits
         * as the modulus, or (with negatives) must be two's-complemented. */
#ifndef WOLFSSL_SP_INT_NEGATIVE
        if (digits <= a->used)
#else
        if ((a->sign != MP_ZPOS) || (digits <= a->used))
#endif
        {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                int i;
                sp_int_digit carry = 0;

                /* Negate value: two's complement over 'digits' digits. */
                for (i = 0; i < r->used; i++) {
                    /* Borrow propagates once any lower digit was non-zero. */
                    sp_int_digit next = r->dp[i] > 0;
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
                    carry |= next;
                }
                /* Sign-extend the borrow through the remaining digits. */
                for (; i < digits; i++) {
                    r->dp[i] = (sp_int_digit)0 - carry;
                }
                r->sign = MP_ZPOS;
            }
#endif
            /* Set used and mask off top digit of result. */
            r->used = digits;
            e &= SP_WORD_MASK;
            if (e > 0) {
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
            }
            sp_clamp(r);
        }
    }

    return err;
}
  11460. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  11461. #if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  11462. !defined(NO_DH))
  11463. /* Multiply by 2^e: r = a << e
  11464. *
  11465. * @param [in] a SP integer to multiply.
  11466. * @param [in] e Multiplier bits (multiplier equals 2^e).
  11467. * @param [out] r SP integer to hold result.
  11468. *
  11469. * @return MP_OKAY on success.
  11470. * @return MP_VAL when a or r is NULL, or result is too big for fixed data
  11471. * length.
  11472. */
  11473. int sp_mul_2d(sp_int* a, int e, sp_int* r)
  11474. {
  11475. int err = MP_OKAY;
  11476. if ((a == NULL) || (r == NULL)) {
  11477. err = MP_VAL;
  11478. }
  11479. if ((err == MP_OKAY) && (sp_count_bits(a) + e > r->size * SP_WORD_SIZE)) {
  11480. err = MP_VAL;
  11481. }
  11482. if (err == MP_OKAY) {
  11483. /* Copy a into r as left shift function works on the number. */
  11484. if (a != r) {
  11485. err = sp_copy(a, r);
  11486. }
  11487. }
  11488. if (err == MP_OKAY) {
  11489. #if 0
  11490. sp_print(a, "a");
  11491. sp_print_int(e, "n");
  11492. #endif
  11493. err = sp_lshb(r, e);
  11494. #if 0
  11495. sp_print(r, "rsl");
  11496. #endif
  11497. }
  11498. return err;
  11499. }
  11500. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  11501. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  11502. defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  11503. /* START SP_SQR implementations */
  11504. /* This code is generated.
  11505. * To generate:
  11506. * cd scripts/sp/sp_int
  11507. * ./gen.sh
  11508. * File sp_sqr.c contains code.
  11509. */
  11510. #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
  11511. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba (column-wise) implementation built on the SP_ASM_* primitives.
 * Exploits symmetry: each cross product a[i]*a[j] (i != j) is added
 * twice, each square term a[k]*a[k] once.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized to exactly the double-width result. */
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if ((err == MP_OKAY) && (a->used <= 1)) {
        /* Single digit: one widening square. */
        sp_int_digit l, h;

        h = 0;
        l = 0;
        /* NOTE(review): the first output of SP_ASM_SQR appears to be the
         * low word (t[0] = h, t[1] = l) - consistent with _sp_sqr_4. */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        t[1] = l;
    }
    else if (err == MP_OKAY) {
        sp_int_digit l, h, o;   /* Triple-width column accumulator. */

        /* Column 0: square of the least significant digit. */
        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Lower half of the columns: j is the loop bound (counts to 0). */
        for (k = 1; k < (a->used + 1) / 2; k++) {
            /* Odd column 2k-1: cross products only. */
            i = k;
            j = k - 1;
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;
            /* Even column 2k: square term plus cross products. */
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = k - 1;
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Upper half: i is bounded by a->used instead of j by 0. */
        for (; k < a->used; k++) {
            i = k;
            j = k - 1;
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = k - 1;
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry into the top digit. */
        t[k * 2 - 1] = l;
    }

    if (err == MP_OKAY) {
        r->used = a->used * 2;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  11613. #else /* !SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Portable column-wise implementation using double-width words
 * (sp_int_word).  For column k all products a[i]*a[j] with i + j == k are
 * accumulated; cross products are added twice, the square term once.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;
        sp_int_word l;    /* Low word of the running column sum. */
        sp_int_word h;    /* High word (carries into the next column). */
#ifdef SP_WORD_OVERFLOW
        sp_int_word o;    /* Extra carry word when sums can overflow. */
#endif

        /* Column 0: only a[0]^2. */
        w = (sp_int_word)a->dp[0] * a->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
#ifdef SP_WORD_OVERFLOW
        o = 0;
#endif
        /* Columns 1 .. 2*(used-1). */
        for (k = 1; k <= (a->used - 1) * 2; k++) {
            i = k / 2;
            j = k - i;
            /* Even column: add the square term once. */
            if (i == j) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Cross products with i > j: each contributes twice. */
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
                /* Second addition of the same product. */
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
#ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
#endif
            }
            /* Store the column digit; shift accumulators down one digit. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
#ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
#endif
        }
        /* Final top digit. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  11711. #endif /* SQR_MUL_ASM */
  11712. #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
  11713. #ifndef WOLFSSL_SP_SMALL
  11714. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  11715. #if SP_WORD_SIZE == 64
  11716. #ifndef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Long-hand implementation for 4-digit numbers.  w[0..9] hold the ten
 * distinct digit products a[i]*a[j] (i <= j); cross products are added
 * twice per result digit, square terms once.  w[0] doubles as the running
 * accumulator after r->dp[0] is stored.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[10];
#endif
    sp_int_digit* da = a->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All distinct digit products. */
        w[0] = (sp_int_word)da[0] * da[0];
        w[1] = (sp_int_word)da[0] * da[1];
        w[2] = (sp_int_word)da[0] * da[2];
        w[3] = (sp_int_word)da[1] * da[1];
        w[4] = (sp_int_word)da[0] * da[3];
        w[5] = (sp_int_word)da[1] * da[2];
        w[6] = (sp_int_word)da[1] * da[3];
        w[7] = (sp_int_word)da[2] * da[2];
        w[8] = (sp_int_word)da[2] * da[3];
        w[9] = (sp_int_word)da[3] * da[3];

        /* Digit 0: a0^2 (low word). */
        r->dp[0] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 1: carry + 2*a0*a1. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        r->dp[1] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 2: carries + 2*a0*a2 + a1^2. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        r->dp[2] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 3: carries + 2*a0*a3 + 2*a1*a2. */
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        r->dp[3] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 4: carries + 2*a1*a3 + a2^2. */
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        r->dp[4] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 5: carries + 2*a2*a3. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        r->dp[5] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 6: carries + a3^2. */
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[6] = w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 7: final carry. */
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        r->dp[7] = w[0];
        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  11818. #else /* SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Comba implementation for 4-digit numbers.  The low result digits are
 * staged in t[] so that a aliasing r is safe; they are copied into r at
 * the end.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Staging for result digits 0-3 while a's digits are still needed. */
    sp_int_digit t[4];

    /* Column 0: a0^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a0*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a0*a2 + a1^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*a0*a3 + 2*a1*a2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*a1*a3 + a2^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*a2*a3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: a3^2 plus the final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);
    return MP_OKAY;
}
  11873. #endif /* SQR_MUL_ASM */
  11874. #endif /* SP_WORD_SIZE == 64 */
  11875. #if SP_WORD_SIZE == 64
  11876. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba implementation for 6-digit numbers.  Low digits are staged in t[]
 * so that a aliasing r is safe.  (tl, th, to) collect a column's cross
 * products once, then SP_ASM_ADD_DBL_3 adds the triple doubled.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_6(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    sp_int_digit t[6];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb macros accumulate into 'to'
     * rather than setting it, hence the explicit zero here. */
    to = 0;
#endif

    /* Column 0: a0^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a0*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a0*a2 + a1^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*a0*a3 + 2*a1*a2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*a0*a4 + 2*a1*a3 + a2^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), via single-sum then double. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*a1*a5 + 2*a2*a4 + a3^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*a2*a5 + 2*a3*a4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*a3*a5 + a4^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*a4*a5. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Columns 10 and 11: a5^2 plus the final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);
    return MP_OKAY;
}
  11965. #endif /* SQR_MUL_ASM */
  11966. #endif /* SP_WORD_SIZE == 64 */
  11967. #if SP_WORD_SIZE == 32
  11968. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba implementation for 8-digit numbers.  Low digits are staged in t[]
 * so that a aliasing r is safe.  (tl, th, to) collect a column's cross
 * products once, then SP_ASM_ADD_DBL_3 adds the triple doubled.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_8(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    sp_int_digit t[8];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): presumably the Thumb macros accumulate into 'to'
     * rather than setting it, hence the explicit zero here. */
    to = 0;
#endif

    /* Column 0: a0^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a0*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a0*a2 + a1^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*a0*a3 + 2*a1*a2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*a0*a4 + 2*a1*a3 + a2^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*(a0*a6 + a1*a5 + a2*a4) + a3^2. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*(a0*a7 + a1*a6 + a2*a5 + a3*a4). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*(a1*a7 + a2*a6 + a3*a5) + a4^2. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*(a2*a7 + a3*a6 + a4*a5). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10: 2*a3*a7 + 2*a4*a6 + a5^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11: 2*a4*a7 + 2*a5*a6. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12: 2*a5*a7 + a6^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13: 2*a6*a7. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Columns 14 and 15: a7^2 plus the final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    sp_clamp(r);
    return MP_OKAY;
}
  12092. #endif /* SQR_MUL_ASM */
  12093. #endif /* SP_WORD_SIZE == 32 */
  12094. #if SP_WORD_SIZE == 32
  12095. #ifdef SQR_MUL_ASM
  12096. /* Square a and store in r. r = a * a
  12097. *
  12098. * Comba implementation.
  12099. *
  12100. * @param [in] a SP integer to square.
  12101. * @param [out] r SP integer result.
  12102. *
  12103. * @return MP_OKAY on success.
  12104. * @return MP_MEM when dynamic memory allocation fails.
  12105. */
  12106. static int _sp_sqr_12(sp_int* a, sp_int* r)
  12107. {
  12108. sp_int_digit l = 0;
  12109. sp_int_digit h = 0;
  12110. sp_int_digit o = 0;
  12111. sp_int_digit tl = 0;
  12112. sp_int_digit th = 0;
  12113. sp_int_digit to;
  12114. sp_int_digit t[12];
  12115. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  12116. to = 0;
  12117. #endif
  12118. SP_ASM_SQR(h, l, a->dp[0]);
  12119. t[0] = h;
  12120. h = 0;
  12121. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  12122. t[1] = l;
  12123. l = h;
  12124. h = o;
  12125. o = 0;
  12126. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  12127. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  12128. t[2] = l;
  12129. l = h;
  12130. h = o;
  12131. o = 0;
  12132. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  12133. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  12134. t[3] = l;
  12135. l = h;
  12136. h = o;
  12137. o = 0;
  12138. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  12139. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  12140. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  12141. t[4] = l;
  12142. l = h;
  12143. h = o;
  12144. o = 0;
  12145. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  12146. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  12147. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  12148. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12149. t[5] = l;
  12150. l = h;
  12151. h = o;
  12152. o = 0;
  12153. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  12154. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  12155. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  12156. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  12157. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12158. t[6] = l;
  12159. l = h;
  12160. h = o;
  12161. o = 0;
  12162. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  12163. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  12164. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  12165. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  12166. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12167. t[7] = l;
  12168. l = h;
  12169. h = o;
  12170. o = 0;
  12171. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  12172. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  12173. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  12174. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  12175. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  12176. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12177. t[8] = l;
  12178. l = h;
  12179. h = o;
  12180. o = 0;
  12181. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  12182. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  12183. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  12184. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  12185. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  12186. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12187. t[9] = l;
  12188. l = h;
  12189. h = o;
  12190. o = 0;
  12191. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  12192. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  12193. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  12194. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  12195. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  12196. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  12197. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12198. t[10] = l;
  12199. l = h;
  12200. h = o;
  12201. o = 0;
  12202. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  12203. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  12204. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  12205. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  12206. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  12207. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  12208. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12209. t[11] = l;
  12210. l = h;
  12211. h = o;
  12212. o = 0;
  12213. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
  12214. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  12215. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  12216. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  12217. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  12218. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  12219. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12220. r->dp[12] = l;
  12221. l = h;
  12222. h = o;
  12223. o = 0;
  12224. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
  12225. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  12226. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  12227. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  12228. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  12229. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12230. r->dp[13] = l;
  12231. l = h;
  12232. h = o;
  12233. o = 0;
  12234. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
  12235. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  12236. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  12237. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  12238. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  12239. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12240. r->dp[14] = l;
  12241. l = h;
  12242. h = o;
  12243. o = 0;
  12244. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
  12245. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  12246. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  12247. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  12248. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12249. r->dp[15] = l;
  12250. l = h;
  12251. h = o;
  12252. o = 0;
  12253. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
  12254. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  12255. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  12256. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  12257. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12258. r->dp[16] = l;
  12259. l = h;
  12260. h = o;
  12261. o = 0;
  12262. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
  12263. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  12264. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  12265. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12266. r->dp[17] = l;
  12267. l = h;
  12268. h = o;
  12269. o = 0;
  12270. SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
  12271. SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
  12272. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  12273. r->dp[18] = l;
  12274. l = h;
  12275. h = o;
  12276. o = 0;
  12277. SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
  12278. SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
  12279. r->dp[19] = l;
  12280. l = h;
  12281. h = o;
  12282. o = 0;
  12283. SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
  12284. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  12285. r->dp[20] = l;
  12286. l = h;
  12287. h = o;
  12288. o = 0;
  12289. SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
  12290. r->dp[21] = l;
  12291. l = h;
  12292. h = o;
  12293. SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
  12294. r->dp[22] = l;
  12295. r->dp[23] = h;
  12296. XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
  12297. r->used = 24;
  12298. sp_clamp(r);
  12299. return MP_OKAY;
  12300. }
  12301. #endif /* SQR_MUL_ASM */
  12302. #endif /* SP_WORD_SIZE == 32 */
  12303. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  12304. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  12305. #if SP_INT_DIGITS >= 32
  12306. /* Square a and store in r. r = a * a
  12307. *
  12308. * Comba implementation.
  12309. *
  12310. * @param [in] a SP integer to square.
  12311. * @param [out] r SP integer result.
  12312. *
  12313. * @return MP_OKAY on success.
  12314. * @return MP_MEM when dynamic memory allocation fails.
  12315. */
static int _sp_sqr_16(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    /* Primary three-word column accumulator: low, high, overflow. After each
     * column is emitted, the words rotate down (l = h; h = o; o = 0;) to carry
     * into the next column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary three-word accumulator. For the wide middle columns the
     * off-diagonal products a[i]*a[j] (i < j) are summed once into
     * (tl, th, to) and then added twice into (l, h, o) via
     * SP_ASM_ADD_DBL_3, instead of computing each product twice. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    /* Staging area for the low 16 result words; they are copied into r->dp
     * only after every a->dp word has been consumed (NOTE(review): this
     * appears to allow r to alias a — confirm against callers). */
    sp_int_digit t[16];
#endif
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* This target's SP_ASM_* macros read 'to' before setting it. */
    to = 0;
#endif
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column 0: a[0]^2 (SP_ASM_SQR leaves the low word in 'h' here). */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        /* Column 1: 2*a[0]*a[1]. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 2: 2*a[0]*a[2] + a[1]^2. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 5: 2*(a[0]*a[5] + a[1]*a[4] + a[2]*a[3]).
         * From here on, cross-products are summed once into (tl, th, to)
         * and doubled in with SP_ASM_ADD_DBL_3. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 6: 2*(a[0]*a[6] + a[1]*a[5] + a[2]*a[4]) + a[3]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 7: 2*(a[0]*a[7] + a[1]*a[6] + a[2]*a[5] + a[3]*a[4]). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 8: 2*(sum of a[i]*a[8-i], i=0..3) + a[4]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 9: 2*(sum of a[i]*a[9-i], i=0..4). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 10: 2*(sum of a[i]*a[10-i], i=0..4) + a[5]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 11: 2*(sum of a[i]*a[11-i], i=0..5). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 12: 2*(sum of a[i]*a[12-i], i=0..5) + a[6]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 13: 2*(sum of a[i]*a[13-i], i=0..6). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 14: 2*(sum of a[i]*a[14-i], i=0..6) + a[7]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 15 (widest): 2*(sum of a[i]*a[15-i], i=0..7). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 16: 2*(sum of a[i]*a[16-i], i=1..7) + a[8]^2.
         * High-half columns are written directly to r->dp; every a->dp word
         * they read has index <= 15, so the staged low half in t[] is what
         * protects the inputs. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 17: 2*(sum of a[i]*a[17-i], i=2..8). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 18: 2*(sum of a[i]*a[18-i], i=3..8) + a[9]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 19: 2*(sum of a[i]*a[19-i], i=4..9). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 20: 2*(sum of a[i]*a[20-i], i=5..9) + a[10]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 21: 2*(sum of a[i]*a[21-i], i=6..10). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 22: 2*(sum of a[i]*a[22-i], i=7..10) + a[11]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 23: 2*(sum of a[i]*a[23-i], i=8..11). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 24: 2*(a[9]*a[15] + a[10]*a[14] + a[11]*a[13]) + a[12]^2. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 25: 2*(a[10]*a[15] + a[11]*a[14] + a[12]*a[13]). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 26: 2*(a[11]*a[15] + a[12]*a[14]) + a[13]^2.
         * Narrow tail columns revert to the direct double-and-add macros. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 27: 2*(a[12]*a[15] + a[13]*a[14]). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 28: 2*a[13]*a[15] + a[14]^2. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 29: 2*a[14]*a[15]. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Columns 30/31: a[15]^2 plus the final carry; no overflow word can
         * remain past the top, so the _NO variant is used. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;
        /* Commit the staged low half, then trim leading zero digits. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        sp_clamp(r);
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  12628. #endif /* SP_INT_DIGITS >= 32 */
  12629. #if SP_INT_DIGITS >= 48
  12630. /* Square a and store in r. r = a * a
  12631. *
  12632. * Comba implementation.
  12633. *
  12634. * @param [in] a SP integer to square.
  12635. * @param [out] r SP integer result.
  12636. *
  12637. * @return MP_OKAY on success.
  12638. * @return MP_MEM when dynamic memory allocation fails.
  12639. */
  12640. static int _sp_sqr_24(sp_int* a, sp_int* r)
  12641. {
  12642. int err = MP_OKAY;
  12643. sp_int_digit l = 0;
  12644. sp_int_digit h = 0;
  12645. sp_int_digit o = 0;
  12646. sp_int_digit tl = 0;
  12647. sp_int_digit th = 0;
  12648. sp_int_digit to;
  12649. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  12650. sp_int_digit* t = NULL;
  12651. #else
  12652. sp_int_digit t[24];
  12653. #endif
  12654. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  12655. to = 0;
  12656. #endif
  12657. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  12658. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  12659. DYNAMIC_TYPE_BIGINT);
  12660. if (t == NULL) {
  12661. err = MP_MEM;
  12662. }
  12663. #endif
  12664. if (err == MP_OKAY) {
  12665. SP_ASM_SQR(h, l, a->dp[0]);
  12666. t[0] = h;
  12667. h = 0;
  12668. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  12669. t[1] = l;
  12670. l = h;
  12671. h = o;
  12672. o = 0;
  12673. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  12674. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  12675. t[2] = l;
  12676. l = h;
  12677. h = o;
  12678. o = 0;
  12679. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  12680. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  12681. t[3] = l;
  12682. l = h;
  12683. h = o;
  12684. o = 0;
  12685. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  12686. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  12687. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  12688. t[4] = l;
  12689. l = h;
  12690. h = o;
  12691. o = 0;
  12692. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  12693. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  12694. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  12695. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12696. t[5] = l;
  12697. l = h;
  12698. h = o;
  12699. o = 0;
  12700. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  12701. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  12702. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  12703. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  12704. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12705. t[6] = l;
  12706. l = h;
  12707. h = o;
  12708. o = 0;
  12709. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  12710. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  12711. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  12712. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  12713. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12714. t[7] = l;
  12715. l = h;
  12716. h = o;
  12717. o = 0;
  12718. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  12719. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  12720. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  12721. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  12722. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  12723. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12724. t[8] = l;
  12725. l = h;
  12726. h = o;
  12727. o = 0;
  12728. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  12729. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  12730. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  12731. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  12732. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  12733. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12734. t[9] = l;
  12735. l = h;
  12736. h = o;
  12737. o = 0;
  12738. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  12739. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  12740. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  12741. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  12742. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  12743. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  12744. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12745. t[10] = l;
  12746. l = h;
  12747. h = o;
  12748. o = 0;
  12749. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  12750. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  12751. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  12752. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  12753. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  12754. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  12755. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12756. t[11] = l;
  12757. l = h;
  12758. h = o;
  12759. o = 0;
  12760. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
  12761. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
  12762. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  12763. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  12764. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  12765. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  12766. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  12767. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12768. t[12] = l;
  12769. l = h;
  12770. h = o;
  12771. o = 0;
  12772. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
  12773. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
  12774. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
  12775. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  12776. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  12777. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  12778. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  12779. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12780. t[13] = l;
  12781. l = h;
  12782. h = o;
  12783. o = 0;
  12784. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
  12785. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
  12786. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
  12787. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
  12788. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  12789. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  12790. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  12791. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  12792. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12793. t[14] = l;
  12794. l = h;
  12795. h = o;
  12796. o = 0;
  12797. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
  12798. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
  12799. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
  12800. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
  12801. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
  12802. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  12803. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  12804. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  12805. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12806. t[15] = l;
  12807. l = h;
  12808. h = o;
  12809. o = 0;
  12810. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
  12811. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
  12812. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
  12813. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
  12814. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
  12815. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
  12816. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  12817. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  12818. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  12819. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12820. t[16] = l;
  12821. l = h;
  12822. h = o;
  12823. o = 0;
  12824. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
  12825. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
  12826. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
  12827. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
  12828. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
  12829. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
  12830. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
  12831. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  12832. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  12833. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12834. t[17] = l;
  12835. l = h;
  12836. h = o;
  12837. o = 0;
  12838. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
  12839. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
  12840. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
  12841. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
  12842. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
  12843. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
  12844. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
  12845. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
  12846. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
  12847. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  12848. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12849. t[18] = l;
  12850. l = h;
  12851. h = o;
  12852. o = 0;
  12853. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
  12854. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
  12855. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
  12856. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
  12857. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
  12858. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
  12859. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
  12860. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
  12861. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
  12862. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
  12863. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12864. t[19] = l;
  12865. l = h;
  12866. h = o;
  12867. o = 0;
  12868. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
  12869. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
  12870. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
  12871. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
  12872. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
  12873. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
  12874. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
  12875. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
  12876. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
  12877. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
  12878. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  12879. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12880. t[20] = l;
  12881. l = h;
  12882. h = o;
  12883. o = 0;
  12884. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
  12885. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
  12886. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
  12887. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
  12888. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
  12889. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
  12890. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
  12891. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
  12892. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
  12893. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
  12894. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
  12895. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12896. t[21] = l;
  12897. l = h;
  12898. h = o;
  12899. o = 0;
  12900. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
  12901. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
  12902. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
  12903. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
  12904. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
  12905. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
  12906. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
  12907. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
  12908. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
  12909. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
  12910. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
  12911. SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
  12912. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12913. t[22] = l;
  12914. l = h;
  12915. h = o;
  12916. o = 0;
  12917. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
  12918. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
  12919. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
  12920. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
  12921. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
  12922. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
  12923. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
  12924. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
  12925. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
  12926. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
  12927. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
  12928. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
  12929. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12930. t[23] = l;
  12931. l = h;
  12932. h = o;
  12933. o = 0;
  12934. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
  12935. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
  12936. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
  12937. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
  12938. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
  12939. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
  12940. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
  12941. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
  12942. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
  12943. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
  12944. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
  12945. SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
  12946. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12947. r->dp[24] = l;
  12948. l = h;
  12949. h = o;
  12950. o = 0;
  12951. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
  12952. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
  12953. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
  12954. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
  12955. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
  12956. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
  12957. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
  12958. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
  12959. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
  12960. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
  12961. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
  12962. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12963. r->dp[25] = l;
  12964. l = h;
  12965. h = o;
  12966. o = 0;
  12967. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
  12968. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
  12969. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
  12970. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
  12971. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
  12972. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
  12973. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
  12974. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
  12975. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
  12976. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
  12977. SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
  12978. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12979. r->dp[26] = l;
  12980. l = h;
  12981. h = o;
  12982. o = 0;
  12983. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
  12984. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
  12985. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
  12986. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
  12987. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
  12988. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
  12989. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
  12990. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
  12991. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
  12992. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
  12993. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  12994. r->dp[27] = l;
  12995. l = h;
  12996. h = o;
  12997. o = 0;
  12998. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
  12999. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
  13000. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
  13001. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
  13002. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
  13003. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
  13004. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
  13005. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
  13006. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
  13007. SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
  13008. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13009. r->dp[28] = l;
  13010. l = h;
  13011. h = o;
  13012. o = 0;
  13013. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
  13014. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
  13015. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
  13016. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
  13017. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
  13018. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
  13019. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
  13020. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
  13021. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
  13022. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13023. r->dp[29] = l;
  13024. l = h;
  13025. h = o;
  13026. o = 0;
  13027. SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
  13028. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
  13029. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
  13030. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
  13031. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
  13032. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
  13033. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
  13034. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
  13035. SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
  13036. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13037. r->dp[30] = l;
  13038. l = h;
  13039. h = o;
  13040. o = 0;
  13041. SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
  13042. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
  13043. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
  13044. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
  13045. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
  13046. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
  13047. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
  13048. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
  13049. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13050. r->dp[31] = l;
  13051. l = h;
  13052. h = o;
  13053. o = 0;
  13054. SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
  13055. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
  13056. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
  13057. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
  13058. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
  13059. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
  13060. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
  13061. SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
  13062. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13063. r->dp[32] = l;
  13064. l = h;
  13065. h = o;
  13066. o = 0;
  13067. SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
  13068. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
  13069. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
  13070. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
  13071. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
  13072. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
  13073. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
  13074. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13075. r->dp[33] = l;
  13076. l = h;
  13077. h = o;
  13078. o = 0;
  13079. SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
  13080. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
  13081. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
  13082. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
  13083. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
  13084. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
  13085. SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
  13086. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13087. r->dp[34] = l;
  13088. l = h;
  13089. h = o;
  13090. o = 0;
  13091. SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
  13092. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
  13093. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
  13094. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
  13095. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
  13096. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
  13097. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13098. r->dp[35] = l;
  13099. l = h;
  13100. h = o;
  13101. o = 0;
  13102. SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
  13103. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
  13104. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
  13105. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
  13106. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
  13107. SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
  13108. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13109. r->dp[36] = l;
  13110. l = h;
  13111. h = o;
  13112. o = 0;
  13113. SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
  13114. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
  13115. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
  13116. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
  13117. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
  13118. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13119. r->dp[37] = l;
  13120. l = h;
  13121. h = o;
  13122. o = 0;
  13123. SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
  13124. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
  13125. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
  13126. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
  13127. SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
  13128. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13129. r->dp[38] = l;
  13130. l = h;
  13131. h = o;
  13132. o = 0;
  13133. SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
  13134. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
  13135. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
  13136. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
  13137. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13138. r->dp[39] = l;
  13139. l = h;
  13140. h = o;
  13141. o = 0;
  13142. SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
  13143. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
  13144. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
  13145. SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
  13146. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13147. r->dp[40] = l;
  13148. l = h;
  13149. h = o;
  13150. o = 0;
  13151. SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
  13152. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
  13153. SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
  13154. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  13155. r->dp[41] = l;
  13156. l = h;
  13157. h = o;
  13158. o = 0;
  13159. SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
  13160. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
  13161. SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
  13162. r->dp[42] = l;
  13163. l = h;
  13164. h = o;
  13165. o = 0;
  13166. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
  13167. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
  13168. r->dp[43] = l;
  13169. l = h;
  13170. h = o;
  13171. o = 0;
  13172. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
  13173. SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
  13174. r->dp[44] = l;
  13175. l = h;
  13176. h = o;
  13177. o = 0;
  13178. SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
  13179. r->dp[45] = l;
  13180. l = h;
  13181. h = o;
  13182. SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
  13183. r->dp[46] = l;
  13184. r->dp[47] = h;
  13185. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  13186. r->used = 48;
  13187. sp_clamp(r);
  13188. }
  13189. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  13190. if (t != NULL) {
  13191. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  13192. }
  13193. #endif
  13194. return err;
  13195. }
  13196. #endif /* SP_INT_DIGITS >= 48 */
  13197. #if SP_INT_DIGITS >= 64
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation. a is split into a 16-digit low half a0 and a
 * 16-digit high half a1 and the square is assembled from three half-size
 * squares:
 *     a ^ 2 = (z2 << 32 digits) + ((z1 - z0 - z2) << 16 digits) + z0
 * where z0 = a0 ^ 2, z2 = a1 ^ 2 and z1 = (a0 + a1) ^ 2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_32(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the 16-digit sum a0 + a1. */
    DECL_SP_INT(a1, 16);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT(a1, 16, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 is accumulated directly in the result. */
        z0 = r;

        /* a1 = high 16 digits of a. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_16(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 truncated to 16 digits; the carry out is kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_16(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_16(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 - already in place. */
        /* r += (z1 - z0 - z2) << 16 */

        /* _sp_sqr_16() squared only the truncated 16-digit sum s. When the
         * addition carried (ca != 0) the full square (ca * 2^(16 digits) + s)^2
         * also needs ca^2 in digit 32 (ca is 0 or 1 so ca^2 == ca) and
         * 2 * ca * s added in at the upper half.
         */
        z1->dp[32] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 16];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 16] = l;
            l = h;
            h = 0;
            for (i = 1; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;

        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBC(l, h, z0->dp[0]);
        SP_ASM_SUBC(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the top digit. */
        z1->dp[i] += l;

        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just z1 plus carry. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }

        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z1's contribution - just z2 plus carry. */
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  13329. #endif /* SP_INT_DIGITS >= 64 */
  13330. #if SP_INT_DIGITS >= 96
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation. a is split into a 24-digit low half a0 and a
 * 24-digit high half a1 and the square is assembled from three half-size
 * squares:
 *     a ^ 2 = (z2 << 48 digits) + ((z1 - z0 - z2) << 24 digits) + z0
 * where z0 = a0 ^ 2, z2 = a1 ^ 2 and z1 = (a0 + a1) ^ 2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_48(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the 24-digit sum a0 + a1. */
    DECL_SP_INT(a1, 24);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT(a1, 24, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 is accumulated directly in the result. */
        z0 = r;

        /* a1 = high 24 digits of a. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_24(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 truncated to 24 digits; the carry out is kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_24(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_24(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 - already in place. */
        /* r += (z1 - z0 - z2) << 24 */

        /* _sp_sqr_24() squared only the truncated 24-digit sum s. When the
         * addition carried (ca != 0) the full square (ca * 2^(24 digits) + s)^2
         * also needs ca^2 in digit 48 (ca is 0 or 1 so ca^2 == ca) and
         * 2 * ca * s added in at the upper half.
         */
        z1->dp[48] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 24];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 24] = l;
            l = h;
            h = 0;
            for (i = 1; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;

        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBC(l, h, z0->dp[0]);
        SP_ASM_SUBC(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the top digit. */
        z1->dp[i] += l;

        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just z1 plus carry. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }

        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z1's contribution - just z2 plus carry. */
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  13462. #endif /* SP_INT_DIGITS >= 96 */
  13463. #if SP_INT_DIGITS >= 128
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation. a is split into a 32-digit low half a0 and a
 * 32-digit high half a1 and the square is assembled from three half-size
 * squares:
 *     a ^ 2 = (z2 << 64 digits) + ((z1 - z0 - z2) << 32 digits) + z0
 * where z0 = a0 ^ 2, z2 = a1 ^ 2 and z1 = (a0 + a1) ^ 2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_64(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the 32-digit sum a0 + a1. */
    DECL_SP_INT(a1, 32);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT(a1, 32, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 is accumulated directly in the result. */
        z0 = r;

        /* a1 = high 32 digits of a. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_32(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 truncated to 32 digits; the carry out is kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_32(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_32(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 - already in place. */
        /* r += (z1 - z0 - z2) << 32 */

        /* _sp_sqr_32() squared only the truncated 32-digit sum s. When the
         * addition carried (ca != 0) the full square (ca * 2^(32 digits) + s)^2
         * also needs ca^2 in digit 64 (ca is 0 or 1 so ca^2 == ca) and
         * 2 * ca * s added in at the upper half.
         */
        z1->dp[64] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 32];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 32] = l;
            l = h;
            h = 0;
            for (i = 1; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;

        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBC(l, h, z0->dp[0]);
        SP_ASM_SUBC(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the top digit. */
        z1->dp[i] += l;

        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just z1 plus carry. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }

        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z1's contribution - just z2 plus carry. */
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  13595. #endif /* SP_INT_DIGITS >= 128 */
  13596. #if SP_INT_DIGITS >= 192
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation. a is split into a 48-digit low half a0 and a
 * 48-digit high half a1 and the square is assembled from three half-size
 * squares:
 *     a ^ 2 = (z2 << 96 digits) + ((z1 - z0 - z2) << 48 digits) + z0
 * where z0 = a0 ^ 2, z2 = a1 ^ 2 and z1 = (a0 + a1) ^ 2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_96(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    sp_int_digit l;
    sp_int_digit h;
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the 48-digit sum a0 + a1. */
    DECL_SP_INT(a1, 48);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT(a1, 48, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 is accumulated directly in the result. */
        z0 = r;

        /* a1 = high 48 digits of a. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_48(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1 truncated to 48 digits; the carry out is kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_48(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 */
        err = _sp_sqr_48(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 - already in place. */
        /* r += (z1 - z0 - z2) << 48 */

        /* _sp_sqr_48() squared only the truncated 48-digit sum s. When the
         * addition carried (ca != 0) the full square (ca * 2^(48 digits) + s)^2
         * also needs ca^2 in digit 96 (ca is 0 or 1 so ca^2 == ca) and
         * 2 * ca * s added in at the upper half.
         */
        z1->dp[96] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 48];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 48] = l;
            l = h;
            h = 0;
            for (i = 1; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;

        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBC(l, h, z0->dp[0]);
        SP_ASM_SUBC(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBC(l, h, z0->dp[i]);
            SP_ASM_SUBC(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the top digit. */
        z1->dp[i] += l;

        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet set - just z1 plus carry. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }

        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z1's contribution - just z2 plus carry. */
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  13728. #endif /* SP_INT_DIGITS >= 192 */
  13729. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  13730. #endif /* !WOLFSSL_SP_SMALL */
/* Square a and store in r. r = a * a
 *
 * Dispatches to a fixed-size unrolled or Karatsuba implementation when the
 * digit count matches one of the compiled-in sizes, otherwise falls back to
 * the generic _sp_sqr(). With small SP math, squaring is simply a multiply
 * of a by itself.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or the result will be too big for
 *          fixed data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_sqr(sp_int* a, sp_int* r)
{
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
    return sp_mul(a, a, r);
#else
    int err = MP_OKAY;

    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
    }
#endif

    if (err == MP_OKAY) {
        if (a->used == 0) {
            /* 0 squared is 0. */
            _sp_zero(r);
        }
        else
        /* The if/else chain below is assembled across preprocessor blocks:
         * exactly one size-specific implementation is chosen for the digit
         * count, with the generic _sp_sqr() as the final else. */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if SP_WORD_SIZE == 64
        if (a->used == 4) {
            err = _sp_sqr_4(a, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if SP_WORD_SIZE == 64
#ifdef SQR_MUL_ASM
        if (a->used == 6) {
            err = _sp_sqr_6(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if SP_WORD_SIZE == 32
#ifdef SQR_MUL_ASM
        if (a->used == 8) {
            err = _sp_sqr_8(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if SP_WORD_SIZE == 32
#ifdef SQR_MUL_ASM
        if (a->used == 12) {
            err = _sp_sqr_12(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 32
        if (a->used == 16) {
            err = _sp_sqr_16(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#if SP_INT_DIGITS >= 48
        if (a->used == 24) {
            err = _sp_sqr_24(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if (a->used == 32) {
            err = _sp_sqr_32(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if (a->used == 48) {
            err = _sp_sqr_48(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if (a->used == 64) {
            err = _sp_sqr_64(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if (a->used == 96) {
            err = _sp_sqr_96(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        {
            /* Generic fallback for any other size. */
            err = _sp_sqr(a, r);
        }
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* A square is never negative. */
    if (err == MP_OKAY) {
        r->sign = MP_ZPOS;
    }
#endif
#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rsqr");
    }
#endif
    return err;
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
}
  13853. /* END SP_SQR implementations */
  13854. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  13855. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  13856. #if (!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  13857. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
  13858. /* Square a mod m and store in r: r = (a * a) mod m
  13859. *
  13860. * @param [in] a SP integer to square.
  13861. * @param [in] m SP integer that is the modulus.
  13862. * @param [out] r SP integer result.
  13863. *
  13864. * @return MP_OKAY on success.
  13865. * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
  13866. * for fixed data length.
  13867. * @return MP_MEM when dynamic memory allocation fails.
  13868. */
  13869. int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
  13870. {
  13871. int err = MP_OKAY;
  13872. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  13873. err = MP_VAL;
  13874. }
  13875. if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
  13876. err = MP_VAL;
  13877. }
  13878. if (err == MP_OKAY) {
  13879. err = sp_sqr(a, r);
  13880. }
  13881. if (err == MP_OKAY) {
  13882. err = sp_mod(r, m, r);
  13883. }
  13884. return err;
  13885. }
  13886. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  13887. /**********************
  13888. * Montgomery functions
  13889. **********************/
  13890. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  13891. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
  13892. /* Reduce a number in montgomery form.
  13893. *
  13894. * Assumes a and m are not NULL and m is not 0.
  13895. *
  13896. * @param [in,out] a SP integer to Montgomery reduce.
  13897. * @param [in] m SP integer that is the modulus.
  13898. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  13899. *
  13900. * @return MP_OKAY on success.
  13901. */
  13902. static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
  13903. {
  13904. #if !defined(SQR_MUL_ASM)
  13905. int i;
  13906. int bits;
  13907. sp_int_word w;
  13908. sp_int_digit mu;
  13909. #if 0
  13910. sp_print(a, "a");
  13911. sp_print(m, "m");
  13912. #endif
  13913. bits = sp_count_bits(m);
  13914. for (i = a->used; i < m->used * 2; i++) {
  13915. a->dp[i] = 0;
  13916. }
  13917. if (m->used == 1) {
  13918. mu = mp * a->dp[0];
  13919. w = a->dp[0];
  13920. w += (sp_int_word)mu * m->dp[0];
  13921. a->dp[0] = (sp_int_digit)w;
  13922. w >>= SP_WORD_SIZE;
  13923. w += a->dp[1];
  13924. a->dp[1] = (sp_int_digit)w;
  13925. w >>= SP_WORD_SIZE;
  13926. a->dp[2] = (sp_int_digit)w;
  13927. a->used = 3;
  13928. /* mp is SP_WORD_SIZE */
  13929. bits = SP_WORD_SIZE;
  13930. }
  13931. else {
  13932. sp_int_digit mask = (sp_int_digit)
  13933. ((1UL << (bits & (SP_WORD_SIZE - 1))) - 1);
  13934. sp_int_word o = 0;
  13935. for (i = 0; i < m->used; i++) {
  13936. int j;
  13937. mu = mp * a->dp[i];
  13938. if ((i == m->used - 1) && (mask != 0)) {
  13939. mu &= mask;
  13940. }
  13941. w = a->dp[i];
  13942. w += (sp_int_word)mu * m->dp[0];
  13943. a->dp[i] = (sp_int_digit)w;
  13944. w >>= SP_WORD_SIZE;
  13945. for (j = 1; j < m->used - 1; j++) {
  13946. w += a->dp[i + j];
  13947. w += (sp_int_word)mu * m->dp[j];
  13948. a->dp[i + j] = (sp_int_digit)w;
  13949. w >>= SP_WORD_SIZE;
  13950. }
  13951. w += o;
  13952. w += a->dp[i + j];
  13953. o = (sp_int_digit)(w >> SP_WORD_SIZE);
  13954. w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
  13955. a->dp[i + j] = (sp_int_digit)w;
  13956. w >>= SP_WORD_SIZE;
  13957. o += w;
  13958. }
  13959. o += a->dp[m->used * 2 - 1];
  13960. a->dp[m->used * 2 - 1] = (sp_int_digit)o;
  13961. o >>= SP_WORD_SIZE;
  13962. a->dp[m->used * 2] = (sp_int_digit)o;
  13963. a->used = m->used * 2 + 1;
  13964. }
  13965. sp_clamp(a);
  13966. sp_rshb(a, bits, a);
  13967. if (_sp_cmp_abs(a, m) != MP_LT) {
  13968. _sp_sub_off(a, m, a, 0);
  13969. }
  13970. #if 0
  13971. sp_print(a, "rr");
  13972. #endif
  13973. return MP_OKAY;
  13974. #else /* !SQR_MUL_ASM */
  13975. int i;
  13976. int j;
  13977. int bits;
  13978. sp_int_digit mu;
  13979. sp_int_digit o;
  13980. sp_int_digit mask;
  13981. bits = sp_count_bits(m);
  13982. mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
  13983. for (i = a->used; i < m->used * 2; i++) {
  13984. a->dp[i] = 0;
  13985. }
  13986. if (m->used <= 1) {
  13987. #ifndef SQR_MUL_ASM
  13988. sp_int_word w;
  13989. #else
  13990. sp_int_digit l;
  13991. sp_int_digit h;
  13992. sp_int_digit t;
  13993. #endif
  13994. mu = mp * a->dp[0];
  13995. #ifndef SQR_MUL_ASM
  13996. w = a->dp[0];
  13997. w += (sp_int_word)mu * m->dp[0];
  13998. a->dp[0] = (sp_int_digit)w;
  13999. w >>= SP_WORD_SIZE;
  14000. w += a->dp[1];
  14001. a->dp[1] = (sp_int_digit)w;
  14002. w >>= SP_WORD_SIZE;
  14003. a->dp[2] = (sp_int_digit)w;
  14004. #else
  14005. l = a->dp[0];
  14006. h = 0;
  14007. t = m->dp[0];
  14008. SP_ASM_MUL_ADD_NO(l, h, mu, t);
  14009. a->dp[0] = l;
  14010. l = h;
  14011. h = 0;
  14012. t = a->dp[1];
  14013. SP_ASM_ADDC(l, h, t);
  14014. a->dp[1] = l;
  14015. a->dp[2] = h;
  14016. #endif
  14017. a->used = m->used * 2 + 1;
  14018. /* mp is SP_WORD_SIZE */
  14019. bits = SP_WORD_SIZE;
  14020. }
  14021. #ifndef WOLFSSL_HAVE_SP_ECC
  14022. #if SP_WORD_SIZE == 64
  14023. else if ((m->used == 4) && (mask == 0)) {
  14024. sp_int_digit l;
  14025. sp_int_digit h;
  14026. sp_int_digit o2;
  14027. l = 0;
  14028. h = 0;
  14029. o = 0;
  14030. o2 = 0;
  14031. for (i = 0; i < 4; i++) {
  14032. mu = mp * a->dp[0];
  14033. l = a->dp[0];
  14034. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
  14035. l = h;
  14036. h = 0;
  14037. SP_ASM_ADDC(l, h, a->dp[1]);
  14038. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
  14039. a->dp[0] = l;
  14040. l = h;
  14041. h = 0;
  14042. SP_ASM_ADDC(l, h, a->dp[2]);
  14043. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
  14044. a->dp[1] = l;
  14045. l = h;
  14046. h = o2;
  14047. o2 = 0;
  14048. SP_ASM_ADDC_REG(l, h, o);
  14049. SP_ASM_ADDC(l, h, a->dp[i + 3]);
  14050. SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
  14051. a->dp[2] = l;
  14052. o = h;
  14053. l = h;
  14054. h = 0;
  14055. }
  14056. h = o2;
  14057. SP_ASM_ADDC(l, h, a->dp[7]);
  14058. a->dp[3] = l;
  14059. a->dp[4] = h;
  14060. a->used = 5;
  14061. sp_clamp(a);
  14062. if (_sp_cmp_abs(a, m) != MP_LT) {
  14063. sp_sub(a, m, a);
  14064. }
  14065. return MP_OKAY;
  14066. }
  14067. else if ((m->used == 6) && (mask == 0)) {
  14068. sp_int_digit l;
  14069. sp_int_digit h;
  14070. sp_int_digit o2;
  14071. l = 0;
  14072. h = 0;
  14073. o = 0;
  14074. o2 = 0;
  14075. for (i = 0; i < 6; i++) {
  14076. mu = mp * a->dp[0];
  14077. l = a->dp[0];
  14078. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
  14079. l = h;
  14080. h = 0;
  14081. SP_ASM_ADDC(l, h, a->dp[1]);
  14082. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
  14083. a->dp[0] = l;
  14084. l = h;
  14085. h = 0;
  14086. SP_ASM_ADDC(l, h, a->dp[2]);
  14087. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
  14088. a->dp[1] = l;
  14089. l = h;
  14090. h = 0;
  14091. SP_ASM_ADDC(l, h, a->dp[3]);
  14092. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
  14093. a->dp[2] = l;
  14094. l = h;
  14095. h = 0;
  14096. SP_ASM_ADDC(l, h, a->dp[4]);
  14097. SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
  14098. a->dp[3] = l;
  14099. l = h;
  14100. h = o2;
  14101. o2 = 0;
  14102. SP_ASM_ADDC_REG(l, h, o);
  14103. SP_ASM_ADDC(l, h, a->dp[i + 5]);
  14104. SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
  14105. a->dp[4] = l;
  14106. o = h;
  14107. l = h;
  14108. h = 0;
  14109. }
  14110. h = o2;
  14111. SP_ASM_ADDC(l, h, a->dp[11]);
  14112. a->dp[5] = l;
  14113. a->dp[6] = h;
  14114. a->used = 7;
  14115. sp_clamp(a);
  14116. if (_sp_cmp_abs(a, m) != MP_LT) {
  14117. sp_sub(a, m, a);
  14118. }
  14119. return MP_OKAY;
  14120. }
  14121. #elif SP_WORD_SIZE == 32
  14122. else if ((m->used <= 12) && (mask == 0)) {
  14123. sp_int_digit l;
  14124. sp_int_digit h;
  14125. sp_int_digit o2;
  14126. sp_int_digit* ad;
  14127. sp_int_digit* md;
  14128. o = 0;
  14129. o2 = 0;
  14130. ad = a->dp;
  14131. for (i = 0; i < m->used; i++) {
  14132. md = m->dp;
  14133. mu = mp * ad[0];
  14134. l = ad[0];
  14135. h = 0;
  14136. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14137. l = h;
  14138. for (j = 1; j + 1 < m->used - 1; j += 2) {
  14139. h = 0;
  14140. SP_ASM_ADDC(l, h, ad[j]);
  14141. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14142. ad[j - 1] = l;
  14143. l = 0;
  14144. SP_ASM_ADDC(h, l, ad[j + 1]);
  14145. SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
  14146. ad[j] = h;
  14147. }
  14148. for (; j < m->used - 1; j++) {
  14149. h = 0;
  14150. SP_ASM_ADDC(l, h, ad[j]);
  14151. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14152. ad[j - 1] = l;
  14153. l = h;
  14154. }
  14155. h = o2;
  14156. o2 = 0;
  14157. SP_ASM_ADDC_REG(l, h, o);
  14158. SP_ASM_ADDC(l, h, ad[i + j]);
  14159. SP_ASM_MUL_ADD(l, h, o2, mu, *md);
  14160. ad[j - 1] = l;
  14161. o = h;
  14162. }
  14163. l = o;
  14164. h = o2;
  14165. SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
  14166. a->dp[m->used - 1] = l;
  14167. a->dp[m->used] = h;
  14168. a->used = m->used + 1;
  14169. sp_clamp(a);
  14170. if (_sp_cmp_abs(a, m) != MP_LT) {
  14171. sp_sub(a, m, a);
  14172. }
  14173. return MP_OKAY;
  14174. }
  14175. #endif /* SP_WORD_SIZE == 64 | 32 */
  14176. #endif /* WOLFSSL_HAVE_SP_ECC */
  14177. else {
  14178. sp_int_digit l;
  14179. sp_int_digit h;
  14180. sp_int_digit o2;
  14181. sp_int_digit* ad;
  14182. sp_int_digit* md;
  14183. o = 0;
  14184. o2 = 0;
  14185. ad = a->dp;
  14186. for (i = 0; i < m->used; i++, ad++) {
  14187. md = m->dp;
  14188. mu = mp * ad[0];
  14189. if ((i == m->used - 1) && (mask != 0)) {
  14190. mu &= mask;
  14191. }
  14192. l = ad[0];
  14193. h = 0;
  14194. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14195. ad[0] = l;
  14196. l = h;
  14197. for (j = 1; j + 1 < m->used - 1; j += 2) {
  14198. h = 0;
  14199. SP_ASM_ADDC(l, h, ad[j + 0]);
  14200. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14201. ad[j + 0] = l;
  14202. l = 0;
  14203. SP_ASM_ADDC(h, l, ad[j + 1]);
  14204. SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
  14205. ad[j + 1] = h;
  14206. }
  14207. for (; j < m->used - 1; j++) {
  14208. h = 0;
  14209. SP_ASM_ADDC(l, h, ad[j]);
  14210. SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
  14211. ad[j] = l;
  14212. l = h;
  14213. }
  14214. h = o2;
  14215. o2 = 0;
  14216. SP_ASM_ADDC_REG(l, h, o);
  14217. SP_ASM_ADDC(l, h, ad[j]);
  14218. SP_ASM_MUL_ADD(l, h, o2, mu, *md);
  14219. ad[j] = l;
  14220. o = h;
  14221. }
  14222. l = o;
  14223. h = o2;
  14224. SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
  14225. a->dp[m->used * 2 - 1] = l;
  14226. a->dp[m->used * 2] = h;
  14227. a->used = m->used * 2 + 1;
  14228. }
  14229. sp_clamp(a);
  14230. sp_rshb(a, bits, a);
  14231. if (_sp_cmp_abs(a, m) != MP_LT) {
  14232. sp_sub(a, m, a);
  14233. }
  14234. return MP_OKAY;
  14235. #endif /* !SQR_MUL_ASM */
  14236. }
  14237. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  14238. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  14239. /* Reduce a number in montgomery form.
  14240. *
  14241. * @param [in,out] a SP integer to Montgomery reduce.
  14242. * @param [in] m SP integer that is the modulus.
  14243. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  14244. *
  14245. * @return MP_OKAY on success.
  14246. * @return MP_VAL when a or m is NULL or m is zero.
  14247. */
  14248. int sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
  14249. {
  14250. int err;
  14251. if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
  14252. err = MP_VAL;
  14253. }
  14254. else if (a->size < m->used * 2 + 1) {
  14255. err = MP_VAL;
  14256. }
  14257. else {
  14258. err = _sp_mont_red(a, m, mp);
  14259. }
  14260. return err;
  14261. }
  14262. #endif
  14263. /* Calculate the bottom digit of the inverse of negative m.
  14264. *
  14265. * Used when performing Montgomery Reduction.
  14266. *
  14267. * @param [in] m SP integer that is the modulus.
  14268. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  14269. *
  14270. * @return MP_OKAY on success.
  14271. * @return MP_VAL when m or rho is NULL.
  14272. */
  14273. int sp_mont_setup(sp_int* m, sp_int_digit* rho)
  14274. {
  14275. int err = MP_OKAY;
  14276. if ((m == NULL) || (rho == NULL)) {
  14277. err = MP_VAL;
  14278. }
  14279. if ((err == MP_OKAY) && !sp_isodd(m)) {
  14280. err = MP_VAL;
  14281. }
  14282. if (err == MP_OKAY) {
  14283. sp_int_digit x;
  14284. sp_int_digit b;
  14285. b = m->dp[0];
  14286. x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  14287. x *= 2 - b * x; /* here x*a==1 mod 2**8 */
  14288. #if SP_WORD_SIZE >= 16
  14289. x *= 2 - b * x; /* here x*a==1 mod 2**16 */
  14290. #if SP_WORD_SIZE >= 32
  14291. x *= 2 - b * x; /* here x*a==1 mod 2**32 */
  14292. #if SP_WORD_SIZE >= 64
  14293. x *= 2 - b * x; /* here x*a==1 mod 2**64 */
  14294. #endif /* SP_WORD_SIZE >= 64 */
  14295. #endif /* SP_WORD_SIZE >= 32 */
  14296. #endif /* SP_WORD_SIZE >= 16 */
  14297. /* rho = -1/m mod b, subtract x (unsigned) from 0, assign negative */
  14298. *rho = (sp_int_digit)((sp_int_digit)0 - (sp_sint_digit)x);
  14299. }
  14300. return err;
  14301. }
  14302. /* Calculate the normalization value of m.
  14303. * norm = 2^k - m, where k is the number of bits in m
  14304. *
  14305. * @param [out] norm SP integer that normalises numbers into Montgomery
  14306. * form.
  14307. * @param [in] m SP integer that is the modulus.
  14308. *
  14309. * @return MP_OKAY on success.
 * @return MP_VAL when norm or m is NULL, or number of bits in m is maximal.
  14311. */
  14312. int sp_mont_norm(sp_int* norm, sp_int* m)
  14313. {
  14314. int err = MP_OKAY;
  14315. int bits = 0;
  14316. if ((norm == NULL) || (m == NULL)) {
  14317. err = MP_VAL;
  14318. }
  14319. if (err == MP_OKAY) {
  14320. bits = sp_count_bits(m);
  14321. if (bits == m->size * SP_WORD_SIZE) {
  14322. err = MP_VAL;
  14323. }
  14324. }
  14325. if (err == MP_OKAY) {
  14326. if (bits < SP_WORD_SIZE) {
  14327. bits = SP_WORD_SIZE;
  14328. }
  14329. _sp_zero(norm);
  14330. sp_set_bit(norm, bits);
  14331. err = sp_sub(norm, m, norm);
  14332. }
  14333. if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
  14334. norm->dp[0] %= m->dp[0];
  14335. }
  14336. if (err == MP_OKAY) {
  14337. sp_clamp(norm);
  14338. }
  14339. return err;
  14340. }
  14341. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
  14342. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  14343. /*********************************
  14344. * To and from binary and strings.
  14345. *********************************/
  14346. /* Calculate the number of 8-bit values required to represent the
  14347. * multi-precision number.
  14348. *
 * When a is NULL, returns 0.
  14350. *
  14351. * @param [in] a SP integer.
  14352. *
  14353. * @return The count of 8-bit values.
  14354. */
  14355. int sp_unsigned_bin_size(const sp_int* a)
  14356. {
  14357. int cnt = 0;
  14358. if (a != NULL) {
  14359. cnt = (sp_count_bits(a) + 7) / 8;
  14360. }
  14361. return cnt;
  14362. }
  14363. /* Convert a number as an array of bytes in big-endian format to a
  14364. * multi-precision number.
  14365. *
  14366. * @param [out] a SP integer.
  14367. * @param [in] in Array of bytes.
  14368. * @param [in] inSz Number of data bytes in array.
  14369. *
  14370. * @return MP_OKAY on success.
  14371. * @return MP_VAL when the number is too big to fit in an SP.
  14372. */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
{
    int err = MP_OKAY;

    /* in may be NULL only when there are no bytes to read. */
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
        err = MP_VAL;
    }
    /* Number too big to fit in the fixed-size digit array. */
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
        err = MP_VAL;
    }

#ifndef LITTLE_ENDIAN_ORDER
    /* Big-endian host. */
    if (err == MP_OKAY) {
        int i;
        int j;
        int s;

        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;

#ifndef WOLFSSL_SP_INT_DIGIT_ALIGN
        /* Host byte order matches the big-endian input, so whole words can
         * be loaded directly (unaligned access permitted here). */
        for (i = inSz-1,j = 0; i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF,j++) {
            a->dp[j] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
        }
#else
        /* Aligned access required - assemble each word byte by byte,
         * least-significant input byte first. */
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            a->dp[j] = ((sp_int_digit)in[i - 0] << 0);
#if SP_WORD_SIZE >= 16
            a->dp[j] |= ((sp_int_digit)in[i - 1] << 8);
#endif
#if SP_WORD_SIZE >= 32
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
                        ((sp_int_digit)in[i - 3] << 24);
#endif
#if SP_WORD_SIZE >= 64
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
                        ((sp_int_digit)in[i - 5] << 40) |
                        ((sp_int_digit)in[i - 6] << 48) |
                        ((sp_int_digit)in[i - 7] << 56);
#endif
            j++;
        }
#endif
        /* Leftover bytes (fewer than a full word) go into the top word. */
        if (i >= 0) {
            a->dp[a->used - 1] = 0;
            for (s = 0; i >= 0; i--,s += 8) {
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
            }
        }
        sp_clamp(a);
    }
#else
    /* Little-endian host. */
    if (err == MP_OKAY) {
        int i;
        int j;

        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;

        /* Assemble whole words from the big-endian input, starting with the
         * least-significant (last) byte. */
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            a->dp[j] = ((sp_int_digit)in[i - 0] << 0);
#if SP_WORD_SIZE >= 16
            a->dp[j] |= ((sp_int_digit)in[i - 1] << 8);
#endif
#if SP_WORD_SIZE >= 32
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
                        ((sp_int_digit)in[i - 3] << 24);
#endif
#if SP_WORD_SIZE >= 64
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
                        ((sp_int_digit)in[i - 5] << 40) |
                        ((sp_int_digit)in[i - 6] << 48) |
                        ((sp_int_digit)in[i - 7] << 56);
#endif
            j++;
        }

#if SP_WORD_SIZE >= 16
        /* Leftover bytes (i + 1 of them, fewer than one word) are copied as
         * raw bytes into place; on a little-endian host offset inSz-1-k in
         * the digit array holds the byte of significance k of the top word.
         * i < SP_WORD_SIZEOF - 1 here, so the cases cover all remainders. */
        if (i >= 0) {
            byte *d = (byte*)a->dp;

            a->dp[a->used - 1] = 0;
            switch (i) {
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
                case 0: d[inSz - 1 - 0] = in[0];
            }
        }
#endif
        sp_clamp(a);
    }
#endif /* LITTLE_ENDIAN_ORDER */
    return err;
}
  14461. /* Convert the multi-precision number to an array of bytes in big-endian format.
  14462. *
  14463. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  14464. * to calculate the number of bytes required.
  14465. *
  14466. * @param [in] a SP integer.
  14467. * @param [out] out Array to put encoding into.
  14468. *
  14469. * @return MP_OKAY on success.
  14470. * @return MP_VAL when a or out is NULL.
  14471. */
  14472. int sp_to_unsigned_bin(sp_int* a, byte* out)
  14473. {
  14474. return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
  14475. }
  14476. /* Convert the multi-precision number to an array of bytes in big-endian format.
  14477. *
  14478. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  14479. * to calculate the number of bytes required.
  14480. * Front-pads the output array with zeros make number the size of the array.
  14481. *
  14482. * @param [in] a SP integer.
  14483. * @param [out] out Array to put encoding into.
  14484. * @param [in] outSz Size of the array in bytes.
  14485. *
  14486. * @return MP_OKAY on success.
  14487. * @return MP_VAL when a or out is NULL.
  14488. */
  14489. int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
  14490. {
  14491. int err = MP_OKAY;
  14492. if ((a == NULL) || (out == NULL)) {
  14493. err = MP_VAL;
  14494. }
  14495. if (err == MP_OKAY) {
  14496. int j = outSz - 1;
  14497. if (!sp_iszero(a)) {
  14498. int i;
  14499. for (i = 0; (j >= 0) && (i < a->used); i++) {
  14500. int b;
  14501. for (b = 0; b < SP_WORD_SIZE; b += 8) {
  14502. out[j--] = (byte)(a->dp[i] >> b);
  14503. if (j < 0) {
  14504. break;
  14505. }
  14506. }
  14507. }
  14508. }
  14509. for (; j >= 0; j--) {
  14510. out[j] = 0;
  14511. }
  14512. }
  14513. return err;
  14514. }
  14515. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  14516. /* Store the number in big-endian format in array at an offset.
  14517. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  14518. * to calculate the number of bytes required.
  14519. *
  14520. * @param [in] o Offset into array o start encoding.
  14521. * @param [in] a SP integer.
  14522. * @param [out] out Array to put encoding into.
  14523. *
  14524. * @return Index of next byte after data.
  14525. * @return MP_VAL when a or out is NULL.
  14526. */
  14527. int sp_to_unsigned_bin_at_pos(int o, sp_int*a, unsigned char* out)
  14528. {
  14529. int ret = sp_to_unsigned_bin(a, out + o);
  14530. if (ret == MP_OKAY) {
  14531. ret = o + sp_unsigned_bin_size(a);
  14532. }
  14533. return ret;
  14534. }
  14535. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
  14536. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  14537. defined(HAVE_ECC) || !defined(NO_DSA)
  14538. /* Convert hexadecimal number as string in big-endian format to a
  14539. * multi-precision number.
  14540. *
  14541. * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
  14542. *
  14543. * @param [out] a SP integer.
  14544. * @param [in] in NUL terminated string.
  14545. *
  14546. * @return MP_OKAY on success.
  14547. * @return MP_VAL when radix not supported, value is negative, or a character
  14548. * is not valid.
  14549. */
static int _sp_read_radix_16(sp_int* a, const char* in)
{
    int err = MP_OKAY;
    int i;
    int s = 0;      /* Bit position within the current word. */
    int j = 0;      /* Index of the word currently being filled. */

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Optional leading minus sign. */
    if (*in == '-') {
        a->sign = MP_NEG;
        in++;
    }
#endif

    /* Skip leading zeros so they don't count against the size limit. */
    while (*in == '0') {
        in++;
    }

    a->dp[0] = 0;
    /* Parse from the least-significant (last) character upward. */
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
        /* NOTE(review): assumes HexCharToByte reports an invalid character
         * with a negative value - confirm against its definition. */
        int ch = (int)HexCharToByte(in[i]);
        if (ch < 0) {
            err = MP_VAL;
            break;
        }

        /* Current word is full - advance to the next one. */
        if (s == SP_WORD_SIZE) {
            j++;
            /* Number is too big to fit in this sp_int. */
            if (j >= a->size) {
                err = MP_VAL;
                break;
            }
            s = 0;
            a->dp[j] = 0;
        }

        /* Place the 4-bit nibble at the next position in the word. */
        a->dp[j] |= ((sp_int_digit)ch) << s;
        s += 4;
    }

    if (err == MP_OKAY) {
        a->used = j + 1;
        sp_clamp(a);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Normalise "-0" to positive zero. */
        if (sp_iszero(a)) {
            a->sign = MP_ZPOS;
        }
#endif
    }
    return err;
}
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC || !NO_DSA */
  14596. #ifdef WOLFSSL_SP_READ_RADIX_10
  14597. /* Convert decimal number as string in big-endian format to a multi-precision
  14598. * number.
  14599. *
  14600. * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
  14601. *
  14602. * @param [out] a SP integer.
  14603. * @param [in] in NUL terminated string.
  14604. *
  14605. * @return MP_OKAY on success.
  14606. * @return MP_VAL when radix not supported, value is negative, or a character
  14607. * is not valid.
  14608. */
  14609. static int _sp_read_radix_10(sp_int* a, const char* in)
  14610. {
  14611. int err = MP_OKAY;
  14612. int i;
  14613. int len;
  14614. char ch;
  14615. _sp_zero(a);
  14616. #ifdef WOLFSSL_SP_INT_NEGATIVE
  14617. if (*in == '-') {
  14618. a->sign = MP_NEG;
  14619. in++;
  14620. }
  14621. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  14622. while (*in == '0') {
  14623. in++;
  14624. }
  14625. len = (int)XSTRLEN(in);
  14626. for (i = 0; i < len; i++) {
  14627. ch = in[i];
  14628. if ((ch >= '0') && (ch <= '9')) {
  14629. ch -= '0';
  14630. }
  14631. else {
  14632. err = MP_VAL;
  14633. break;
  14634. }
  14635. err = _sp_mul_d(a, 10, a, 0);
  14636. if (err != MP_OKAY) {
  14637. break;
  14638. }
  14639. err = _sp_add_d(a, ch, a);
  14640. if (err != MP_OKAY) {
  14641. break;
  14642. }
  14643. }
  14644. #ifdef WOLFSSL_SP_INT_NEGATIVE
  14645. if ((err == MP_OKAY) && sp_iszero(a)) {
  14646. a->sign = MP_ZPOS;
  14647. }
  14648. #endif
  14649. return err;
  14650. }
  14651. #endif /* WOLFSSL_SP_READ_RADIX_10 */
  14652. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  14653. !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || !defined(NO_DSA)
  14654. /* Convert a number as string in big-endian format to a big number.
  14655. * Only supports base-16 (hexadecimal) and base-10 (decimal).
  14656. *
  14657. * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
  14658. *
  14659. * @param [out] a SP integer.
  14660. * @param [in] in NUL terminated string.
  14661. * @param [in] radix Number of values in a digit.
  14662. *
  14663. * @return MP_OKAY on success.
  14664. * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
  14665. * or a character is not valid.
  14666. */
  14667. int sp_read_radix(sp_int* a, const char* in, int radix)
  14668. {
  14669. int err = MP_OKAY;
  14670. if ((a == NULL) || (in == NULL)) {
  14671. err = MP_VAL;
  14672. }
  14673. if (err == MP_OKAY) {
  14674. #ifndef WOLFSSL_SP_INT_NEGATIVE
  14675. if (*in == '-') {
  14676. err = MP_VAL;
  14677. }
  14678. else
  14679. #endif
  14680. if (radix == 16) {
  14681. err = _sp_read_radix_16(a, in);
  14682. }
  14683. #ifdef WOLFSSL_SP_READ_RADIX_10
  14684. else if (radix == 10) {
  14685. err = _sp_read_radix_10(a, in);
  14686. }
  14687. #endif
  14688. else {
  14689. err = MP_VAL;
  14690. }
  14691. }
  14692. return err;
  14693. }
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC || !NO_DSA */
  14695. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  14696. defined(WC_MP_TO_RADIX)
  14697. /* Put the big-endian, hex string encoding of a into str.
  14698. *
  14699. * Assumes str is large enough for result.
  14700. * Use sp_radix_size() to calculate required length.
  14701. *
  14702. * @param [in] a SP integer to convert.
  14703. * @param [out] str String to hold hex string result.
  14704. *
  14705. * @return MP_OKAY on success.
  14706. * @return MP_VAL when a or str is NULL.
  14707. */
int sp_tohex(sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;

    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* quick out if its zero */
        if (sp_iszero(a) == MP_YES) {
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Zero is padded to a whole byte: "00". */
            *str++ = '0';
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            *str++ = '0';
            *str = '\0';
        }
        else {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                *str = '-';
                str++;
            }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            i = a->used - 1;
#ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Find highest non-zero byte in most-significant word. */
            for (j = SP_WORD_SIZE - 8; j >= 0; j -= 8) {
                if (((a->dp[i] >> j) & 0xff) != 0) {
                    break;
                }
                else if (j == 0) {
                    /* Whole word was zero - restart the scan in the next
                     * word down. */
                    j = SP_WORD_SIZE - 8;
                    --i;
                }
            }
            /* Start with high nibble of byte. */
            j += 4;
#else
            /* Find highest non-zero nibble in most-significant word. */
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                if (((a->dp[i] >> j) & 0xf) != 0) {
                    break;
                }
                else if (j == 0) {
                    /* Whole word was zero - restart the scan in the next
                     * word down. */
                    j = SP_WORD_SIZE - 4;
                    --i;
                }
            }
#endif /* WC_DISABLE_RADIX_ZERO_PAD */
            /* Most-significant word: emit from the first significant
             * nibble downward. */
            for (; j >= 0; j -= 4) {
                *(str++) = ByteToHex((byte)(a->dp[i] >> j));
            }
            /* Remaining words: every nibble is emitted, zeros included. */
            for (--i; i >= 0; i--) {
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                    *(str++) = (byte)ByteToHex((byte)(a->dp[i] >> j));
                }
            }
            *str = '\0';
        }
    }
    return err;
}
  14772. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  14773. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  14774. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  14775. defined(WC_MP_TO_RADIX)
  14776. /* Put the big-endian, decimal string encoding of a into str.
  14777. *
  14778. * Assumes str is large enough for result.
  14779. * Use sp_radix_size() to calculate required length.
  14780. *
  14781. * @param [in] a SP integer to convert.
  14782. * @param [out] str String to hold hex string result.
  14783. *
  14784. * @return MP_OKAY on success.
  14785. * @return MP_VAL when a or str is NULL.
  14786. * @return MP_MEM when dynamic memory allocation fails.
  14787. */
  14788. int sp_todecimal(sp_int* a, char* str)
  14789. {
  14790. int err = MP_OKAY;
  14791. int i;
  14792. int j;
  14793. sp_int_digit d;
  14794. if ((a == NULL) || (str == NULL)) {
  14795. err = MP_VAL;
  14796. }
  14797. /* quick out if its zero */
  14798. else if (sp_iszero(a) == MP_YES) {
  14799. *str++ = '0';
  14800. *str = '\0';
  14801. }
  14802. else {
  14803. DECL_SP_INT(t, a->used + 1);
  14804. ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
  14805. if (err == MP_OKAY) {
  14806. err = sp_copy(a, t);
  14807. }
  14808. if (err == MP_OKAY) {
  14809. #ifdef WOLFSSL_SP_INT_NEGATIVE
  14810. if (a->sign == MP_NEG) {
  14811. *str = '-';
  14812. str++;
  14813. }
  14814. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  14815. i = 0;
  14816. while (!sp_iszero(t)) {
  14817. sp_div_d(t, 10, t, &d);
  14818. str[i++] = (char)('0' + d);
  14819. }
  14820. str[i] = '\0';
  14821. for (j = 0; j <= (i - 1) / 2; j++) {
  14822. int c = (unsigned char)str[j];
  14823. str[j] = str[i - 1 - j];
  14824. str[i - 1 - j] = (char)c;
  14825. }
  14826. }
  14827. FREE_SP_INT(t, NULL);
  14828. }
  14829. return err;
  14830. }
  14831. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  14832. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  14833. defined(WC_MP_TO_RADIX)
  14834. /* Put the string version, big-endian, of a in str using the given radix.
  14835. *
  14836. * @param [in] a SP integer to convert.
  14837. * @param [out] str String to hold hex string result.
  14838. * @param [in] radix Base of character.
  14839. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  14840. *
  14841. * @return MP_OKAY on success.
  14842. * @return MP_VAL when a or str is NULL, or radix not supported.
  14843. */
  14844. int sp_toradix(sp_int* a, char* str, int radix)
  14845. {
  14846. int err = MP_OKAY;
  14847. if ((a == NULL) || (str == NULL)) {
  14848. err = MP_VAL;
  14849. }
  14850. else if (radix == MP_RADIX_HEX) {
  14851. err = sp_tohex(a, str);
  14852. }
  14853. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
  14854. defined(HAVE_COMP_KEY)
  14855. else if (radix == MP_RADIX_DEC) {
  14856. err = sp_todecimal(a, str);
  14857. }
  14858. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  14859. else {
  14860. err = MP_VAL;
  14861. }
  14862. return err;
  14863. }
  14864. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  14865. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  14866. defined(WC_MP_TO_RADIX)
  14867. /* Calculate the length of the string version, big-endian, of a using the given
  14868. * radix.
  14869. *
  14870. * @param [in] a SP integer to convert.
  14871. * @param [in] radix Base of character.
  14872. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  14873. * @param [out] size The number of characters in encoding.
  14874. *
  14875. * @return MP_OKAY on success.
  14876. * @return MP_VAL when a or size is NULL, or radix not supported.
  14877. */
  14878. int sp_radix_size(sp_int* a, int radix, int* size)
  14879. {
  14880. int err = MP_OKAY;
  14881. if ((a == NULL) || (size == NULL)) {
  14882. err = MP_VAL;
  14883. }
  14884. else if (radix == MP_RADIX_HEX) {
  14885. if (a->used == 0) {
  14886. #ifndef WC_DISABLE_RADIX_ZERO_PAD
  14887. /* 00 and '\0' */
  14888. *size = 2 + 1;
  14889. #else
  14890. /* Zero and '\0' */
  14891. *size = 1 + 1;
  14892. #endif /* WC_DISABLE_RADIX_ZERO_PAD */
  14893. }
  14894. else {
  14895. int nibbles = (sp_count_bits(a) + 3) / 4;
  14896. #ifndef WC_DISABLE_RADIX_ZERO_PAD
  14897. if (nibbles & 1) {
  14898. nibbles++;
  14899. }
  14900. #endif /* WC_DISABLE_RADIX_ZERO_PAD */
  14901. #ifdef WOLFSSL_SP_INT_NEGATIVE
  14902. if (a->sign == MP_NEG) {
  14903. nibbles++;
  14904. }
  14905. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  14906. /* One more for \0 */
  14907. *size = nibbles + 1;
  14908. }
  14909. }
  14910. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
  14911. defined(HAVE_COMP_KEY)
  14912. else if (radix == MP_RADIX_DEC) {
  14913. int i;
  14914. sp_int_digit d;
  14915. /* quick out if its zero */
  14916. if (sp_iszero(a) == MP_YES) {
  14917. /* Zero and '\0' */
  14918. *size = 1 + 1;
  14919. }
  14920. else {
  14921. DECL_SP_INT(t, a->used + 1);
  14922. ALLOC_SP_INT(t, a->used + 1, err, NULL);
  14923. if (err == MP_OKAY) {
  14924. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  14925. t->size = a->used + 1;
  14926. #endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */
  14927. err = sp_copy(a, t);
  14928. }
  14929. if (err == MP_OKAY) {
  14930. for (i = 0; !sp_iszero(t); i++) {
  14931. sp_div_d(t, 10, t, &d);
  14932. }
  14933. #ifdef WOLFSSL_SP_INT_NEGATIVE
  14934. if (a->sign == MP_NEG) {
  14935. i++;
  14936. }
  14937. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  14938. /* One more for \0 */
  14939. *size = i + 1;
  14940. }
  14941. FREE_SP_INT(t, NULL);
  14942. }
  14943. }
  14944. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  14945. else {
  14946. err = MP_VAL;
  14947. }
  14948. return err;
  14949. }
  14950. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  14951. /***************************************
  14952. * Prime number generation and checking.
  14953. ***************************************/
  14954. #if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
  14955. !defined(NO_DSA)) && !defined(WC_NO_RNG)
  14956. /* Generate a random prime for RSA only.
  14957. *
  14958. * @param [out] r SP integer to hold result.
  14959. * @param [in] len Number of bytes in prime.
  14960. * @param [in] rng Random number generator.
  14961. * @param [in] heap Heap hint. Unused.
  14962. *
  14963. * @return MP_OKAY on success
  14964. * @return MP_VAL when r or rng is NULL, length is not supported or random
  14965. * number generator fails.
  14966. */
  14967. int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
  14968. {
  14969. static const int USE_BBS = 1;
  14970. int err = MP_OKAY;
  14971. int type = 0;
  14972. int isPrime = MP_NO;
  14973. #ifdef WOLFSSL_SP_MATH_ALL
  14974. int bits = 0;
  14975. #endif /* WOLFSSL_SP_MATH_ALL */
  14976. (void)heap;
  14977. /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
  14978. if ((r == NULL) || (rng == NULL) || (len == 0)) {
  14979. err = MP_VAL;
  14980. }
  14981. if (err == MP_OKAY) {
  14982. /* get type */
  14983. if (len < 0) {
  14984. type = USE_BBS;
  14985. len = -len;
  14986. }
  14987. #ifndef WOLFSSL_SP_MATH_ALL
  14988. /* For minimal maths, support only what's in SP and needed for DH. */
  14989. #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
  14990. if (len == 32) {
  14991. }
  14992. else
  14993. #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
  14994. /* Generate RSA primes that are half the modulus length. */
  14995. #ifndef WOLFSSL_SP_NO_3072
  14996. if ((len != 128) && (len != 192))
  14997. #else
  14998. if (len != 128)
  14999. #endif /* WOLFSSL_SP_NO_3072 */
  15000. {
  15001. err = MP_VAL;
  15002. }
  15003. #endif /* !WOLFSSL_SP_MATH_ALL */
  15004. #ifdef WOLFSSL_SP_INT_NEGATIVE
  15005. r->sign = MP_ZPOS;
  15006. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  15007. r->used = (len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
  15008. #ifdef WOLFSSL_SP_MATH_ALL
  15009. bits = (len * 8) & SP_WORD_MASK;
  15010. #endif /* WOLFSSL_SP_MATH_ALL */
  15011. }
  15012. /* Assume the candidate is probably prime and then test until
  15013. * it is proven composite. */
  15014. while (err == MP_OKAY && isPrime == MP_NO) {
  15015. #ifdef SHOW_GEN
  15016. printf(".");
  15017. fflush(stdout);
  15018. #endif /* SHOW_GEN */
  15019. /* generate value */
  15020. err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
  15021. if (err != 0) {
  15022. err = MP_VAL;
  15023. break;
  15024. }
  15025. /* munge bits */
  15026. #ifndef LITTLE_ENDIAN_ORDER
  15027. ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
  15028. #else
  15029. ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
  15030. #endif /* LITTLE_ENDIAN_ORDER */
  15031. r->dp[0] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
  15032. #ifndef LITTLE_ENDIAN_ORDER
  15033. if (((len * 8) & SP_WORD_MASK) != 0) {
  15034. r->dp[r->used-1] >>= SP_WORD_SIZE - ((len * 8) & SP_WORD_MASK);
  15035. }
  15036. #endif /* LITTLE_ENDIAN_ORDER */
  15037. #ifdef WOLFSSL_SP_MATH_ALL
  15038. if (bits > 0) {
  15039. r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
  15040. }
  15041. #endif /* WOLFSSL_SP_MATH_ALL */
  15042. /* test */
  15043. /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
  15044. * of a 1024-bit candidate being a false positive, when it is our
  15045. * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
  15046. * Using 8 because we've always used 8 */
  15047. sp_prime_is_prime_ex(r, 8, &isPrime, rng);
  15048. }
  15049. return err;
  15050. }
  15051. #endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
  15052. #ifdef WOLFSSL_SP_PRIME_GEN
  15053. /* Miller-Rabin test of "a" to the base of "b" as described in
  15054. * HAC pp. 139 Algorithm 4.24
  15055. *
  15056. * Sets result to 0 if definitely composite or 1 if probably prime.
  15057. * Randomly the chance of error is no more than 1/4 and often
  15058. * very much lower.
  15059. *
  15060. * @param [in] a SP integer to check.
  15061. * @param [in] b SP integer that is a small prime.
  15062. * @param [out] result MP_YES when number is likey prime.
  15063. * MP_NO otherwise.
  15064. * @param [in] n1 SP integer temporary.
  15065. * @param [in] y SP integer temporary.
  15066. * @param [in] r SP integer temporary.
  15067. *
  15068. * @return MP_OKAY on success.
  15069. * @return MP_MEM when dynamic memory allocation fails.
  15070. */
static int sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result,
                                    sp_int* n1, sp_int* y, sp_int* r)
{
    int s;              /* count of trailing zero bits of a - 1 */
    int j;              /* squaring-round counter */
    int err = MP_OKAY;

    /* default */
    *result = MP_NO;

    /* ensure b > 1 */
    if (sp_cmp_d(b, 1) == MP_GT) {
        /* get n1 = a - 1 */
        (void)sp_copy(a, n1);
        _sp_sub_d(n1, 1, n1);
        /* set 2**s * r = n1 */
        (void)sp_copy(n1, r);

        /* count the number of least significant bits
         * which are zero
         */
        s = sp_cnt_lsb(r);

        /* now divide n - 1 by 2**s */
        sp_rshb(r, s, r);

        /* compute y = b**r mod a */
        err = sp_exptmod(b, r, a, y);
        if (err == MP_OKAY) {
            /* probably prime until shown otherwise */
            *result = MP_YES;

            /* if y != 1 and y != n1 do */
            if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
                j = 1;
                /* while j <= s-1 and y != n1 */
                while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
                    /* y = y^2 mod a - repeated squaring of the witness */
                    err = sp_sqrmod(y, a, y);
                    if (err != MP_OKAY) {
                        break;
                    }

                    /* if y == 1 then composite: a non-trivial square root of
                     * 1 was found */
                    if (sp_cmp_d(y, 1) == MP_EQ) {
                        *result = MP_NO;
                        break;
                    }
                    ++j;
                }

                /* if y != n1 then composite */
                if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
                    *result = MP_NO;
                }
            }
        }
    }

    return err;
}
  15122. /* Miller-Rabin test of "a" to the base of "b" as described in
  15123. * HAC pp. 139 Algorithm 4.24
  15124. *
  15125. * Sets result to 0 if definitely composite or 1 if probably prime.
  15126. * Randomly the chance of error is no more than 1/4 and often
  15127. * very much lower.
  15128. *
  15129. * @param [in] a SP integer to check.
  15130. * @param [in] b SP integer that is a small prime.
  15131. * @param [out] result MP_YES when number is likey prime.
  15132. * MP_NO otherwise.
  15133. *
  15134. * @return MP_OKAY on success.
  15135. * @return MP_MEM when dynamic memory allocation fails.
  15136. */
  15137. static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result)
  15138. {
  15139. int err = MP_OKAY;
  15140. sp_int *n1;
  15141. sp_int *y;
  15142. sp_int *r;
  15143. DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3);
  15144. ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL);
  15145. if (err == MP_OKAY) {
  15146. n1 = t[0];
  15147. y = t[1];
  15148. r = t[2];
  15149. /* Only 'y' needs to be twice as big. */
  15150. sp_init_size(n1, a->used * 2 + 1);
  15151. sp_init_size(y, a->used * 2 + 1);
  15152. sp_init_size(r, a->used * 2 + 1);
  15153. err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
  15154. sp_clear(n1);
  15155. sp_clear(y);
  15156. sp_clear(r);
  15157. }
  15158. FREE_SP_INT_ARRAY(t, NULL);
  15159. return err;
  15160. }
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE 54

/* The first 54 primes - each fits in an 8-bit digit. Used for trial division
 * and as Miller-Rabin bases. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE 256

/* The first 256 primes. Used for trial division and as Miller-Rabin bases. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
  15212. /* Check whether a is prime.
  15213. * Checks against a number of small primes and does t iterations of
  15214. * Miller-Rabin.
  15215. *
  15216. * @param [in] a SP integer to check.
  15217. * @param [in] t Number of iterations of Miller-Rabin test to perform.
  15218. * @param [out] result MP_YES when number is prime.
  15219. * MP_NO otherwise.
  15220. *
  15221. * @return MP_OKAY on success.
  15222. * @return MP_VAL when a or result is NULL, or t is out of range.
  15223. * @return MP_MEM when dynamic memory allocation fails.
  15224. */
int sp_prime_is_prime(sp_int* a, int t, int* result)
{
    int err = MP_OKAY;
    int i;
    int haveRes = 0;    /* non-zero once *result has been decided */
    sp_int_digit d;
    /* NOTE(review): declared with 2 digits but allocated/initialized with
     * size 1 below - confirm the intended size. */
    DECL_SP_INT(b, 2);

    /* Validate parameters; report MP_NO when we can. */
    if ((a == NULL) || (result == NULL)) {
        if (result != NULL) {
            *result = MP_NO;
        }
        err = MP_VAL;
    }

    /* Each Miller-Rabin round below uses the i-th small prime as its base,
     * so t must not exceed the table size. */
    if ((err == MP_OKAY) && ((t <= 0) || (t > SP_PRIME_SIZE))) {
        *result = MP_NO;
        err = MP_VAL;
    }

    /* 1 is not prime. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        *result = MP_NO;
        haveRes = 1;
    }

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
        /* check against primes table - a single-digit value may itself be one
         * of the small primes */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
                *result = MP_YES;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        /* do trial division - any zero remainder proves compositeness (small
         * primes themselves were already handled by the table check above) */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            err = sp_mod_d(a, sp_primes[i], &d);
            if ((err != MP_OKAY) || (d == 0)) {
                *result = MP_NO;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        ALLOC_SP_INT(b, 1, err, NULL);
        if (err == MP_OKAY) {
            /* now do 't' miller rabins using the first t small primes as
             * deterministic bases */
            sp_init_size(b, 1);
            for (i = 0; i < t; i++) {
                sp_set(b, sp_primes[i]);
                err = sp_prime_miller_rabin(a, b, result);
                if ((err != MP_OKAY) || (*result == MP_NO)) {
                    break;
                }
            }
        }
    }

    RESTORE_VECTOR_REGISTERS();

    FREE_SP_INT(b, NULL);
    return err;
}
  15286. /* Check whether a is prime.
  15287. * Checks against a number of small primes and does t iterations of
  15288. * Miller-Rabin.
  15289. *
  15290. * @param [in] a SP integer to check.
  15291. * @param [in] t Number of iterations of Miller-Rabin test to perform.
  15292. * @param [out] result MP_YES when number is prime.
  15293. * MP_NO otherwise.
  15294. * @param [in] rng Random number generator for Miller-Rabin testing.
  15295. *
  15296. * @return MP_OKAY on success.
  15297. * @return MP_VAL when a, result or rng is NULL.
  15298. * @return MP_MEM when dynamic memory allocation fails.
  15299. */
int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
{
    int err = MP_OKAY;
    int ret = MP_YES;
    int haveRes = 0;    /* non-zero once ret has been decided */
    int i;
#ifndef WC_NO_RNG
    sp_int *b = NULL;   /* random Miller-Rabin base */
    sp_int *c = NULL;   /* a - 2: exclusive upper bound for the base */
    sp_int *n1 = NULL;  /* temporary for Miller-Rabin */
    sp_int *y = NULL;   /* temporary for Miller-Rabin - needs 2x digits */
    sp_int *r = NULL;   /* temporary for Miller-Rabin */
#endif /* WC_NO_RNG */

    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Negative values are not prime. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    /* 1 is not prime. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        ret = MP_NO;
        haveRes = 1;
    }

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
        /* check against primes table - a single-digit value may itself be one
         * of the small primes */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
                ret = MP_YES;
                haveRes = 1;
                break;
            }
        }
    }

    if ((err == MP_OKAY) && (!haveRes)) {
        sp_int_digit d;

        /* do trial division - any zero remainder proves compositeness */
        for (i = 0; i < SP_PRIME_SIZE; i++) {
            err = sp_mod_d(a, sp_primes[i], &d);
            if ((err != MP_OKAY) || (d == 0)) {
                ret = MP_NO;
                haveRes = 1;
                break;
            }
        }
    }

#ifndef WC_NO_RNG
    /* now do a miller rabin with up to t random numbers, this should
     * give a (1/4)^t chance of a false prime. */
    if ((err == MP_OKAY) && (!haveRes)) {
        int bits = sp_count_bits(a);
        word32 baseSz = (bits + 7) / 8;  /* bytes needed for a candidate base */
        DECL_SP_INT_ARRAY(ds, a->used + 1, 3);
        DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);

        ALLOC_SP_INT_ARRAY(ds, a->used + 1, 3, err, NULL);
        ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
        if (err == MP_OKAY) {
            b  = ds[0];
            c  = ds[1];
            n1 = ds[2];
            y  = d[0];
            r  = d[1];

            /* Only 'y' needs to be twice as big. */
            sp_init_size(b , a->used + 1);
            sp_init_size(c , a->used + 1);
            sp_init_size(n1, a->used + 1);
            sp_init_size(y , a->used * 2 + 1);
            sp_init_size(r , a->used * 2 + 1);

            /* c = a - 2: random bases must lie strictly below this. */
            _sp_sub_d(a, 2, c);

            /* Keep only the count of bits used in the top word of a. */
            bits &= SP_WORD_MASK;

            while (t > 0) {
                /* Draw a random candidate base. */
                err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
                if (err != MP_OKAY) {
                    break;
                }
                b->used = a->used;

            #ifdef BIG_ENDIAN_ORDER
                /* Top word of b may not have been completely filled. */
                if (((baseSz * 8) & SP_WORD_MASK) != 0) {
                    b->dp[b->used-1] >>=
                        SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
                }
            #endif /* BIG_ENDIAN_ORDER */

                /* Ensure the top word has no more bits than necessary. */
                if (bits > 0) {
                    b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
                    sp_clamp(b);
                }

                /* Base must be in (2, a - 2); otherwise draw again.
                 * NOTE(review): retries do not decrement t - this relies on
                 * the RNG eventually producing an in-range value. */
                if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
                    continue;
                }

                err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
                if ((err != MP_OKAY) || (ret == MP_NO)) {
                    break;
                }

                t--;
            }

            /* Zeroize temporaries that held secret-dependent values. */
            sp_clear(n1);
            sp_clear(y);
            sp_clear(r);
            sp_clear(b);
            sp_clear(c);
        }

        FREE_SP_INT_ARRAY(d, NULL);
        FREE_SP_INT_ARRAY(ds, NULL);
    }
#else
    (void)t;
#endif /* !WC_NO_RNG */

    if (result != NULL) {
        *result = ret;
    }

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  15416. #endif /* WOLFSSL_SP_PRIME_GEN */
  15417. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
  15418. /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
  15419. *
  15420. * a and b are positive integers.
  15421. *
  15422. * @param [in] a SP integer of first operand.
  15423. * @param [in] b SP integer of second operand.
  15424. * @param [out] r SP integer to hold result.
  15425. *
  15426. * @return MP_OKAY on success.
  15427. * @return MP_VAL when a, b or r is NULL or too large.
  15428. * @return MP_MEM when dynamic memory allocation fails.
  15429. */
int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* The temporaries below need one extra digit of headroom. */
    else if (a->used >= SP_INT_DIGITS || b->used >= SP_INT_DIGITS) {
        err = MP_VAL;
    }
    else if (sp_iszero(a)) {
        /* GCD of 0 and 0 is undefined as all integers divide 0. */
        if (sp_iszero(b)) {
            err = MP_VAL;
        }
        else {
            /* gcd(0, b) = b */
            err = sp_copy(b, r);
        }
    }
    else if (sp_iszero(b)) {
        /* gcd(a, 0) = a */
        err = sp_copy(a, r);
    }
    else {
        /* Euclidean algorithm with the first reduction done eagerly:
         * for a >= b start with u = b, v = a mod b, then iterate
         * (u, v) = (v, u mod v) until v == 0; gcd is u. */
        sp_int* u = NULL;
        sp_int* v = NULL;
        sp_int* t = NULL;
        int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
        DECL_SP_INT_ARRAY(d, used, 3);

        SAVE_VECTOR_REGISTERS(err = _svr_ret;);

        ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
        if (err == MP_OKAY) {
            u = d[0];
            v = d[1];
            t = d[2];
            sp_init_size(u, used);
            sp_init_size(v, used);
            sp_init_size(t, used);

            if (_sp_cmp(a, b) != MP_LT) {
                sp_copy(b, u);
                /* First iteration - u = a, v = b */
                if (b->used == 1) {
                    /* Single-digit modulus: use the faster digit operation. */
                    err = sp_mod_d(a, b->dp[0], &v->dp[0]);
                    if (err == MP_OKAY) {
                        /* used is 0 when the remainder is zero. */
                        v->used = (v->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(a, b, v);
                }
            }
            else {
                sp_copy(a, u);
                /* First iteration - u = b, v = a */
                if (a->used == 1) {
                    err = sp_mod_d(b, a->dp[0], &v->dp[0]);
                    if (err == MP_OKAY) {
                        v->used = (v->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(b, a, v);
                }
            }
        }
        if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Work on magnitudes only. */
            u->sign = MP_ZPOS;
            v->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            while (!sp_iszero(v)) {
                if (v->used == 1) {
                    /* Single-digit divisor: use the faster digit operation. */
                    err = sp_mod_d(u, v->dp[0], &t->dp[0]);
                    if (err == MP_OKAY) {
                        t->used = (t->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(u, v, t);
                }
                if (err != MP_OKAY) {
                    break;
                }
                /* (u, v) = (v, u mod v) */
                sp_copy(v, u);
                sp_copy(t, v);
            }
            if (err == MP_OKAY)
                err = sp_copy(u, r);
        }

        FREE_SP_INT_ARRAY(d, NULL);
        RESTORE_VECTOR_REGISTERS();
    }

    return err;
}
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
  15523. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
  15524. (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
  15525. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  15526. *
  15527. * a and b are positive integers.
  15528. *
  15529. * @param [in] a SP integer of first operand.
  15530. * @param [in] b SP integer of second operand.
  15531. * @param [out] r SP integer to hold result.
  15532. *
  15533. * @return MP_OKAY on success.
  15534. * @return MP_VAL when a, b or r is NULL; or a or b is zero.
  15535. * @return MP_MEM when dynamic memory allocation fails.
  15536. */
  15537. int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
  15538. {
  15539. int err = MP_OKAY;
  15540. int used = ((a == NULL) || (b == NULL)) ? 1 :
  15541. (a->used >= b->used ? a->used + 1: b->used + 1);
  15542. DECL_SP_INT_ARRAY(t, used, 2);
  15543. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  15544. err = MP_VAL;
  15545. }
  15546. /* LCM of 0 and any number is undefined as 0 is not in the set of values
  15547. * being used.
  15548. */
  15549. if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
  15550. err = MP_VAL;
  15551. }
  15552. ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
  15553. if (err == MP_OKAY) {
  15554. sp_init_size(t[0], used);
  15555. sp_init_size(t[1], used);
  15556. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  15557. if (err == MP_OKAY)
  15558. err = sp_gcd(a, b, t[0]);
  15559. if (err == MP_OKAY) {
  15560. if (_sp_cmp_abs(a, b) == MP_GT) {
  15561. err = sp_div(a, t[0], t[1], NULL);
  15562. if (err == MP_OKAY) {
  15563. err = sp_mul(b, t[1], r);
  15564. }
  15565. }
  15566. else {
  15567. err = sp_div(b, t[0], t[1], NULL);
  15568. if (err == MP_OKAY) {
  15569. err = sp_mul(a, t[1], r);
  15570. }
  15571. }
  15572. }
  15573. RESTORE_VECTOR_REGISTERS();
  15574. }
  15575. FREE_SP_INT_ARRAY(t, NULL);
  15576. return err;
  15577. }
#endif /* !NO_RSA && WOLFSSL_KEY_GEN &&
        * (!WC_RSA_BLINDING || HAVE_FIPS || HAVE_SELFTEST) */
  15579. /* Returns the run time settings.
  15580. *
  15581. * @return Settings value.
  15582. */
  15583. word32 CheckRunTimeSettings(void)
  15584. {
  15585. return CTC_SETTINGS;
  15586. }
  15587. /* Returns the fast math settings.
  15588. *
  15589. * @return Setting - number of bits in a digit.
  15590. */
  15591. word32 CheckRunTimeFastMath(void)
  15592. {
  15593. return SP_WORD_SIZE;
  15594. }
  15595. #ifdef WOLFSSL_CHECK_MEM_ZERO
  15596. /* Add an MP to check.
  15597. *
  15598. * @param [in] name Name of address to check.
  15599. * @param [in] mp mp_int that needs to be checked.
  15600. */
void sp_memzero_add(const char* name, mp_int* mp)
{
    /* Register the digit array so the memory-zero checker can later verify
     * it was zeroized. */
    wc_MemZero_Add(name, mp->dp, mp->size * sizeof(sp_digit));
}
  15605. /* Check the memory in the data pointer for memory that must be zero.
  15606. *
  15607. * @param [in] mp mp_int that needs to be checked.
  15608. */
void sp_memzero_check(mp_int* mp)
{
    /* Verify the digit array registered above has been zeroized. */
    wc_MemZero_Check(mp->dp, mp->size * sizeof(sp_digit));
}
  15613. #endif /* WOLFSSL_CHECK_MEM_ZERO */
  15614. #endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */